Lines matching (full-word) "link", "trigger", "order" and "start" in mm/vmalloc.c

1 // SPDX-License-Identifier: GPL-2.0-only
5 * SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
51 #include "pgalloc-track.h"
54 static unsigned int __ro_after_init ioremap_max_page_shift = BITS_PER_LONG - 1;
106 return -ENOMEM; in vmap_pte_range()
144 if ((end - addr) != PMD_SIZE) in vmap_try_huge_pmd()
168 return -ENOMEM; in vmap_pmd_range()
179 return -ENOMEM; in vmap_pmd_range()
180 } while (pmd++, phys_addr += (next - addr), addr = next, addr != end); in vmap_pmd_range()
194 if ((end - addr) != PUD_SIZE) in vmap_try_huge_pud()
218 return -ENOMEM; in vmap_pud_range()
230 return -ENOMEM; in vmap_pud_range()
231 } while (pud++, phys_addr += (next - addr), addr = next, addr != end); in vmap_pud_range()
245 if ((end - addr) != P4D_SIZE) in vmap_try_huge_p4d()
269 return -ENOMEM; in vmap_p4d_range()
281 return -ENOMEM; in vmap_p4d_range()
282 } while (p4d++, phys_addr += (next - addr), addr = next, addr != end); in vmap_p4d_range()
291 unsigned long start; in vmap_range_noflush() local
299 start = addr; in vmap_range_noflush()
307 } while (pgd++, phys_addr += (next - addr), addr = next, addr != end); in vmap_range_noflush()
310 arch_sync_kernel_mappings(start, end); in vmap_range_noflush()
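The fragments above come from the page-table walk that maps a physical range (used by ioremap_page_range() and vmap): each level (PTE/PMD/PUD/P4D) first tries a huge mapping when the remaining chunk is exactly one block in size and suitably aligned, and otherwise descends one level. A condensed sketch of the PMD level, reassembled from the fragments above (helper names are taken from the excerpts; not verified line-for-line against the tree):

	static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
				  phys_addr_t phys_addr, pgprot_t prot,
				  unsigned int max_page_shift, pgtbl_mod_mask *mask)
	{
		pmd_t *pmd;
		unsigned long next;

		pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
		if (!pmd)
			return -ENOMEM;
		do {
			next = pmd_addr_end(addr, end);

			/* Whole PMD-sized, aligned chunk left: try one huge entry. */
			if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot,
					      max_page_shift)) {
				*mask |= PGTBL_PMD_MODIFIED;
				continue;
			}

			/* Otherwise fall back to base-page PTE mappings. */
			if (vmap_pte_range(pmd, addr, next, phys_addr, prot,
					   max_page_shift, mask))
				return -ENOMEM;
		} while (pmd++, phys_addr += (next - addr), addr = next, addr != end);
		return 0;
	}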
335 if (!area || !(area->flags & VM_IOREMAP)) { in ioremap_page_range()
337 return -EINVAL; in ioremap_page_range()
339 if (addr != (unsigned long)area->addr || in ioremap_page_range()
340 (void *)end != area->addr + get_vm_area_size(area)) { in ioremap_page_range()
342 addr, end, (long)area->addr, in ioremap_page_range()
343 (long)area->addr + get_vm_area_size(area)); in ioremap_page_range()
344 return -ERANGE; in ioremap_page_range()
437 * or be re-mapped for something else, if TLB flushes are being delayed or
442 void __vunmap_range_noflush(unsigned long start, unsigned long end) in __vunmap_range_noflush() argument
446 unsigned long addr = start; in __vunmap_range_noflush()
461 arch_sync_kernel_mappings(start, end); in __vunmap_range_noflush()
464 void vunmap_range_noflush(unsigned long start, unsigned long end) in vunmap_range_noflush() argument
466 kmsan_vunmap_range_noflush(start, end); in vunmap_range_noflush()
467 __vunmap_range_noflush(start, end); in vunmap_range_noflush()
471 * vunmap_range - unmap kernel virtual addresses
472 * @addr: start of the VM area to unmap
473 * @end: end of the VM area to unmap (non-inclusive)
476 * caches. Any subsequent access to the address before it has been re-mapped
499 return -ENOMEM; in vmap_pages_pte_range()
504 return -EBUSY; in vmap_pages_pte_range()
506 return -ENOMEM; in vmap_pages_pte_range()
508 return -EINVAL; in vmap_pages_pte_range()
526 return -ENOMEM; in vmap_pages_pmd_range()
530 return -ENOMEM; in vmap_pages_pmd_range()
544 return -ENOMEM; in vmap_pages_pud_range()
548 return -ENOMEM; in vmap_pages_pud_range()
562 return -ENOMEM; in vmap_pages_p4d_range()
566 return -ENOMEM; in vmap_pages_p4d_range()
574 unsigned long start = addr; in vmap_small_pages_range_noflush() local
593 arch_sync_kernel_mappings(start, end); in vmap_small_pages_range_noflush()
610 unsigned int i, nr = (end - addr) >> PAGE_SHIFT; in __vmap_pages_range_noflush()
618 for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) { in __vmap_pages_range_noflush()
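For scale: with 4 KiB base pages (PAGE_SHIFT == 12) and page_shift == PMD_SHIFT (21 on x86-64), the loop above advances by 1U << (21 - 12) == 512 pages per iteration, i.e. one 2 MiB mapping per step.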
645 * vmap_pages_range - map pages to a kernel virtual address
646 * @addr: start of the VM area to map
647 * @end: end of the VM area to map (non-inclusive)
654 * 0 on success, -errno on failure.
666 static int check_sparse_vm_area(struct vm_struct *area, unsigned long start, in check_sparse_vm_area() argument
670 if (WARN_ON_ONCE(area->flags & VM_FLUSH_RESET_PERMS)) in check_sparse_vm_area()
671 return -EINVAL; in check_sparse_vm_area()
672 if (WARN_ON_ONCE(area->flags & VM_NO_GUARD)) in check_sparse_vm_area()
673 return -EINVAL; in check_sparse_vm_area()
674 if (WARN_ON_ONCE(!(area->flags & VM_SPARSE))) in check_sparse_vm_area()
675 return -EINVAL; in check_sparse_vm_area()
676 if ((end - start) >> PAGE_SHIFT > totalram_pages()) in check_sparse_vm_area()
677 return -E2BIG; in check_sparse_vm_area()
678 if (start < (unsigned long)area->addr || in check_sparse_vm_area()
679 (void *)end > area->addr + get_vm_area_size(area)) in check_sparse_vm_area()
680 return -ERANGE; in check_sparse_vm_area()
685 * vm_area_map_pages - map pages inside given sparse vm_area
687 * @start: start address inside vm_area
691 int vm_area_map_pages(struct vm_struct *area, unsigned long start, in vm_area_map_pages() argument
696 err = check_sparse_vm_area(area, start, end); in vm_area_map_pages()
700 return vmap_pages_range(start, end, PAGE_KERNEL, pages, PAGE_SHIFT); in vm_area_map_pages()
704 * vm_area_unmap_pages - unmap pages inside given sparse vm_area
706 * @start: start address inside vm_area
709 void vm_area_unmap_pages(struct vm_struct *area, unsigned long start, in vm_area_unmap_pages() argument
712 if (check_sparse_vm_area(area, start, end)) in vm_area_unmap_pages()
715 vunmap_range(start, end); in vm_area_unmap_pages()
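The two helpers above let a subsystem reserve a large VM_SPARSE region up front and then populate or depopulate page-sized windows of it on demand (the checks in check_sparse_vm_area() bound each request to the area). A hypothetical, heavily simplified usage sketch; all demo_* names are illustrative, only the vmalloc API calls are real, and the pages array is assumed to be allocated elsewhere:

	#include <linux/vmalloc.h>
	#include <linux/sizes.h>
	#include <linux/errno.h>

	static struct vm_struct *demo_area;

	static int demo_init(void)
	{
		/* Reserve address space only; nothing is mapped yet. */
		demo_area = get_vm_area(SZ_4M, VM_SPARSE);
		return demo_area ? 0 : -ENOMEM;
	}

	/* Back [start, end) inside the sparse area with already-allocated pages. */
	static int demo_populate(unsigned long start, unsigned long end,
				 struct page **pages)
	{
		return vm_area_map_pages(demo_area, start, end, pages);
	}

	static void demo_depopulate(unsigned long start, unsigned long end)
	{
		vm_area_unmap_pages(demo_area, start, end);
	}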
721 * ARM, x86-64 and sparc64 put modules in a special place, in is_vmalloc_or_module_addr()
796 * Map a vmalloc()-space virtual address to the physical page frame number.
829 * This augment red-black tree represents the free vmap space.
830 * All vmap_area objects in this tree are sorted by va->va_start
835 * of its sub-tree, right or left. Therefore it is possible to
849 * rb-tree are part of one entity protected by the lock. Nodes are
850 * sorted in ascending order, thus for O(1) access to left/right
862 * An index in the pool-array corresponds to number of pages + 1.
872 * An effective vmap-node logic. Users make use of nodes instead
887 * Ready-to-free areas.
923 * an encoded value will be the node-id incremented by 1.
925 * be encoded is [0:nr_vmap_nodes - 1]. If a passed node_id
941 * Returns an encoded node-id, the valid range is within
942 * [0:nr_vmap_nodes-1] values. Otherwise nr_vmap_nodes is
948 unsigned int node_id = (val >> BITS_PER_BYTE) - 1; in decode_vn_id()
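The decode fragment above, together with the "node-id incremented by 1" comment, implies the matching encoder: the id is biased by one and shifted past the low byte, which stays free for the VMAP_* flag bits that get OR'd in via va->flags = (va_flags | vn_id). A minimal sketch of the pair (the decoder matches the excerpt; the encoder is inferred from it, not quoted):

	static unsigned int encode_vn_id(unsigned int node_id)
	{
		/* Bias by one so that "0" can mean "no node encoded". */
		return (node_id + 1) << BITS_PER_BYTE;
	}

	static unsigned int decode_vn_id(unsigned int val)
	{
		return (val >> BITS_PER_BYTE) - 1;
	}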
973 return (va->va_end - va->va_start); in va_size()
982 return va ? va->subtree_max_size : 0; in get_subtree_max_size()
1002 struct rb_node *n = root->rb_node; in __find_vmap_area()
1010 if (addr < va->va_start) in __find_vmap_area()
1011 n = n->rb_left; in __find_vmap_area()
1012 else if (addr >= va->va_end) in __find_vmap_area()
1013 n = n->rb_right; in __find_vmap_area()
1026 struct rb_node *n = root->rb_node; in __find_vmap_area_exceed_addr()
1034 if (tmp->va_end > addr) { in __find_vmap_area_exceed_addr()
1036 if (tmp->va_start <= addr) in __find_vmap_area_exceed_addr()
1039 n = n->rb_left; in __find_vmap_area_exceed_addr()
1041 n = n->rb_right; in __find_vmap_area_exceed_addr()
1065 spin_lock(&vn->busy.lock); in find_vmap_area_exceed_addr_lock()
1066 *va = __find_vmap_area_exceed_addr(addr, &vn->busy.root); in find_vmap_area_exceed_addr_lock()
1069 if (!va_start_lowest || (*va)->va_start < va_start_lowest) in find_vmap_area_exceed_addr_lock()
1070 va_start_lowest = (*va)->va_start; in find_vmap_area_exceed_addr_lock()
1071 spin_unlock(&vn->busy.lock); in find_vmap_area_exceed_addr_lock()
1082 spin_lock(&vn->busy.lock); in find_vmap_area_exceed_addr_lock()
1083 *va = __find_vmap_area(va_start_lowest, &vn->busy.root); in find_vmap_area_exceed_addr_lock()
1088 spin_unlock(&vn->busy.lock); in find_vmap_area_exceed_addr_lock()
1097 * and its left or right link for further processing.
1109 struct rb_node **link; in find_va_links() local
1112 link = &root->rb_node; in find_va_links()
1113 if (unlikely(!*link)) { in find_va_links()
1115 return link; in find_va_links()
1118 link = &from; in find_va_links()
1124 * it link, where the new va->rb_node will be attached to. in find_va_links()
1127 tmp_va = rb_entry(*link, struct vmap_area, rb_node); in find_va_links()
1131 * Trigger the BUG() if there are sides(left/right) in find_va_links()
1134 if (va->va_end <= tmp_va->va_start) in find_va_links()
1135 link = &(*link)->rb_left; in find_va_links()
1136 else if (va->va_start >= tmp_va->va_end) in find_va_links()
1137 link = &(*link)->rb_right; in find_va_links()
1139 WARN(1, "vmalloc bug: 0x%lx-0x%lx overlaps with 0x%lx-0x%lx\n", in find_va_links()
1140 va->va_start, va->va_end, tmp_va->va_start, tmp_va->va_end); in find_va_links()
1144 } while (*link); in find_va_links()
1146 *parent = &tmp_va->rb_node; in find_va_links()
1147 return link; in find_va_links()
1151 get_va_next_sibling(struct rb_node *parent, struct rb_node **link) in get_va_next_sibling() argument
1157 * The red-black tree where we try to find VA neighbors in get_va_next_sibling()
1164 list = &rb_entry(parent, struct vmap_area, rb_node)->list; in get_va_next_sibling()
1165 return (&parent->rb_right == link ? list->next : list); in get_va_next_sibling()
1170 struct rb_node *parent, struct rb_node **link, in __link_va() argument
1178 head = &rb_entry(parent, struct vmap_area, rb_node)->list; in __link_va()
1179 if (&parent->rb_right != link) in __link_va()
1180 head = head->prev; in __link_va()
1183 /* Insert to the rb-tree */ in __link_va()
1184 rb_link_node(&va->rb_node, parent, link); in __link_va()
1188 * to the tree. We do not set va->subtree_max_size to in __link_va()
1195 * the correct order later on. in __link_va()
1197 rb_insert_augmented(&va->rb_node, in __link_va()
1199 va->subtree_max_size = 0; in __link_va()
1201 rb_insert_color(&va->rb_node, root); in __link_va()
1204 /* Address-sort this list */ in __link_va()
1205 list_add(&va->list, head); in __link_va()
1210 struct rb_node *parent, struct rb_node **link, in link_va() argument
1213 __link_va(va, root, parent, link, head, false); in link_va()
1218 struct rb_node *parent, struct rb_node **link, in link_va_augment() argument
1221 __link_va(va, root, parent, link, head, true); in link_va_augment()
1227 if (WARN_ON(RB_EMPTY_NODE(&va->rb_node))) in __unlink_va()
1231 rb_erase_augmented(&va->rb_node, in __unlink_va()
1234 rb_erase(&va->rb_node, root); in __unlink_va()
1236 list_del_init(&va->list); in __unlink_va()
1237 RB_CLEAR_NODE(&va->rb_node); in __unlink_va()
1260 get_subtree_max_size(va->rb_node.rb_left), in compute_subtree_max_size()
1261 get_subtree_max_size(va->rb_node.rb_right)); in compute_subtree_max_size()
1272 if (computed_size != va->subtree_max_size) in augment_tree_propagate_check()
1274 va_size(va), va->subtree_max_size); in augment_tree_propagate_check()
1286 * - After VA has been inserted to the tree(free path);
1287 * - After VA has been shrunk(allocation path);
1288 * - After VA has been increased(merging path).
1294 * 4--8
1298 * 2--2 8--8
1304 * node becomes 4--6.
1314 free_vmap_area_rb_augment_cb_propagate(&va->rb_node, NULL); in augment_tree_propagate_from()
1325 struct rb_node **link; in insert_vmap_area() local
1328 link = find_va_links(va, root, NULL, &parent); in insert_vmap_area()
1329 if (link) in insert_vmap_area()
1330 link_va(va, root, parent, link, head); in insert_vmap_area()
1338 struct rb_node **link; in insert_vmap_area_augment() local
1342 link = find_va_links(va, NULL, from, &parent); in insert_vmap_area_augment()
1344 link = find_va_links(va, root, NULL, &parent); in insert_vmap_area_augment()
1346 if (link) { in insert_vmap_area_augment()
1347 link_va_augment(va, root, parent, link, head); in insert_vmap_area_augment()
1353 * Merge de-allocated chunk of VA memory with previous
1369 struct rb_node **link; in __merge_or_add_vmap_area() local
1377 link = find_va_links(va, root, NULL, &parent); in __merge_or_add_vmap_area()
1378 if (!link) in __merge_or_add_vmap_area()
1384 next = get_va_next_sibling(parent, link); in __merge_or_add_vmap_area()
1389 * start end in __merge_or_add_vmap_area()
1391 * |<------VA------>|<-----Next----->| in __merge_or_add_vmap_area()
1393 * start end in __merge_or_add_vmap_area()
1397 if (sibling->va_start == va->va_end) { in __merge_or_add_vmap_area()
1398 sibling->va_start = va->va_start; in __merge_or_add_vmap_area()
1410 * start end in __merge_or_add_vmap_area()
1412 * |<-----Prev----->|<------VA------>| in __merge_or_add_vmap_area()
1414 * start end in __merge_or_add_vmap_area()
1416 if (next->prev != head) { in __merge_or_add_vmap_area()
1417 sibling = list_entry(next->prev, struct vmap_area, list); in __merge_or_add_vmap_area()
1418 if (sibling->va_end == va->va_start) { in __merge_or_add_vmap_area()
1429 sibling->va_end = va->va_end; in __merge_or_add_vmap_area()
1442 __link_va(va, root, parent, link, head, augment); in __merge_or_add_vmap_area()
1471 if (va->va_start > vstart) in is_within_this_va()
1472 nva_start_addr = ALIGN(va->va_start, align); in is_within_this_va()
1481 return (nva_start_addr + size <= va->va_end); in is_within_this_va()
1485 * Find the first free block(lowest start address) in the tree,
1499 /* Start from the root. */ in find_vmap_lowest_match()
1500 node = root->rb_node; in find_vmap_lowest_match()
1503 length = adjust_search_size ? size + align - 1 : size; in find_vmap_lowest_match()
1508 if (get_subtree_max_size(node->rb_left) >= length && in find_vmap_lowest_match()
1509 vstart < va->va_start) { in find_vmap_lowest_match()
1510 node = node->rb_left; in find_vmap_lowest_match()
1517 * sub-tree if it does not have a free block that is in find_vmap_lowest_match()
1520 if (get_subtree_max_size(node->rb_right) >= length) { in find_vmap_lowest_match()
1521 node = node->rb_right; in find_vmap_lowest_match()
1526 * OK. We roll back and find the first right sub-tree, in find_vmap_lowest_match()
1536 if (get_subtree_max_size(node->rb_right) >= length && in find_vmap_lowest_match()
1537 vstart <= va->va_start) { in find_vmap_lowest_match()
1540 * parent's start address adding "1" because we do not want in find_vmap_lowest_match()
1541 * to enter same sub-tree after it has already been checked in find_vmap_lowest_match()
1544 vstart = va->va_start + 1; in find_vmap_lowest_match()
1545 node = node->rb_right; in find_vmap_lowest_match()
1609 if (nva_start_addr < va->va_start || in classify_va_fit_type()
1610 nva_start_addr + size > va->va_end) in classify_va_fit_type()
1614 if (va->va_start == nva_start_addr) { in classify_va_fit_type()
1615 if (va->va_end == nva_start_addr + size) in classify_va_fit_type()
1619 } else if (va->va_end == nva_start_addr + size) { in classify_va_fit_type()
1642 * |---------------| in va_clip()
1652 * |-------|-------| in va_clip()
1654 va->va_start += size; in va_clip()
1661 * |-------|-------| in va_clip()
1663 va->va_end = nva_start_addr; in va_clip()
1670 * |---|-------|---| in va_clip()
1675 * For percpu allocator we do not do any pre-allocation in va_clip()
1701 return -1; in va_clip()
1707 lva->va_start = va->va_start; in va_clip()
1708 lva->va_end = nva_start_addr; in va_clip()
1713 va->va_start = nva_start_addr + size; in va_clip()
1715 return -1; in va_clip()
1722 insert_vmap_area_augment(lva, &va->rb_node, root, head); in va_clip()
1737 if (va->va_start > vstart) in va_alloc()
1738 nva_start_addr = ALIGN(va->va_start, align); in va_alloc()
1755 * Returns a start address of the newly allocated area, if success.
1770 * All blocks(their start addresses) are at least PAGE_SIZE in __alloc_vmap_area()
1776 if (align <= PAGE_SIZE || (align > PAGE_SIZE && (vend - vstart) == size)) in __alloc_vmap_area()
1799 struct vmap_node *vn = addr_to_node(va->va_start); in free_vmap_area()
1804 spin_lock(&vn->busy.lock); in free_vmap_area()
1805 unlink_va(va, &vn->busy.root); in free_vmap_area()
1806 spin_unlock(&vn->busy.lock); in free_vmap_area()
1826 * We do it in non-atomic context, thus it allows us to use more in preload_this_cpu_lock()
1843 unsigned int idx = (size - 1) / PAGE_SIZE; in size_to_va_pool()
1846 return &vn->pool[idx]; in size_to_va_pool()
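size_to_va_pool() maps an allocation size to one of the per-node caching pools: a 1-page area lands in pool[0], a 2-page area in pool[1], and so on, while anything larger than MAX_VA_SIZE_PAGES pages is not cached at all. A sketch of the whole helper as implied by the two fragments above (the bounds check is my reconstruction):

	static struct vmap_pool *
	size_to_va_pool(struct vmap_node *vn, unsigned long size)
	{
		/* size is in bytes: one page maps to index 0. */
		unsigned int idx = (size - 1) / PAGE_SIZE;

		if (idx < MAX_VA_SIZE_PAGES)
			return &vn->pool[idx];

		return NULL;
	}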
1860 spin_lock(&n->pool_lock); in node_pool_add_va()
1861 list_add(&va->list, &vp->head); in node_pool_add_va()
1862 WRITE_ONCE(vp->len, vp->len + 1); in node_pool_add_va()
1863 spin_unlock(&n->pool_lock); in node_pool_add_va()
1878 if (!vp || list_empty(&vp->head)) in node_pool_del_va()
1881 spin_lock(&vn->pool_lock); in node_pool_del_va()
1882 if (!list_empty(&vp->head)) { in node_pool_del_va()
1883 va = list_first_entry(&vp->head, struct vmap_area, list); in node_pool_del_va()
1885 if (IS_ALIGNED(va->va_start, align)) { in node_pool_del_va()
1891 err |= (va->va_start < vstart); in node_pool_del_va()
1892 err |= (va->va_end > vend); in node_pool_del_va()
1895 list_del_init(&va->list); in node_pool_del_va()
1896 WRITE_ONCE(vp->len, vp->len - 1); in node_pool_del_va()
1901 list_move_tail(&va->list, &vp->head); in node_pool_del_va()
1905 spin_unlock(&vn->pool_lock); in node_pool_del_va()
1933 *addr = va->va_start; in node_alloc()
1941 vm->flags = flags; in setup_vmalloc_vm()
1942 vm->addr = (void *)va->va_start; in setup_vmalloc_vm()
1943 vm->size = va_size(va); in setup_vmalloc_vm()
1944 vm->caller = caller; in setup_vmalloc_vm()
1945 va->vm = vm; in setup_vmalloc_vm()
1967 return ERR_PTR(-EINVAL); in alloc_vmap_area()
1970 return ERR_PTR(-EBUSY); in alloc_vmap_area()
1988 return ERR_PTR(-ENOMEM); in alloc_vmap_area()
1994 kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask); in alloc_vmap_area()
2009 * returned. Therefore trigger the overflow path. in alloc_vmap_area()
2014 va->va_start = addr; in alloc_vmap_area()
2015 va->va_end = addr + size; in alloc_vmap_area()
2016 va->vm = NULL; in alloc_vmap_area()
2017 va->flags = (va_flags | vn_id); in alloc_vmap_area()
2020 vm->addr = (void *)va->va_start; in alloc_vmap_area()
2021 vm->size = va_size(va); in alloc_vmap_area()
2022 va->vm = vm; in alloc_vmap_area()
2025 vn = addr_to_node(va->va_start); in alloc_vmap_area()
2027 spin_lock(&vn->busy.lock); in alloc_vmap_area()
2028 insert_vmap_area(va, &vn->busy.root, &vn->busy.head); in alloc_vmap_area()
2029 spin_unlock(&vn->busy.lock); in alloc_vmap_area()
2031 BUG_ON(!IS_ALIGNED(va->va_start, align)); in alloc_vmap_area()
2032 BUG_ON(va->va_start < vstart); in alloc_vmap_area()
2033 BUG_ON(va->va_end > vend); in alloc_vmap_area()
2059 pr_warn("vmalloc_node_range for size %lu failed: Address range restricted to %#lx - %#lx\n", in alloc_vmap_area()
2063 return ERR_PTR(-EBUSY); in alloc_vmap_area()
2112 /* for per-CPU blocks */
2143 if (list_empty(&vn->pool[i].head)) in decay_va_pool_node()
2146 /* Detach the pool, so no-one can access it. */ in decay_va_pool_node()
2147 spin_lock(&vn->pool_lock); in decay_va_pool_node()
2148 list_replace_init(&vn->pool[i].head, &tmp_list); in decay_va_pool_node()
2149 spin_unlock(&vn->pool_lock); in decay_va_pool_node()
2152 WRITE_ONCE(vn->pool[i].len, 0); in decay_va_pool_node()
2155 n_decay = vn->pool[i].len >> 2; in decay_va_pool_node()
2158 list_del_init(&va->list); in decay_va_pool_node()
2162 WRITE_ONCE(vn->pool[i].len, vn->pool[i].len - 1); in decay_va_pool_node()
2164 if (!--n_decay) in decay_va_pool_node()
2176 spin_lock(&vn->pool_lock); in decay_va_pool_node()
2177 list_replace_init(&tmp_list, &vn->pool[i].head); in decay_va_pool_node()
2178 spin_unlock(&vn->pool_lock); in decay_va_pool_node()
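Reading the fragments together: a regular decay pass detaches a pool under pool_lock, returns roughly a quarter of its cached areas (n_decay = len >> 2) to the free space, and splices the remainder back; a full decay resets the pool length up front and drops everything.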
2189 unsigned long start, end; in kasan_release_vmalloc_node() local
2191 start = list_first_entry(&vn->purge_list, struct vmap_area, list)->va_start; in kasan_release_vmalloc_node()
2192 end = list_last_entry(&vn->purge_list, struct vmap_area, list)->va_end; in kasan_release_vmalloc_node()
2194 list_for_each_entry(va, &vn->purge_list, list) { in kasan_release_vmalloc_node()
2195 if (is_vmalloc_or_module_addr((void *) va->va_start)) in kasan_release_vmalloc_node()
2196 kasan_release_vmalloc(va->va_start, va->va_end, in kasan_release_vmalloc_node()
2197 va->va_start, va->va_end, in kasan_release_vmalloc_node()
2201 kasan_release_vmalloc(start, end, start, end, KASAN_VMALLOC_TLB_FLUSH); in kasan_release_vmalloc_node()
2215 vn->nr_purged = 0; in purge_vmap_node()
2217 list_for_each_entry_safe(va, n_va, &vn->purge_list, list) { in purge_vmap_node()
2219 unsigned int vn_id = decode_vn_id(va->flags); in purge_vmap_node()
2221 list_del_init(&va->list); in purge_vmap_node()
2224 vn->nr_purged++; in purge_vmap_node()
2226 if (is_vn_id_valid(vn_id) && !vn->skip_populate) in purge_vmap_node()
2231 list_add(&va->list, &local_list); in purge_vmap_node()
2240 * Purges all lazily-freed vmap areas.
2242 static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end, in __purge_vmap_area_lazy() argument
2261 INIT_LIST_HEAD(&vn->purge_list); in __purge_vmap_area_lazy()
2262 vn->skip_populate = full_pool_decay; in __purge_vmap_area_lazy()
2265 if (RB_EMPTY_ROOT(&vn->lazy.root)) in __purge_vmap_area_lazy()
2268 spin_lock(&vn->lazy.lock); in __purge_vmap_area_lazy()
2269 WRITE_ONCE(vn->lazy.root.rb_node, NULL); in __purge_vmap_area_lazy()
2270 list_replace_init(&vn->lazy.head, &vn->purge_list); in __purge_vmap_area_lazy()
2271 spin_unlock(&vn->lazy.lock); in __purge_vmap_area_lazy()
2273 start = min(start, list_first_entry(&vn->purge_list, in __purge_vmap_area_lazy()
2274 struct vmap_area, list)->va_start); in __purge_vmap_area_lazy()
2276 end = max(end, list_last_entry(&vn->purge_list, in __purge_vmap_area_lazy()
2277 struct vmap_area, list)->va_end); in __purge_vmap_area_lazy()
2284 flush_tlb_kernel_range(start, end); in __purge_vmap_area_lazy()
2288 nr_purge_helpers = clamp(nr_purge_helpers, 1U, nr_purge_nodes) - 1; in __purge_vmap_area_lazy()
2294 INIT_WORK(&vn->purge_work, purge_vmap_node); in __purge_vmap_area_lazy()
2297 schedule_work_on(i, &vn->purge_work); in __purge_vmap_area_lazy()
2299 schedule_work(&vn->purge_work); in __purge_vmap_area_lazy()
2301 nr_purge_helpers--; in __purge_vmap_area_lazy()
2303 vn->purge_work.func = NULL; in __purge_vmap_area_lazy()
2304 purge_vmap_node(&vn->purge_work); in __purge_vmap_area_lazy()
2305 nr_purged_areas += vn->nr_purged; in __purge_vmap_area_lazy()
2312 if (vn->purge_work.func) { in __purge_vmap_area_lazy()
2313 flush_work(&vn->purge_work); in __purge_vmap_area_lazy()
2314 nr_purged_areas += vn->nr_purged; in __purge_vmap_area_lazy()
2319 trace_purge_vmap_area_lazy(start, end, nr_purged_areas); in __purge_vmap_area_lazy()
2350 unsigned long va_start = va->va_start; in free_vmap_area_noflush()
2351 unsigned int vn_id = decode_vn_id(va->flags); in free_vmap_area_noflush()
2355 if (WARN_ON_ONCE(!list_empty(&va->list))) in free_vmap_area_noflush()
2366 id_to_node(vn_id):addr_to_node(va->va_start); in free_vmap_area_noflush()
2368 spin_lock(&vn->lazy.lock); in free_vmap_area_noflush()
2369 insert_vmap_area(va, &vn->lazy.root, &vn->lazy.head); in free_vmap_area_noflush()
2370 spin_unlock(&vn->lazy.lock); in free_vmap_area_noflush()
2384 flush_cache_vunmap(va->va_start, va->va_end); in free_unmap_vmap_area()
2385 vunmap_range_noflush(va->va_start, va->va_end); in free_unmap_vmap_area()
2387 flush_tlb_kernel_range(va->va_start, va->va_end); in free_unmap_vmap_area()
2404 * addr is not the same as va->va_start, what is not common, we in find_vmap_area()
2407 * <----va----> in find_vmap_area()
2408 * -|-----|-----|-----|-----|- in find_vmap_area()
2418 spin_lock(&vn->busy.lock); in find_vmap_area()
2419 va = __find_vmap_area(addr, &vn->busy.root); in find_vmap_area()
2420 spin_unlock(&vn->busy.lock); in find_vmap_area()
2442 spin_lock(&vn->busy.lock); in find_unlink_vmap_area()
2443 va = __find_vmap_area(addr, &vn->busy.root); in find_unlink_vmap_area()
2445 unlink_va(va, &vn->busy.root); in find_unlink_vmap_area()
2446 spin_unlock(&vn->busy.lock); in find_unlink_vmap_area()
2463 * to #define VMALLOC_SPACE (VMALLOC_END-VMALLOC_START). Guess
2487 * regular operations: Purge if vb->free is less than 1/4 of the capacity.
2492 #define VMAP_BLOCK 0x2 /* mark out the vmap_block sub-type*/
2501 * be allocated. If it is an issue, we can use rb-tree
2523 * In order to fast access to any "vmap_block" associated with a
2526 * A per-cpu vmap_block_queue is used in both ways, to serialize
2529 * overload it, since we already have the per-cpu array which is
2543 * |------|------|------|------|------|------|...<vmap address space>
2546 * - CPU_1 invokes vm_unmap_ram(6), 6 belongs to CPU0 zone, thus
2547 * it access: CPU0/INDEX0 -> vmap_blocks -> xa_lock;
2549 * - CPU_2 invokes vm_unmap_ram(11), 11 belongs to CPU1 zone, thus
2550 * it access: CPU1/INDEX1 -> vmap_blocks -> xa_lock;
2552 * - CPU_0 invokes vm_unmap_ram(20), 20 belongs to CPU2 zone, thus
2553 * it access: CPU2/INDEX2 -> vmap_blocks -> xa_lock.
2566 * if an index points on it which is nr_cpu_ids - 1. in addr_to_vb_xa()
2583 addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1); in addr_to_vb_idx()
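The "zone" scheme described above hashes a vmap block's address to one of the per-CPU xarrays, so vb_alloc() and vm_unmap_ram() issued on different CPUs usually contend on different xa_locks. The core of the mapping is an address-to-index hash along the lines of the sketch below; this is illustrative only, and the in-tree addr_to_vb_xa() additionally has to cope with a sparse cpu_possible_mask, which is what the nr_cpu_ids fragment above hints at:

	static struct xarray *addr_to_vb_xa(unsigned long addr)
	{
		int index = (addr / VMAP_BLOCK_SIZE) % num_possible_cpus();

		return &per_cpu(vmap_block_queue, index).vmap_blocks;
	}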
2598 * new_vmap_block - allocates new vmap_block and occupies 2^order pages in this
2600 * @order: how many 2^order pages should be occupied in newly allocated block
2603 * Return: virtual address in a newly allocated block or ERR_PTR(-errno)
2605 static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) in new_vmap_block() argument
2620 return ERR_PTR(-ENOMEM); in new_vmap_block()
2631 vaddr = vmap_block_vaddr(va->va_start, 0); in new_vmap_block()
2632 spin_lock_init(&vb->lock); in new_vmap_block()
2633 vb->va = va; in new_vmap_block()
2635 BUG_ON(VMAP_BBMAP_BITS <= (1UL << order)); in new_vmap_block()
2636 bitmap_zero(vb->used_map, VMAP_BBMAP_BITS); in new_vmap_block()
2637 vb->free = VMAP_BBMAP_BITS - (1UL << order); in new_vmap_block()
2638 vb->dirty = 0; in new_vmap_block()
2639 vb->dirty_min = VMAP_BBMAP_BITS; in new_vmap_block()
2640 vb->dirty_max = 0; in new_vmap_block()
2641 bitmap_set(vb->used_map, 0, (1UL << order)); in new_vmap_block()
2642 INIT_LIST_HEAD(&vb->free_list); in new_vmap_block()
2643 vb->cpu = raw_smp_processor_id(); in new_vmap_block()
2645 xa = addr_to_vb_xa(va->va_start); in new_vmap_block()
2646 vb_idx = addr_to_vb_idx(va->va_start); in new_vmap_block()
2655 * rather than vb->cpu due to task migration, which in new_vmap_block()
2660 vbq = per_cpu_ptr(&vmap_block_queue, vb->cpu); in new_vmap_block()
2661 spin_lock(&vbq->lock); in new_vmap_block()
2662 list_add_tail_rcu(&vb->free_list, &vbq->free); in new_vmap_block()
2663 spin_unlock(&vbq->lock); in new_vmap_block()
2674 xa = addr_to_vb_xa(vb->va->va_start); in free_vmap_block()
2675 tmp = xa_erase(xa, addr_to_vb_idx(vb->va->va_start)); in free_vmap_block()
2678 vn = addr_to_node(vb->va->va_start); in free_vmap_block()
2679 spin_lock(&vn->busy.lock); in free_vmap_block()
2680 unlink_va(vb->va, &vn->busy.root); in free_vmap_block()
2681 spin_unlock(&vn->busy.lock); in free_vmap_block()
2683 free_vmap_area_noflush(vb->va); in free_vmap_block()
2690 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, vb->cpu); in purge_fragmented_block()
2692 if (vb->free + vb->dirty != VMAP_BBMAP_BITS || in purge_fragmented_block()
2693 vb->dirty == VMAP_BBMAP_BITS) in purge_fragmented_block()
2697 if (!(force_purge || vb->free < VMAP_PURGE_THRESHOLD)) in purge_fragmented_block()
2701 WRITE_ONCE(vb->free, 0); in purge_fragmented_block()
2703 WRITE_ONCE(vb->dirty, VMAP_BBMAP_BITS); in purge_fragmented_block()
2704 vb->dirty_min = 0; in purge_fragmented_block()
2705 vb->dirty_max = VMAP_BBMAP_BITS; in purge_fragmented_block()
2706 spin_lock(&vbq->lock); in purge_fragmented_block()
2707 list_del_rcu(&vb->free_list); in purge_fragmented_block()
2708 spin_unlock(&vbq->lock); in purge_fragmented_block()
2709 list_add_tail(&vb->purge, purge_list); in purge_fragmented_block()
2718 list_del(&vb->purge); in free_purged_blocks()
2730 list_for_each_entry_rcu(vb, &vbq->free, free_list) { in purge_fragmented_blocks()
2731 unsigned long free = READ_ONCE(vb->free); in purge_fragmented_blocks()
2732 unsigned long dirty = READ_ONCE(vb->dirty); in purge_fragmented_blocks()
2738 spin_lock(&vb->lock); in purge_fragmented_blocks()
2740 spin_unlock(&vb->lock); in purge_fragmented_blocks()
2759 unsigned int order; in vb_alloc() local
2769 return ERR_PTR(-EINVAL); in vb_alloc()
2771 order = get_order(size); in vb_alloc()
2775 list_for_each_entry_rcu(vb, &vbq->free, free_list) { in vb_alloc()
2778 if (READ_ONCE(vb->free) < (1UL << order)) in vb_alloc()
2781 spin_lock(&vb->lock); in vb_alloc()
2782 if (vb->free < (1UL << order)) { in vb_alloc()
2783 spin_unlock(&vb->lock); in vb_alloc()
2787 pages_off = VMAP_BBMAP_BITS - vb->free; in vb_alloc()
2788 vaddr = vmap_block_vaddr(vb->va->va_start, pages_off); in vb_alloc()
2789 WRITE_ONCE(vb->free, vb->free - (1UL << order)); in vb_alloc()
2790 bitmap_set(vb->used_map, pages_off, (1UL << order)); in vb_alloc()
2791 if (vb->free == 0) { in vb_alloc()
2792 spin_lock(&vbq->lock); in vb_alloc()
2793 list_del_rcu(&vb->free_list); in vb_alloc()
2794 spin_unlock(&vbq->lock); in vb_alloc()
2797 spin_unlock(&vb->lock); in vb_alloc()
2805 vaddr = new_vmap_block(order, gfp_mask); in vb_alloc()
2813 unsigned int order; in vb_free() local
2822 order = get_order(size); in vb_free()
2823 offset = (addr & (VMAP_BLOCK_SIZE - 1)) >> PAGE_SHIFT; in vb_free()
2828 spin_lock(&vb->lock); in vb_free()
2829 bitmap_clear(vb->used_map, offset, (1UL << order)); in vb_free()
2830 spin_unlock(&vb->lock); in vb_free()
2837 spin_lock(&vb->lock); in vb_free()
2840 vb->dirty_min = min(vb->dirty_min, offset); in vb_free()
2841 vb->dirty_max = max(vb->dirty_max, offset + (1UL << order)); in vb_free()
2843 WRITE_ONCE(vb->dirty, vb->dirty + (1UL << order)); in vb_free()
2844 if (vb->dirty == VMAP_BBMAP_BITS) { in vb_free()
2845 BUG_ON(vb->free); in vb_free()
2846 spin_unlock(&vb->lock); in vb_free()
2849 spin_unlock(&vb->lock); in vb_free()
2852 static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush) in _vm_unmap_aliases() argument
2868 xa_for_each(&vbq->vmap_blocks, idx, vb) { in _vm_unmap_aliases()
2869 spin_lock(&vb->lock); in _vm_unmap_aliases()
2877 vb->dirty_max && vb->dirty != VMAP_BBMAP_BITS) { in _vm_unmap_aliases()
2878 unsigned long va_start = vb->va->va_start; in _vm_unmap_aliases()
2881 s = va_start + (vb->dirty_min << PAGE_SHIFT); in _vm_unmap_aliases()
2882 e = va_start + (vb->dirty_max << PAGE_SHIFT); in _vm_unmap_aliases()
2884 start = min(s, start); in _vm_unmap_aliases()
2888 vb->dirty_min = VMAP_BBMAP_BITS; in _vm_unmap_aliases()
2889 vb->dirty_max = 0; in _vm_unmap_aliases()
2893 spin_unlock(&vb->lock); in _vm_unmap_aliases()
2899 if (!__purge_vmap_area_lazy(start, end, false) && flush) in _vm_unmap_aliases()
2900 flush_tlb_kernel_range(start, end); in _vm_unmap_aliases()
2905 * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
2919 unsigned long start = ULONG_MAX, end = 0; in vm_unmap_aliases() local
2922 _vm_unmap_aliases(start, end, flush); in vm_unmap_aliases()
2927 * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
2955 debug_check_no_locks_freed((void *)va->va_start, va_size(va)); in vm_unmap_ram()
2961 * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
2967 * faster than vmap so it's good. But if you mix long-life and short-life
2970 * the end. Please use this function for short-lived objects.
2994 addr = va->va_start; in vm_map_ram()
3006 * With hardware tag-based KASAN, marking is skipped for in vm_map_ram()
3007 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). in vm_map_ram()
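The doc comment above recommends vm_map_ram()/vm_unmap_ram() only for short-lived objects, since mixing long- and short-lived mappings fragments the vmap space. A hypothetical usage sketch (the demo_* name is illustrative; NUMA_NO_NODE means no node preference):

	#include <linux/vmalloc.h>
	#include <linux/numa.h>

	static int demo_use_vm_map_ram(struct page **pages, unsigned int count)
	{
		void *mem;

		mem = vm_map_ram(pages, count, NUMA_NO_NODE);
		if (!mem)
			return -ENOMEM;

		/* ... short-lived access to the linear mapping ... */

		vm_unmap_ram(mem, count);
		return 0;
	}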
3020 return vm->page_order; in vm_area_page_order()
3031 static inline void set_vm_area_page_order(struct vm_struct *vm, unsigned int order) in set_vm_area_page_order() argument
3034 vm->page_order = order; in set_vm_area_page_order()
3036 BUG_ON(order != 0); in set_vm_area_page_order()
3041 * vm_area_add_early - add vmap area early during boot
3045 * vmalloc_init() is called. @vm->addr, @vm->size, and @vm->flags
3055 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { in vm_area_add_early()
3056 if (tmp->addr >= vm->addr) { in vm_area_add_early()
3057 BUG_ON(tmp->addr < vm->addr + vm->size); in vm_area_add_early()
3060 BUG_ON(tmp->addr + tmp->size > vm->addr); in vm_area_add_early()
3062 vm->next = *p; in vm_area_add_early()
3067 * vm_area_register_early - register vmap area early during boot
3072 * vmalloc_init() is called. @vm->size and @vm->flags should contain
3074 * vm->addr contains the allocated address.
3085 for (p = &vmlist; (cur = *p) != NULL; p = &cur->next) { in vm_area_register_early()
3086 if ((unsigned long)cur->addr - addr >= vm->size) in vm_area_register_early()
3088 addr = ALIGN((unsigned long)cur->addr + cur->size, align); in vm_area_register_early()
3091 BUG_ON(addr > VMALLOC_END - vm->size); in vm_area_register_early()
3092 vm->addr = (void *)addr; in vm_area_register_early()
3093 vm->next = *p; in vm_area_register_early()
3095 kasan_populate_early_vm_area_shadow(vm->addr, vm->size); in vm_area_register_early()
3106 vm->flags &= ~VM_UNINITIALIZED; in clear_vm_uninitialized_flag()
3111 unsigned long start, unsigned long end, int node, in __get_vm_area_node() argument
3134 area->flags = flags; in __get_vm_area_node()
3135 area->caller = caller; in __get_vm_area_node()
3137 va = alloc_vmap_area(size, align, start, end, node, gfp_mask, 0, area); in __get_vm_area_node()
3144 * Mark pages for non-VM_ALLOC mappings as accessible. Do it now as a in __get_vm_area_node()
3145 * best-effort approach, as they can be mapped outside of vmalloc code. in __get_vm_area_node()
3148 * With hardware tag-based KASAN, marking is skipped for in __get_vm_area_node()
3149 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). in __get_vm_area_node()
3152 area->addr = kasan_unpoison_vmalloc(area->addr, requested_size, in __get_vm_area_node()
3159 unsigned long start, unsigned long end, in __get_vm_area_caller() argument
3162 return __get_vm_area_node(size, 1, PAGE_SHIFT, flags, start, end, in __get_vm_area_caller()
3167 * get_vm_area - reserve a contiguous kernel virtual area
3194 * find_vm_area - find a continuous kernel virtual area
3211 return va->vm; in find_vm_area()
3215 * remove_vm_area - find and remove a continuous kernel virtual area
3236 if (!va || !va->vm) in remove_vm_area()
3238 vm = va->vm; in remove_vm_area()
3240 debug_check_no_locks_freed(vm->addr, get_vm_area_size(vm)); in remove_vm_area()
3241 debug_check_no_obj_freed(vm->addr, get_vm_area_size(vm)); in remove_vm_area()
3243 kasan_poison_vmalloc(vm->addr, get_vm_area_size(vm)); in remove_vm_area()
3255 for (i = 0; i < area->nr_pages; i++) in set_area_direct_map()
3256 if (page_address(area->pages[i])) in set_area_direct_map()
3257 set_direct_map(area->pages[i]); in set_area_direct_map()
3265 unsigned long start = ULONG_MAX, end = 0; in vm_reset_perms() local
3271 * Find the start and end range of the direct mappings to make sure that in vm_reset_perms()
3274 for (i = 0; i < area->nr_pages; i += 1U << page_order) { in vm_reset_perms()
3275 unsigned long addr = (unsigned long)page_address(area->pages[i]); in vm_reset_perms()
3281 start = min(addr, start); in vm_reset_perms()
3293 _vm_unmap_aliases(start, end, flush_dmap); in vm_reset_perms()
3302 llist_for_each_safe(llnode, t, llist_del_all(&p->list)) in delayed_vfree_work()
3307 * vfree_atomic - release memory allocated by vmalloc()
3326 if (addr && llist_add((struct llist_node *)addr, &p->list)) in vfree_atomic()
3327 schedule_work(&p->wq); in vfree_atomic()
3331 * vfree - Release memory allocated by vmalloc()
3345 * conventions for vfree() arch-dependent would be a really bad idea).
3371 if (unlikely(vm->flags & VM_FLUSH_RESET_PERMS)) in vfree()
3373 for (i = 0; i < vm->nr_pages; i++) { in vfree()
3374 struct page *page = vm->pages[i]; in vfree()
3377 if (!(vm->flags & VM_MAP_PUT_PAGES)) in vfree()
3378 mod_memcg_page_state(page, MEMCG_VMALLOC, -1); in vfree()
3380 * High-order allocs for huge vmallocs are split, so in vfree()
3381 * can be freed as an array of order-0 allocations in vfree()
3386 if (!(vm->flags & VM_MAP_PUT_PAGES)) in vfree()
3387 atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages); in vfree()
3388 kvfree(vm->pages); in vfree()
3394 * vunmap - release virtual mapping obtained by vmap()
3422 * vmap - map an array of pages into virtually contiguous space
3425 * @flags: vm_area->flags
3463 addr = (unsigned long)area->addr; in vmap()
3466 vunmap(area->addr); in vmap()
3471 area->pages = pages; in vmap()
3472 area->nr_pages = count; in vmap()
3474 return area->addr; in vmap()
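For completeness, a hypothetical caller of the vmap() interface excerpted above; the pages must already be allocated, and VM_MAP with PAGE_KERNEL is the usual choice for a plain read/write kernel mapping:

	#include <linux/vmalloc.h>

	static void *demo_vmap(struct page **pages, unsigned int count)
	{
		void *addr;

		addr = vmap(pages, count, VM_MAP, PAGE_KERNEL);
		if (!addr)
			return NULL;

		/* ... use the virtually contiguous mapping ... */
		return addr;	/* release later with vunmap(addr) */
	}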
3488 unsigned long pfn = data->pfns[data->idx]; in vmap_pfn_apply()
3492 return -EINVAL; in vmap_pfn_apply()
3494 ptent = pte_mkspecial(pfn_pte(pfn, data->prot)); in vmap_pfn_apply()
3497 data->idx++; in vmap_pfn_apply()
3502 * vmap_pfn - map an array of PFNs into virtually contiguous space
3508 * the start address of the mapping.
3519 if (apply_to_page_range(&init_mm, (unsigned long)area->addr, in vmap_pfn()
3525 flush_cache_vmap((unsigned long)area->addr, in vmap_pfn()
3526 (unsigned long)area->addr + count * PAGE_SIZE); in vmap_pfn()
3528 return area->addr; in vmap_pfn()
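vmap_pfn() is the PFN-based sibling for memory that has no struct page; the pte_mkspecial() in vmap_pfn_apply() above is what makes that possible. A hypothetical caller, mirroring the doc comment (the demo_* name is illustrative):

	static void *demo_vmap_pfn(unsigned long *pfns, unsigned int count)
	{
		return vmap_pfn(pfns, count, PAGE_KERNEL);
	}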
3535 unsigned int order, unsigned int nr_pages, struct page **pages) in vm_area_alloc_pages() argument
3542 * For order-0 pages we make use of bulk allocator, if in vm_area_alloc_pages()
3547 if (!order) { in vm_area_alloc_pages()
3552 * A maximum allowed request is hard-coded and is 100 in vm_area_alloc_pages()
3553 * pages per call. That is done in order to prevent a in vm_area_alloc_pages()
3554 * long preemption off scenario in the bulk-allocator in vm_area_alloc_pages()
3557 nr_pages_request = min(100U, nr_pages - nr_allocated); in vm_area_alloc_pages()
3585 /* High-order pages or fallback path if "bulk" fails. */ in vm_area_alloc_pages()
3591 page = alloc_pages_noprof(gfp, order); in vm_area_alloc_pages()
3593 page = alloc_pages_node_noprof(nid, gfp, order); in vm_area_alloc_pages()
3599 * High-order allocations must be able to be treated as in vm_area_alloc_pages()
3601 * small-page vmallocs). Some drivers do their own refcounting in vm_area_alloc_pages()
3602 * on vmalloc_to_page() pages, some use page->mapping, in vm_area_alloc_pages()
3603 * page->lru, etc. in vm_area_alloc_pages()
3605 if (order) in vm_area_alloc_pages()
3606 split_page(page, order); in vm_area_alloc_pages()
3609 * Careful, we allocate and map page-order pages, but in vm_area_alloc_pages()
3613 for (i = 0; i < (1U << order); i++) in vm_area_alloc_pages()
3617 nr_allocated += 1U << order; in vm_area_alloc_pages()
3629 unsigned long addr = (unsigned long)area->addr; in __vmalloc_area_node()
3644 area->pages = __vmalloc_node_noprof(array_size, 1, nested_gfp, node, in __vmalloc_area_node()
3645 area->caller); in __vmalloc_area_node()
3647 area->pages = kmalloc_node_noprof(array_size, nested_gfp, node); in __vmalloc_area_node()
3650 if (!area->pages) { in __vmalloc_area_node()
3658 set_vm_area_page_order(area, page_shift - PAGE_SHIFT); in __vmalloc_area_node()
3662 * High-order nofail allocations are really expensive and in __vmalloc_area_node()
3663 * potentially dangerous (pre-mature OOM, disruptive reclaim in __vmalloc_area_node()
3666 * Please note, the __vmalloc_node_range_noprof() falls-back in __vmalloc_area_node()
3667 * to order-0 pages if high-order attempt is unsuccessful. in __vmalloc_area_node()
3669 area->nr_pages = vm_area_alloc_pages((page_order ? in __vmalloc_area_node()
3671 node, page_order, nr_small_pages, area->pages); in __vmalloc_area_node()
3673 atomic_long_add(area->nr_pages, &nr_vmalloc_pages); in __vmalloc_area_node()
3677 for (i = 0; i < area->nr_pages; i++) in __vmalloc_area_node()
3678 mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1); in __vmalloc_area_node()
3685 if (area->nr_pages != nr_small_pages) { in __vmalloc_area_node()
3688 * also:- in __vmalloc_area_node()
3690 * - a pending fatal signal in __vmalloc_area_node()
3691 * - insufficient huge page-order pages in __vmalloc_area_node()
3693 * Since we always retry allocations at order-0 in the huge page in __vmalloc_area_node()
3699 area->nr_pages * PAGE_SIZE); in __vmalloc_area_node()
3713 ret = vmap_pages_range(addr, addr + size, prot, area->pages, in __vmalloc_area_node()
3727 area->nr_pages * PAGE_SIZE); in __vmalloc_area_node()
3731 return area->addr; in __vmalloc_area_node()
3734 vfree(area->addr); in __vmalloc_area_node()
3739 * __vmalloc_node_range - allocate virtually contiguous memory
3742 * @start: vm area range start
3767 unsigned long start, unsigned long end, gfp_t gfp_mask, in __vmalloc_node_range_noprof() argument
3807 VM_UNINITIALIZED | vm_flags, start, end, node, in __vmalloc_node_range_noprof()
3855 * Tag-based KASAN modes only assign tags to normal non-executable in __vmalloc_node_range_noprof()
3863 area->addr = kasan_unpoison_vmalloc(area->addr, real_size, kasan_flags); in __vmalloc_node_range_noprof()
3876 return area->addr; in __vmalloc_node_range_noprof()
3890 * __vmalloc_node - allocate virtually contiguous memory
3900 * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL
3931 * vmalloc - allocate virtually contiguous memory
3950 * vmalloc_huge - allocate virtually contiguous memory, allow huge pages
3970 * vzalloc - allocate virtually contiguous memory with zero fill
3990 * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
4008 * vmalloc_node - allocate memory on a specific node
4028 * vzalloc_node - allocate memory on a specific node with zero fill
4046 * vrealloc - reallocate virtually contiguous memory; contents remain unchanged
4097 memset((void *)p + size, 0, old_size - size); in vrealloc_noprof()
4098 kasan_poison_vmalloc(p + size, old_size - size); in vrealloc_noprof()
4129 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
4145 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
4176 remains -= copied; in zero_iter()
4182 return count - remains; in zero_iter()
4202 length = PAGE_SIZE - offset; in aligned_vread_iter()
4221 remains -= copied; in aligned_vread_iter()
4227 return count - remains; in aligned_vread_iter()
4238 char *start; in vmap_ram_vread_iter() local
4264 spin_lock(&vb->lock); in vmap_ram_vread_iter()
4265 if (bitmap_empty(vb->used_map, VMAP_BBMAP_BITS)) { in vmap_ram_vread_iter()
4266 spin_unlock(&vb->lock); in vmap_ram_vread_iter()
4270 for_each_set_bitrange(rs, re, vb->used_map, VMAP_BBMAP_BITS) { in vmap_ram_vread_iter()
4276 start = vmap_block_vaddr(vb->va->va_start, rs); in vmap_ram_vread_iter()
4278 if (addr < start) { in vmap_ram_vread_iter()
4279 size_t to_zero = min_t(size_t, start - addr, remains); in vmap_ram_vread_iter()
4283 remains -= zeroed; in vmap_ram_vread_iter()
4289 /*it could start reading from the middle of used region*/ in vmap_ram_vread_iter()
4291 n = ((re - rs + 1) << PAGE_SHIFT) - offset; in vmap_ram_vread_iter()
4295 copied = aligned_vread_iter(iter, start + offset, n); in vmap_ram_vread_iter()
4298 remains -= copied; in vmap_ram_vread_iter()
4304 spin_unlock(&vb->lock); in vmap_ram_vread_iter()
4307 /* zero-fill the left dirty or free regions */ in vmap_ram_vread_iter()
4308 return count - remains + zero_iter(iter, remains); in vmap_ram_vread_iter()
4311 spin_unlock(&vb->lock); in vmap_ram_vread_iter()
4312 return count - remains; in vmap_ram_vread_iter()
4316 * vread_iter() - read vmalloc area in a safe way to an iterator.
4324 * proper area of @buf. If there are memory holes, they'll be zero-filled.
4352 count = -(unsigned long) addr; in vread_iter()
4361 if ((unsigned long)addr + remains <= va->va_start) in vread_iter()
4370 vm = va->vm; in vread_iter()
4371 flags = va->flags & VMAP_FLAGS_MASK; in vread_iter()
4373 * VMAP_BLOCK indicates a sub-type of vm_map_ram area, need in vread_iter()
4381 if (vm && (vm->flags & VM_UNINITIALIZED)) in vread_iter()
4387 vaddr = (char *) va->va_start; in vread_iter()
4394 size_t to_zero = min_t(size_t, vaddr - addr, remains); in vread_iter()
4398 remains -= zeroed; in vread_iter()
4404 n = vaddr + size - addr; in vread_iter()
4410 else if (!(vm && (vm->flags & (VM_IOREMAP | VM_SPARSE)))) in vread_iter()
4416 remains -= copied; in vread_iter()
4422 next = va->va_end; in vread_iter()
4423 spin_unlock(&vn->busy.lock); in vread_iter()
4428 spin_unlock(&vn->busy.lock); in vread_iter()
4430 /* zero-fill memory holes */ in vread_iter()
4431 return count - remains + zero_iter(iter, remains); in vread_iter()
4435 spin_unlock(&vn->busy.lock); in vread_iter()
4437 return count - remains; in vread_iter()
4441 * remap_vmalloc_range_partial - map vmalloc pages to userspace
4443 * @uaddr: target user address to start at
4445 * @pgoff: offset from @kaddr to start at
4448 * Returns: 0 for success, -Exxx on failure
4466 return -EINVAL; in remap_vmalloc_range_partial()
4471 return -EINVAL; in remap_vmalloc_range_partial()
4475 return -EINVAL; in remap_vmalloc_range_partial()
4477 if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT))) in remap_vmalloc_range_partial()
4478 return -EINVAL; in remap_vmalloc_range_partial()
4482 return -EINVAL; in remap_vmalloc_range_partial()
4495 size -= PAGE_SIZE; in remap_vmalloc_range_partial()
4504 * remap_vmalloc_range - map vmalloc pages to userspace
4509 * Returns: 0 for success, -Exxx on failure
4520 return remap_vmalloc_range_partial(vma, vma->vm_start, in remap_vmalloc_range()
4522 vma->vm_end - vma->vm_start); in remap_vmalloc_range()
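remap_vmalloc_range() is what a driver's .mmap handler calls to expose a vmalloc_user()-style buffer to userspace; the VM_USERMAP check above enforces that only such buffers qualify. A hypothetical handler, assuming the buffer pointer was stashed in file->private_data by the driver's open path:

	#include <linux/fs.h>
	#include <linux/mm.h>
	#include <linux/vmalloc.h>

	static int demo_mmap(struct file *file, struct vm_area_struct *vma)
	{
		void *buf = file->private_data;	/* allocated with vmalloc_user() elsewhere */

		return remap_vmalloc_range(vma, buf, vma->vm_pgoff);
	}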
4529 ret = remove_vm_area(area->addr); in free_vm_area()
4542 * pvm_find_va_enclose_addr - find the vmap_area @addr belongs to
4546 * the first highest(reverse order) vmap_area is returned
4547 * i.e. va->va_start < addr && va->va_end < addr or NULL
4561 if (tmp->va_start <= addr) { in pvm_find_va_enclose_addr()
4563 if (tmp->va_end >= addr) in pvm_find_va_enclose_addr()
4566 n = n->rb_right; in pvm_find_va_enclose_addr()
4568 n = n->rb_left; in pvm_find_va_enclose_addr()
4576 * pvm_determine_end_from_reverse - find the highest aligned address
4579 * in - the VA we start the search(reverse order);
4580 * out - the VA with the highest aligned end address.
4588 unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); in pvm_determine_end_from_reverse()
4594 addr = min((*va)->va_end & ~(align - 1), vmalloc_end); in pvm_determine_end_from_reverse()
4595 if ((*va)->va_start < addr) in pvm_determine_end_from_reverse()
4604 * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
4621 * does everything top-down and scans free blocks from the end looking
4632 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); in pcpu_get_vm_areas()
4636 unsigned long base, start, size, end, last_end, orig_start, orig_end; in pcpu_get_vm_areas() local
4642 start = offsets[area]; in pcpu_get_vm_areas()
4643 end = start + sizes[area]; in pcpu_get_vm_areas()
4650 if (start > offsets[last_area]) in pcpu_get_vm_areas()
4657 BUG_ON(start2 < end && start < end2); in pcpu_get_vm_areas()
4662 if (vmalloc_end - vmalloc_start < last_end) { in pcpu_get_vm_areas()
4681 /* start scanning - we scan from the top, begin with the last area */ in pcpu_get_vm_areas()
4683 start = offsets[area]; in pcpu_get_vm_areas()
4684 end = start + sizes[area]; in pcpu_get_vm_areas()
4687 base = pvm_determine_end_from_reverse(&va, align) - end; in pcpu_get_vm_areas()
4707 if (base + end > va->va_end) { in pcpu_get_vm_areas()
4708 base = pvm_determine_end_from_reverse(&va, align) - end; in pcpu_get_vm_areas()
4716 if (base + start < va->va_start) { in pcpu_get_vm_areas()
4717 va = node_to_va(rb_prev(&va->rb_node)); in pcpu_get_vm_areas()
4718 base = pvm_determine_end_from_reverse(&va, align) - end; in pcpu_get_vm_areas()
4727 area = (area + nr_vms - 1) % nr_vms; in pcpu_get_vm_areas()
4731 start = offsets[area]; in pcpu_get_vm_areas()
4732 end = start + sizes[area]; in pcpu_get_vm_areas()
4740 start = base + offsets[area]; in pcpu_get_vm_areas()
4743 va = pvm_find_va_enclose_addr(start); in pcpu_get_vm_areas()
4745 /* It is a BUG(), but trigger recovery instead. */ in pcpu_get_vm_areas()
4749 &free_vmap_area_list, va, start, size); in pcpu_get_vm_areas()
4751 /* It is a BUG(), but trigger recovery instead. */ in pcpu_get_vm_areas()
4756 va->va_start = start; in pcpu_get_vm_areas()
4757 va->va_end = start + size; in pcpu_get_vm_areas()
4764 if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area])) in pcpu_get_vm_areas()
4770 struct vmap_node *vn = addr_to_node(vas[area]->va_start); in pcpu_get_vm_areas()
4772 spin_lock(&vn->busy.lock); in pcpu_get_vm_areas()
4773 insert_vmap_area(vas[area], &vn->busy.root, &vn->busy.head); in pcpu_get_vm_areas()
4776 spin_unlock(&vn->busy.lock); in pcpu_get_vm_areas()
4780 * Mark allocated areas as accessible. Do it now as a best-effort in pcpu_get_vm_areas()
4782 * With hardware tag-based KASAN, marking is skipped for in pcpu_get_vm_areas()
4783 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). in pcpu_get_vm_areas()
4786 vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr, in pcpu_get_vm_areas()
4787 vms[area]->size, KASAN_VMALLOC_PROT_NORMAL); in pcpu_get_vm_areas()
4799 while (area--) { in pcpu_get_vm_areas()
4800 orig_start = vas[area]->va_start; in pcpu_get_vm_areas()
4801 orig_end = vas[area]->va_end; in pcpu_get_vm_areas()
4806 va->va_start, va->va_end, in pcpu_get_vm_areas()
4851 orig_start = vas[area]->va_start; in pcpu_get_vm_areas()
4852 orig_end = vas[area]->va_end; in pcpu_get_vm_areas()
4857 va->va_start, va->va_end, in pcpu_get_vm_areas()
4869 * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
4898 if (!spin_trylock(&vn->busy.lock)) in vmalloc_dump_obj()
4901 va = __find_vmap_area(addr, &vn->busy.root); in vmalloc_dump_obj()
4902 if (!va || !va->vm) { in vmalloc_dump_obj()
4903 spin_unlock(&vn->busy.lock); in vmalloc_dump_obj()
4907 vm = va->vm; in vmalloc_dump_obj()
4908 addr = (unsigned long) vm->addr; in vmalloc_dump_obj()
4909 caller = vm->caller; in vmalloc_dump_obj()
4910 nr_pages = vm->nr_pages; in vmalloc_dump_obj()
4911 spin_unlock(&vn->busy.lock); in vmalloc_dump_obj()
4913 pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n", in vmalloc_dump_obj()
4924 unsigned int nr, *counters = m->private; in show_numa_info()
4930 if (v->flags & VM_UNINITIALIZED) in show_numa_info()
4937 for (nr = 0; nr < v->nr_pages; nr += step) in show_numa_info()
4938 counters[page_to_nid(v->pages[nr])] += step; in show_numa_info()
4954 spin_lock(&vn->lazy.lock); in show_purge_info()
4955 list_for_each_entry(va, &vn->lazy.head, list) { in show_purge_info()
4956 seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n", in show_purge_info()
4957 (void *)va->va_start, (void *)va->va_end, in show_purge_info()
4960 spin_unlock(&vn->lazy.lock); in show_purge_info()
4974 spin_lock(&vn->busy.lock); in vmalloc_info_show()
4975 list_for_each_entry(va, &vn->busy.head, list) { in vmalloc_info_show()
4976 if (!va->vm) { in vmalloc_info_show()
4977 if (va->flags & VMAP_RAM) in vmalloc_info_show()
4978 seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n", in vmalloc_info_show()
4979 (void *)va->va_start, (void *)va->va_end, in vmalloc_info_show()
4985 v = va->vm; in vmalloc_info_show()
4987 seq_printf(m, "0x%pK-0x%pK %7ld", in vmalloc_info_show()
4988 v->addr, v->addr + v->size, v->size); in vmalloc_info_show()
4990 if (v->caller) in vmalloc_info_show()
4991 seq_printf(m, " %pS", v->caller); in vmalloc_info_show()
4993 if (v->nr_pages) in vmalloc_info_show()
4994 seq_printf(m, " pages=%d", v->nr_pages); in vmalloc_info_show()
4996 if (v->phys_addr) in vmalloc_info_show()
4997 seq_printf(m, " phys=%pa", &v->phys_addr); in vmalloc_info_show()
4999 if (v->flags & VM_IOREMAP) in vmalloc_info_show()
5002 if (v->flags & VM_SPARSE) in vmalloc_info_show()
5005 if (v->flags & VM_ALLOC) in vmalloc_info_show()
5008 if (v->flags & VM_MAP) in vmalloc_info_show()
5011 if (v->flags & VM_USERMAP) in vmalloc_info_show()
5014 if (v->flags & VM_DMA_COHERENT) in vmalloc_info_show()
5015 seq_puts(m, " dma-coherent"); in vmalloc_info_show()
5017 if (is_vmalloc_addr(v->pages)) in vmalloc_info_show()
5023 spin_unlock(&vn->busy.lock); in vmalloc_info_show()
5058 * -|-----|.....|-----|-----|-----|.....|- in vmap_init_free_space()
5060 * |<--------------------------------->| in vmap_init_free_space()
5062 for (busy = vmlist; busy; busy = busy->next) { in vmap_init_free_space()
5063 if ((unsigned long) busy->addr - vmap_start > 0) { in vmap_init_free_space()
5066 free->va_start = vmap_start; in vmap_init_free_space()
5067 free->va_end = (unsigned long) busy->addr; in vmap_init_free_space()
5075 vmap_start = (unsigned long) busy->addr + busy->size; in vmap_init_free_space()
5078 if (vmap_end - vmap_start > 0) { in vmap_init_free_space()
5081 free->va_start = vmap_start; in vmap_init_free_space()
5082 free->va_end = vmap_end; in vmap_init_free_space()
5102 * As for NUMA-aware notes. For bigger systems, for example in vmap_init_nodes()
5103 * NUMA with multi-sockets, where we can end-up with thousands in vmap_init_nodes()
5104 * of cores in total, a "sub-numa-clustering" should be added. in vmap_init_nodes()
5107 * with dedicated sub-nodes in it which describe one group or in vmap_init_nodes()
5108 * set of cores. Therefore a per-domain purging is supposed to in vmap_init_nodes()
5109 * be added as well as a per-domain balancing. in vmap_init_nodes()
5128 vn->busy.root = RB_ROOT; in vmap_init_nodes()
5129 INIT_LIST_HEAD(&vn->busy.head); in vmap_init_nodes()
5130 spin_lock_init(&vn->busy.lock); in vmap_init_nodes()
5132 vn->lazy.root = RB_ROOT; in vmap_init_nodes()
5133 INIT_LIST_HEAD(&vn->lazy.head); in vmap_init_nodes()
5134 spin_lock_init(&vn->lazy.lock); in vmap_init_nodes()
5137 INIT_LIST_HEAD(&vn->pool[i].head); in vmap_init_nodes()
5138 WRITE_ONCE(vn->pool[i].len, 0); in vmap_init_nodes()
5141 spin_lock_init(&vn->pool_lock); in vmap_init_nodes()
5156 count += READ_ONCE(vn->pool[j].len); in vmap_node_shrink_count()
5191 spin_lock_init(&vbq->lock); in vmalloc_init()
5192 INIT_LIST_HEAD(&vbq->free); in vmalloc_init()
5194 init_llist_head(&p->list); in vmalloc_init()
5195 INIT_WORK(&p->wq, delayed_vfree_work); in vmalloc_init()
5196 xa_init(&vbq->vmap_blocks); in vmalloc_init()
5205 for (tmp = vmlist; tmp; tmp = tmp->next) { in vmalloc_init()
5210 va->va_start = (unsigned long)tmp->addr; in vmalloc_init()
5211 va->va_end = va->va_start + tmp->size; in vmalloc_init()
5212 va->vm = tmp; in vmalloc_init()
5214 vn = addr_to_node(va->va_start); in vmalloc_init()
5215 insert_vmap_area(va, &vn->busy.root, &vn->busy.head); in vmalloc_init()
5224 vmap_node_shrinker = shrinker_alloc(0, "vmap-node"); in vmalloc_init()
5226 pr_err("Failed to allocate vmap-node shrinker!\n"); in vmalloc_init()
5230 vmap_node_shrinker->count_objects = vmap_node_shrink_count; in vmalloc_init()
5231 vmap_node_shrinker->scan_objects = vmap_node_shrink_scan; in vmalloc_init()