Lines Matching +full:link +full:- +full:trigger +full:- +full:order

1 // SPDX-License-Identifier: GPL-2.0-only
5 * SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
51 #include "pgalloc-track.h"
54 static unsigned int __ro_after_init ioremap_max_page_shift = BITS_PER_LONG - 1;
106 return -ENOMEM; in vmap_pte_range()
149 if ((end - addr) != PMD_SIZE) in vmap_try_huge_pmd()
173 return -ENOMEM; in vmap_pmd_range()
184 return -ENOMEM; in vmap_pmd_range()
185 } while (pmd++, phys_addr += (next - addr), addr = next, addr != end); in vmap_pmd_range()
199 if ((end - addr) != PUD_SIZE) in vmap_try_huge_pud()
223 return -ENOMEM; in vmap_pud_range()
235 return -ENOMEM; in vmap_pud_range()
236 } while (pud++, phys_addr += (next - addr), addr = next, addr != end); in vmap_pud_range()
250 if ((end - addr) != P4D_SIZE) in vmap_try_huge_p4d()
274 return -ENOMEM; in vmap_p4d_range()
286 return -ENOMEM; in vmap_p4d_range()
287 } while (p4d++, phys_addr += (next - addr), addr = next, addr != end); in vmap_p4d_range()
312 } while (pgd++, phys_addr += (next - addr), addr = next, addr != end); in vmap_range_noflush()
340 if (!area || !(area->flags & VM_IOREMAP)) { in ioremap_page_range()
342 return -EINVAL; in ioremap_page_range()
344 if (addr != (unsigned long)area->addr || in ioremap_page_range()
345 (void *)end != area->addr + get_vm_area_size(area)) { in ioremap_page_range()
347 addr, end, (long)area->addr, in ioremap_page_range()
348 (long)area->addr + get_vm_area_size(area)); in ioremap_page_range()
349 return -ERANGE; in ioremap_page_range()
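
The ioremap_page_range() sanity checks above sit underneath the driver-facing ioremap()/iounmap() interface. A minimal sketch of that caller side; the MMIO base address, window size and 0x10 register offset are hypothetical:

#include <linux/errno.h>
#include <linux/io.h>
#include <linux/printk.h>
#include <linux/types.h>

static int example_probe_mmio(phys_addr_t base, resource_size_t size)
{
	void __iomem *regs = ioremap(base, size);
	u32 status;

	if (!regs)
		return -ENOMEM;

	status = readl(regs + 0x10);	/* hypothetical status register */
	pr_info("device status: %#x\n", status);

	iounmap(regs);
	return 0;
}
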
373 if (WARN_ON(end - addr < size)) in vunmap_pte_range()
374 size = end - addr; in vunmap_pte_range()
401 WARN_ON(next - addr < PMD_SIZE); in vunmap_pmd_range()
428 WARN_ON(next - addr < PUD_SIZE); in vunmap_pud_range()
464 * or be re-mapped for something else, if TLB flushes are being delayed or
498 * vunmap_range - unmap kernel virtual addresses
500 * @end: end of the VM area to unmap (non-inclusive)
503 * caches. Any subsequent access to the address before it has been re-mapped
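
vunmap_range() is the public counterpart of the vunmap_* helpers above: it clears the page tables for a kernel VA range and flushes TLBs and caches. A minimal sketch, assuming the range was mapped earlier by the (hypothetical) caller:

#include <linux/vmalloc.h>

static void example_teardown(unsigned long start, unsigned long size)
{
	/*
	 * After this returns, touching [start, start + size) before it is
	 * re-mapped is a kernel bug, per the kernel-doc above.
	 */
	vunmap_range(start, start + size);
}
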
527 return -ENOMEM; in vmap_pages_pte_range()
535 err = -EBUSY; in vmap_pages_pte_range()
539 err = -ENOMEM; in vmap_pages_pte_range()
543 err = -EINVAL; in vmap_pages_pte_range()
566 return -ENOMEM; in vmap_pages_pmd_range()
570 return -ENOMEM; in vmap_pages_pmd_range()
584 return -ENOMEM; in vmap_pages_pud_range()
588 return -ENOMEM; in vmap_pages_pud_range()
602 return -ENOMEM; in vmap_pages_p4d_range()
606 return -ENOMEM; in vmap_pages_p4d_range()
650 unsigned int i, nr = (end - addr) >> PAGE_SHIFT; in __vmap_pages_range_noflush()
658 for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) { in __vmap_pages_range_noflush()
685 * vmap_pages_range - map pages to a kernel virtual address
687 * @end: end of the VM area to map (non-inclusive)
694 * 0 on success, -errno on failure.
710 if (WARN_ON_ONCE(area->flags & VM_FLUSH_RESET_PERMS)) in check_sparse_vm_area()
711 return -EINVAL; in check_sparse_vm_area()
712 if (WARN_ON_ONCE(area->flags & VM_NO_GUARD)) in check_sparse_vm_area()
713 return -EINVAL; in check_sparse_vm_area()
714 if (WARN_ON_ONCE(!(area->flags & VM_SPARSE))) in check_sparse_vm_area()
715 return -EINVAL; in check_sparse_vm_area()
716 if ((end - start) >> PAGE_SHIFT > totalram_pages()) in check_sparse_vm_area()
717 return -E2BIG; in check_sparse_vm_area()
718 if (start < (unsigned long)area->addr || in check_sparse_vm_area()
719 (void *)end > area->addr + get_vm_area_size(area)) in check_sparse_vm_area()
720 return -ERANGE; in check_sparse_vm_area()
725 * vm_area_map_pages - map pages inside given sparse vm_area
744 * vm_area_unmap_pages - unmap pages inside given sparse vm_area
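
check_sparse_vm_area() above guards the VM_SPARSE interface used by vm_area_map_pages()/vm_area_unmap_pages(). A sketch of how such an area might be used, assuming it was reserved beforehand with get_vm_area(size, VM_SPARSE); the example_* helpers are hypothetical:

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

/* Plug one freshly allocated page into a sparse area at kaddr. */
static int example_sparse_map_one(struct vm_struct *area, unsigned long kaddr)
{
	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	int err;

	if (!page)
		return -ENOMEM;

	/* [kaddr, kaddr + PAGE_SIZE) must lie inside the sparse area. */
	err = vm_area_map_pages(area, kaddr, kaddr + PAGE_SIZE, &page);
	if (err)
		__free_page(page);
	return err;
}

static void example_sparse_unmap_one(struct vm_struct *area, unsigned long kaddr)
{
	/* Clears the mapping only; the page itself stays with the caller. */
	vm_area_unmap_pages(area, kaddr, kaddr + PAGE_SIZE);
}
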
761 * ARM, x86-64 and sparc64 put modules in a special place, in is_vmalloc_or_module_addr()
836 * Map a vmalloc()-space virtual address to the physical page frame number.
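
That line is the kernel-doc for vmalloc_to_pfn(); together with vmalloc_to_page() it lets callers walk from a vmalloc address back to its backing page. A minimal sketch with a hypothetical scratch buffer:

#include <linux/mm.h>
#include <linux/printk.h>
#include <linux/vmalloc.h>

static void example_lookup_backing_page(void)
{
	void *buf = vmalloc(PAGE_SIZE);

	if (!buf)
		return;

	pr_info("page %p, pfn %lu\n",
		vmalloc_to_page(buf), vmalloc_to_pfn(buf));
	vfree(buf);
}
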
869 * This augment red-black tree represents the free vmap space.
870 * All vmap_area objects in this tree are sorted by va->va_start
875 * of its sub-tree, right or left. Therefore it is possible to
889 * rb-tree are part of one entity protected by the lock. Nodes are
890 * sorted in ascending order, thus for O(1) access to left/right
902 * An index in the pool-array corresponds to number of pages + 1.
912 * An effective vmap-node logic. Users make use of nodes instead
927 * Ready-to-free areas.
943 /* A simple iterator over all vmap-nodes. */
970 unsigned int id = node - vmap_nodes; in node_to_id()
975 WARN_ONCE(1, "An address 0x%p is out-of-bounds.\n", node); in node_to_id()
981 * an encoded value will be the node-id incremented by 1.
983 * be encoded is [0:nr_vmap_nodes - 1]. If a passed node_id
999 * Returns an encoded node-id, the valid range is within
1000 * [0:nr_vmap_nodes-1] values. Otherwise nr_vmap_nodes is
1006 unsigned int node_id = (val >> BITS_PER_BYTE) - 1; in decode_vn_id()
1031 return (va->va_end - va->va_start); in va_size()
1040 return va ? va->subtree_max_size : 0; in get_subtree_max_size()
1061 struct rb_node *n = root->rb_node; in __find_vmap_area()
1069 if (addr < va->va_start) in __find_vmap_area()
1070 n = n->rb_left; in __find_vmap_area()
1071 else if (addr >= va->va_end) in __find_vmap_area()
1072 n = n->rb_right; in __find_vmap_area()
1085 struct rb_node *n = root->rb_node; in __find_vmap_area_exceed_addr()
1093 if (tmp->va_end > addr) { in __find_vmap_area_exceed_addr()
1095 if (tmp->va_start <= addr) in __find_vmap_area_exceed_addr()
1098 n = n->rb_left; in __find_vmap_area_exceed_addr()
1100 n = n->rb_right; in __find_vmap_area_exceed_addr()
1123 spin_lock(&vn->busy.lock); in find_vmap_area_exceed_addr_lock()
1124 *va = __find_vmap_area_exceed_addr(addr, &vn->busy.root); in find_vmap_area_exceed_addr_lock()
1127 if (!va_start_lowest || (*va)->va_start < va_start_lowest) in find_vmap_area_exceed_addr_lock()
1128 va_start_lowest = (*va)->va_start; in find_vmap_area_exceed_addr_lock()
1129 spin_unlock(&vn->busy.lock); in find_vmap_area_exceed_addr_lock()
1140 spin_lock(&vn->busy.lock); in find_vmap_area_exceed_addr_lock()
1141 *va = __find_vmap_area(va_start_lowest, &vn->busy.root); in find_vmap_area_exceed_addr_lock()
1146 spin_unlock(&vn->busy.lock); in find_vmap_area_exceed_addr_lock()
1155 * and its left or right link for further processing.
1167 struct rb_node **link; in find_va_links() local
1170 link = &root->rb_node; in find_va_links()
1171 if (unlikely(!*link)) { in find_va_links()
1173 return link; in find_va_links()
1176 link = &from; in find_va_links()
1182 * it link, where the new va->rb_node will be attached to. in find_va_links()
1185 tmp_va = rb_entry(*link, struct vmap_area, rb_node); in find_va_links()
1189 * Trigger the BUG() if there are sides(left/right) in find_va_links()
1192 if (va->va_end <= tmp_va->va_start) in find_va_links()
1193 link = &(*link)->rb_left; in find_va_links()
1194 else if (va->va_start >= tmp_va->va_end) in find_va_links()
1195 link = &(*link)->rb_right; in find_va_links()
1197 WARN(1, "vmalloc bug: 0x%lx-0x%lx overlaps with 0x%lx-0x%lx\n", in find_va_links()
1198 va->va_start, va->va_end, tmp_va->va_start, tmp_va->va_end); in find_va_links()
1202 } while (*link); in find_va_links()
1204 *parent = &tmp_va->rb_node; in find_va_links()
1205 return link; in find_va_links()
1209 get_va_next_sibling(struct rb_node *parent, struct rb_node **link) in get_va_next_sibling() argument
1215 * The red-black tree where we try to find VA neighbors in get_va_next_sibling()
1222 list = &rb_entry(parent, struct vmap_area, rb_node)->list; in get_va_next_sibling()
1223 return (&parent->rb_right == link ? list->next : list); in get_va_next_sibling()
1228 struct rb_node *parent, struct rb_node **link, in __link_va() argument
1236 head = &rb_entry(parent, struct vmap_area, rb_node)->list; in __link_va()
1237 if (&parent->rb_right != link) in __link_va()
1238 head = head->prev; in __link_va()
1241 /* Insert to the rb-tree */ in __link_va()
1242 rb_link_node(&va->rb_node, parent, link); in __link_va()
1246 * to the tree. We do not set va->subtree_max_size to in __link_va()
1253 * the correct order later on. in __link_va()
1255 rb_insert_augmented(&va->rb_node, in __link_va()
1257 va->subtree_max_size = 0; in __link_va()
1259 rb_insert_color(&va->rb_node, root); in __link_va()
1262 /* Address-sort this list */ in __link_va()
1263 list_add(&va->list, head); in __link_va()
1268 struct rb_node *parent, struct rb_node **link, in link_va() argument
1271 __link_va(va, root, parent, link, head, false); in link_va()
1276 struct rb_node *parent, struct rb_node **link, in link_va_augment() argument
1279 __link_va(va, root, parent, link, head, true); in link_va_augment()
1285 if (WARN_ON(RB_EMPTY_NODE(&va->rb_node))) in __unlink_va()
1289 rb_erase_augmented(&va->rb_node, in __unlink_va()
1292 rb_erase(&va->rb_node, root); in __unlink_va()
1294 list_del_init(&va->list); in __unlink_va()
1295 RB_CLEAR_NODE(&va->rb_node); in __unlink_va()
1318 get_subtree_max_size(va->rb_node.rb_left), in compute_subtree_max_size()
1319 get_subtree_max_size(va->rb_node.rb_right)); in compute_subtree_max_size()
1330 if (computed_size != va->subtree_max_size) in augment_tree_propagate_check()
1332 va_size(va), va->subtree_max_size); in augment_tree_propagate_check()
1344 * - After VA has been inserted to the tree(free path);
1345 * - After VA has been shrunk(allocation path);
1346 * - After VA has been increased(merging path).
1352 * 4--8
1356 * 2--2 8--8
1362 * node becomes 4--6.
1372 free_vmap_area_rb_augment_cb_propagate(&va->rb_node, NULL); in augment_tree_propagate_from()
1383 struct rb_node **link; in insert_vmap_area() local
1386 link = find_va_links(va, root, NULL, &parent); in insert_vmap_area()
1387 if (link) in insert_vmap_area()
1388 link_va(va, root, parent, link, head); in insert_vmap_area()
1396 struct rb_node **link; in insert_vmap_area_augment() local
1400 link = find_va_links(va, NULL, from, &parent); in insert_vmap_area_augment()
1402 link = find_va_links(va, root, NULL, &parent); in insert_vmap_area_augment()
1404 if (link) { in insert_vmap_area_augment()
1405 link_va_augment(va, root, parent, link, head); in insert_vmap_area_augment()
1411 * Merge de-allocated chunk of VA memory with previous
1427 struct rb_node **link; in __merge_or_add_vmap_area() local
1435 link = find_va_links(va, root, NULL, &parent); in __merge_or_add_vmap_area()
1436 if (!link) in __merge_or_add_vmap_area()
1442 next = get_va_next_sibling(parent, link); in __merge_or_add_vmap_area()
1449 * |<------VA------>|<-----Next----->| in __merge_or_add_vmap_area()
1455 if (sibling->va_start == va->va_end) { in __merge_or_add_vmap_area()
1456 sibling->va_start = va->va_start; in __merge_or_add_vmap_area()
1470 * |<-----Prev----->|<------VA------>| in __merge_or_add_vmap_area()
1474 if (next->prev != head) { in __merge_or_add_vmap_area()
1475 sibling = list_entry(next->prev, struct vmap_area, list); in __merge_or_add_vmap_area()
1476 if (sibling->va_end == va->va_start) { in __merge_or_add_vmap_area()
1487 sibling->va_end = va->va_end; in __merge_or_add_vmap_area()
1500 __link_va(va, root, parent, link, head, augment); in __merge_or_add_vmap_area()
1529 if (va->va_start > vstart) in is_within_this_va()
1530 nva_start_addr = ALIGN(va->va_start, align); in is_within_this_va()
1539 return (nva_start_addr + size <= va->va_end); in is_within_this_va()
1558 node = root->rb_node; in find_vmap_lowest_match()
1561 length = adjust_search_size ? size + align - 1 : size; in find_vmap_lowest_match()
1566 if (get_subtree_max_size(node->rb_left) >= length && in find_vmap_lowest_match()
1567 vstart < va->va_start) { in find_vmap_lowest_match()
1568 node = node->rb_left; in find_vmap_lowest_match()
1575 * sub-tree if it does not have a free block that is in find_vmap_lowest_match()
1578 if (get_subtree_max_size(node->rb_right) >= length) { in find_vmap_lowest_match()
1579 node = node->rb_right; in find_vmap_lowest_match()
1584 * OK. We roll back and find the first right sub-tree, in find_vmap_lowest_match()
1594 if (get_subtree_max_size(node->rb_right) >= length && in find_vmap_lowest_match()
1595 vstart <= va->va_start) { in find_vmap_lowest_match()
1599 * to enter same sub-tree after it has already been checked in find_vmap_lowest_match()
1602 vstart = va->va_start + 1; in find_vmap_lowest_match()
1603 node = node->rb_right; in find_vmap_lowest_match()
1667 if (nva_start_addr < va->va_start || in classify_va_fit_type()
1668 nva_start_addr + size > va->va_end) in classify_va_fit_type()
1672 if (va->va_start == nva_start_addr) { in classify_va_fit_type()
1673 if (va->va_end == nva_start_addr + size) in classify_va_fit_type()
1677 } else if (va->va_end == nva_start_addr + size) { in classify_va_fit_type()
1700 * |---------------| in va_clip()
1710 * |-------|-------| in va_clip()
1712 va->va_start += size; in va_clip()
1719 * |-------|-------| in va_clip()
1721 va->va_end = nva_start_addr; in va_clip()
1728 * |---|-------|---| in va_clip()
1733 * For percpu allocator we do not do any pre-allocation in va_clip()
1759 return -ENOMEM; in va_clip()
1765 lva->va_start = va->va_start; in va_clip()
1766 lva->va_end = nva_start_addr; in va_clip()
1771 va->va_start = nva_start_addr + size; in va_clip()
1773 return -EINVAL; in va_clip()
1780 insert_vmap_area_augment(lva, &va->rb_node, root, head); in va_clip()
1795 if (va->va_start > vstart) in va_alloc()
1796 nva_start_addr = ALIGN(va->va_start, align); in va_alloc()
1802 return -ERANGE; in va_alloc()
1834 if (align <= PAGE_SIZE || (align > PAGE_SIZE && (vend - vstart) == size)) in __alloc_vmap_area()
1839 return -ENOENT; in __alloc_vmap_area()
1856 struct vmap_node *vn = addr_to_node(va->va_start); in free_vmap_area()
1861 spin_lock(&vn->busy.lock); in free_vmap_area()
1862 unlink_va(va, &vn->busy.root); in free_vmap_area()
1863 spin_unlock(&vn->busy.lock); in free_vmap_area()
1883 * We do it in non-atomic context, thus it allows us to use more in preload_this_cpu_lock()
1900 unsigned int idx = (size - 1) / PAGE_SIZE; in size_to_va_pool()
1903 return &vn->pool[idx]; in size_to_va_pool()
1917 spin_lock(&n->pool_lock); in node_pool_add_va()
1918 list_add(&va->list, &vp->head); in node_pool_add_va()
1919 WRITE_ONCE(vp->len, vp->len + 1); in node_pool_add_va()
1920 spin_unlock(&n->pool_lock); in node_pool_add_va()
1935 if (!vp || list_empty(&vp->head)) in node_pool_del_va()
1938 spin_lock(&vn->pool_lock); in node_pool_del_va()
1939 if (!list_empty(&vp->head)) { in node_pool_del_va()
1940 va = list_first_entry(&vp->head, struct vmap_area, list); in node_pool_del_va()
1942 if (IS_ALIGNED(va->va_start, align)) { in node_pool_del_va()
1948 err |= (va->va_start < vstart); in node_pool_del_va()
1949 err |= (va->va_end > vend); in node_pool_del_va()
1952 list_del_init(&va->list); in node_pool_del_va()
1953 WRITE_ONCE(vp->len, vp->len - 1); in node_pool_del_va()
1958 list_move_tail(&va->list, &vp->head); in node_pool_del_va()
1962 spin_unlock(&vn->pool_lock); in node_pool_del_va()
1975 *addr = -EINVAL; in node_alloc()
1990 *addr = va->va_start; in node_alloc()
1998 vm->flags = flags; in setup_vmalloc_vm()
1999 vm->addr = (void *)va->va_start; in setup_vmalloc_vm()
2000 vm->size = vm->requested_size = va_size(va); in setup_vmalloc_vm()
2001 vm->caller = caller; in setup_vmalloc_vm()
2002 va->vm = vm; in setup_vmalloc_vm()
2024 return ERR_PTR(-EINVAL); in alloc_vmap_area()
2027 return ERR_PTR(-EBUSY); in alloc_vmap_area()
2045 return ERR_PTR(-ENOMEM); in alloc_vmap_area()
2051 kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask); in alloc_vmap_area()
2072 * returned. Therefore trigger the overflow path. in alloc_vmap_area()
2077 va->va_start = addr; in alloc_vmap_area()
2078 va->va_end = addr + size; in alloc_vmap_area()
2079 va->vm = NULL; in alloc_vmap_area()
2080 va->flags = (va_flags | vn_id); in alloc_vmap_area()
2083 vm->addr = (void *)va->va_start; in alloc_vmap_area()
2084 vm->size = va_size(va); in alloc_vmap_area()
2085 va->vm = vm; in alloc_vmap_area()
2088 vn = addr_to_node(va->va_start); in alloc_vmap_area()
2090 spin_lock(&vn->busy.lock); in alloc_vmap_area()
2091 insert_vmap_area(va, &vn->busy.root, &vn->busy.head); in alloc_vmap_area()
2092 spin_unlock(&vn->busy.lock); in alloc_vmap_area()
2094 BUG_ON(!IS_ALIGNED(va->va_start, align)); in alloc_vmap_area()
2095 BUG_ON(va->va_start < vstart); in alloc_vmap_area()
2096 BUG_ON(va->va_end > vend); in alloc_vmap_area()
2122 pr_warn("vmalloc_node_range for size %lu failed: Address range restricted to %#lx - %#lx\n", in alloc_vmap_area()
2126 return ERR_PTR(-EBUSY); in alloc_vmap_area()
2173 /* for per-CPU blocks */
2203 if (list_empty(&vn->pool[i].head)) in decay_va_pool_node()
2206 /* Detach the pool, so no-one can access it. */ in decay_va_pool_node()
2207 spin_lock(&vn->pool_lock); in decay_va_pool_node()
2208 list_replace_init(&vn->pool[i].head, &tmp_list); in decay_va_pool_node()
2209 spin_unlock(&vn->pool_lock); in decay_va_pool_node()
2211 pool_len = n_decay = vn->pool[i].len; in decay_va_pool_node()
2212 WRITE_ONCE(vn->pool[i].len, 0); in decay_va_pool_node()
2217 pool_len -= n_decay; in decay_va_pool_node()
2220 if (!n_decay--) in decay_va_pool_node()
2223 list_del_init(&va->list); in decay_va_pool_node()
2234 spin_lock(&vn->pool_lock); in decay_va_pool_node()
2235 list_replace_init(&tmp_list, &vn->pool[i].head); in decay_va_pool_node()
2236 WRITE_ONCE(vn->pool[i].len, pool_len); in decay_va_pool_node()
2237 spin_unlock(&vn->pool_lock); in decay_va_pool_node()
2250 start = list_first_entry(&vn->purge_list, struct vmap_area, list)->va_start; in kasan_release_vmalloc_node()
2251 end = list_last_entry(&vn->purge_list, struct vmap_area, list)->va_end; in kasan_release_vmalloc_node()
2253 list_for_each_entry(va, &vn->purge_list, list) { in kasan_release_vmalloc_node()
2254 if (is_vmalloc_or_module_addr((void *) va->va_start)) in kasan_release_vmalloc_node()
2255 kasan_release_vmalloc(va->va_start, va->va_end, in kasan_release_vmalloc_node()
2256 va->va_start, va->va_end, in kasan_release_vmalloc_node()
2274 vn->nr_purged = 0; in purge_vmap_node()
2276 list_for_each_entry_safe(va, n_va, &vn->purge_list, list) { in purge_vmap_node()
2278 unsigned int vn_id = decode_vn_id(va->flags); in purge_vmap_node()
2280 list_del_init(&va->list); in purge_vmap_node()
2283 vn->nr_purged++; in purge_vmap_node()
2285 if (is_vn_id_valid(vn_id) && !vn->skip_populate) in purge_vmap_node()
2290 list_add(&va->list, &local_list); in purge_vmap_node()
2299 * Purges all lazily-freed vmap areas.
2319 INIT_LIST_HEAD(&vn->purge_list); in __purge_vmap_area_lazy()
2320 vn->skip_populate = full_pool_decay; in __purge_vmap_area_lazy()
2323 if (RB_EMPTY_ROOT(&vn->lazy.root)) in __purge_vmap_area_lazy()
2326 spin_lock(&vn->lazy.lock); in __purge_vmap_area_lazy()
2327 WRITE_ONCE(vn->lazy.root.rb_node, NULL); in __purge_vmap_area_lazy()
2328 list_replace_init(&vn->lazy.head, &vn->purge_list); in __purge_vmap_area_lazy()
2329 spin_unlock(&vn->lazy.lock); in __purge_vmap_area_lazy()
2331 start = min(start, list_first_entry(&vn->purge_list, in __purge_vmap_area_lazy()
2332 struct vmap_area, list)->va_start); in __purge_vmap_area_lazy()
2334 end = max(end, list_last_entry(&vn->purge_list, in __purge_vmap_area_lazy()
2335 struct vmap_area, list)->va_end); in __purge_vmap_area_lazy()
2346 nr_purge_helpers = clamp(nr_purge_helpers, 1U, nr_purge_nodes) - 1; in __purge_vmap_area_lazy()
2352 INIT_WORK(&vn->purge_work, purge_vmap_node); in __purge_vmap_area_lazy()
2355 schedule_work_on(i, &vn->purge_work); in __purge_vmap_area_lazy()
2357 schedule_work(&vn->purge_work); in __purge_vmap_area_lazy()
2359 nr_purge_helpers--; in __purge_vmap_area_lazy()
2361 vn->purge_work.func = NULL; in __purge_vmap_area_lazy()
2362 purge_vmap_node(&vn->purge_work); in __purge_vmap_area_lazy()
2363 nr_purged_areas += vn->nr_purged; in __purge_vmap_area_lazy()
2370 if (vn->purge_work.func) { in __purge_vmap_area_lazy()
2371 flush_work(&vn->purge_work); in __purge_vmap_area_lazy()
2372 nr_purged_areas += vn->nr_purged; in __purge_vmap_area_lazy()
2408 unsigned long va_start = va->va_start; in free_vmap_area_noflush()
2409 unsigned int vn_id = decode_vn_id(va->flags); in free_vmap_area_noflush()
2413 if (WARN_ON_ONCE(!list_empty(&va->list))) in free_vmap_area_noflush()
2424 id_to_node(vn_id):addr_to_node(va->va_start); in free_vmap_area_noflush()
2426 spin_lock(&vn->lazy.lock); in free_vmap_area_noflush()
2427 insert_vmap_area(va, &vn->lazy.root, &vn->lazy.head); in free_vmap_area_noflush()
2428 spin_unlock(&vn->lazy.lock); in free_vmap_area_noflush()
2442 flush_cache_vunmap(va->va_start, va->va_end); in free_unmap_vmap_area()
2443 vunmap_range_noflush(va->va_start, va->va_end); in free_unmap_vmap_area()
2445 flush_tlb_kernel_range(va->va_start, va->va_end); in free_unmap_vmap_area()
2462 * addr is not the same as va->va_start, what is not common, we in find_vmap_area()
2465 * <----va----> in find_vmap_area()
2466 * -|-----|-----|-----|-----|- in find_vmap_area()
2476 spin_lock(&vn->busy.lock); in find_vmap_area()
2477 va = __find_vmap_area(addr, &vn->busy.root); in find_vmap_area()
2478 spin_unlock(&vn->busy.lock); in find_vmap_area()
2482 } while ((i = (i + nr_vmap_nodes - 1) % nr_vmap_nodes) != j); in find_vmap_area()
2500 spin_lock(&vn->busy.lock); in find_unlink_vmap_area()
2501 va = __find_vmap_area(addr, &vn->busy.root); in find_unlink_vmap_area()
2503 unlink_va(va, &vn->busy.root); in find_unlink_vmap_area()
2504 spin_unlock(&vn->busy.lock); in find_unlink_vmap_area()
2508 } while ((i = (i + nr_vmap_nodes - 1) % nr_vmap_nodes) != j); in find_unlink_vmap_area()
2521 * to #define VMALLOC_SPACE (VMALLOC_END-VMALLOC_START). Guess
2545 * regular operations: Purge if vb->free is less than 1/4 of the capacity.
2550 #define VMAP_BLOCK 0x2 /* mark out the vmap_block sub-type*/
2559 * be allocated. If it is an issue, we can use rb-tree
2581 * In order to fast access to any "vmap_block" associated with a
2584 * A per-cpu vmap_block_queue is used in both ways, to serialize
2587 * overload it, since we already have the per-cpu array which is
2601 * |------|------|------|------|------|------|...<vmap address space>
2604 * - CPU_1 invokes vm_unmap_ram(6), 6 belongs to CPU0 zone, thus
2605 * it access: CPU0/INDEX0 -> vmap_blocks -> xa_lock;
2607 * - CPU_2 invokes vm_unmap_ram(11), 11 belongs to CPU1 zone, thus
2608 * it access: CPU1/INDEX1 -> vmap_blocks -> xa_lock;
2610 * - CPU_0 invokes vm_unmap_ram(20), 20 belongs to CPU2 zone, thus
2611 * it access: CPU2/INDEX2 -> vmap_blocks -> xa_lock.
2624 * if an index points on it which is nr_cpu_ids - 1. in addr_to_vb_xa()
2641 addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1); in addr_to_vb_idx()
2656 * new_vmap_block - allocates new vmap_block and occupies 2^order pages in this
2658 * @order: how many 2^order pages should be occupied in newly allocated block
2661 * Return: virtual address in a newly allocated block or ERR_PTR(-errno)
2663 static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) in new_vmap_block() argument
2678 return ERR_PTR(-ENOMEM); in new_vmap_block()
2689 vaddr = vmap_block_vaddr(va->va_start, 0); in new_vmap_block()
2690 spin_lock_init(&vb->lock); in new_vmap_block()
2691 vb->va = va; in new_vmap_block()
2693 BUG_ON(VMAP_BBMAP_BITS <= (1UL << order)); in new_vmap_block()
2694 bitmap_zero(vb->used_map, VMAP_BBMAP_BITS); in new_vmap_block()
2695 vb->free = VMAP_BBMAP_BITS - (1UL << order); in new_vmap_block()
2696 vb->dirty = 0; in new_vmap_block()
2697 vb->dirty_min = VMAP_BBMAP_BITS; in new_vmap_block()
2698 vb->dirty_max = 0; in new_vmap_block()
2699 bitmap_set(vb->used_map, 0, (1UL << order)); in new_vmap_block()
2700 INIT_LIST_HEAD(&vb->free_list); in new_vmap_block()
2701 vb->cpu = raw_smp_processor_id(); in new_vmap_block()
2703 xa = addr_to_vb_xa(va->va_start); in new_vmap_block()
2704 vb_idx = addr_to_vb_idx(va->va_start); in new_vmap_block()
2713 * rather than vb->cpu due to task migration, which in new_vmap_block()
2718 vbq = per_cpu_ptr(&vmap_block_queue, vb->cpu); in new_vmap_block()
2719 spin_lock(&vbq->lock); in new_vmap_block()
2720 list_add_tail_rcu(&vb->free_list, &vbq->free); in new_vmap_block()
2721 spin_unlock(&vbq->lock); in new_vmap_block()
2732 xa = addr_to_vb_xa(vb->va->va_start); in free_vmap_block()
2733 tmp = xa_erase(xa, addr_to_vb_idx(vb->va->va_start)); in free_vmap_block()
2736 vn = addr_to_node(vb->va->va_start); in free_vmap_block()
2737 spin_lock(&vn->busy.lock); in free_vmap_block()
2738 unlink_va(vb->va, &vn->busy.root); in free_vmap_block()
2739 spin_unlock(&vn->busy.lock); in free_vmap_block()
2741 free_vmap_area_noflush(vb->va); in free_vmap_block()
2748 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, vb->cpu); in purge_fragmented_block()
2750 if (vb->free + vb->dirty != VMAP_BBMAP_BITS || in purge_fragmented_block()
2751 vb->dirty == VMAP_BBMAP_BITS) in purge_fragmented_block()
2755 if (!(force_purge || vb->free < VMAP_PURGE_THRESHOLD)) in purge_fragmented_block()
2759 WRITE_ONCE(vb->free, 0); in purge_fragmented_block()
2761 WRITE_ONCE(vb->dirty, VMAP_BBMAP_BITS); in purge_fragmented_block()
2762 vb->dirty_min = 0; in purge_fragmented_block()
2763 vb->dirty_max = VMAP_BBMAP_BITS; in purge_fragmented_block()
2764 spin_lock(&vbq->lock); in purge_fragmented_block()
2765 list_del_rcu(&vb->free_list); in purge_fragmented_block()
2766 spin_unlock(&vbq->lock); in purge_fragmented_block()
2767 list_add_tail(&vb->purge, purge_list); in purge_fragmented_block()
2776 list_del(&vb->purge); in free_purged_blocks()
2788 list_for_each_entry_rcu(vb, &vbq->free, free_list) { in purge_fragmented_blocks()
2789 unsigned long free = READ_ONCE(vb->free); in purge_fragmented_blocks()
2790 unsigned long dirty = READ_ONCE(vb->dirty); in purge_fragmented_blocks()
2796 spin_lock(&vb->lock); in purge_fragmented_blocks()
2798 spin_unlock(&vb->lock); in purge_fragmented_blocks()
2817 unsigned int order; in vb_alloc() local
2827 return ERR_PTR(-EINVAL); in vb_alloc()
2829 order = get_order(size); in vb_alloc()
2833 list_for_each_entry_rcu(vb, &vbq->free, free_list) { in vb_alloc()
2836 if (READ_ONCE(vb->free) < (1UL << order)) in vb_alloc()
2839 spin_lock(&vb->lock); in vb_alloc()
2840 if (vb->free < (1UL << order)) { in vb_alloc()
2841 spin_unlock(&vb->lock); in vb_alloc()
2845 pages_off = VMAP_BBMAP_BITS - vb->free; in vb_alloc()
2846 vaddr = vmap_block_vaddr(vb->va->va_start, pages_off); in vb_alloc()
2847 WRITE_ONCE(vb->free, vb->free - (1UL << order)); in vb_alloc()
2848 bitmap_set(vb->used_map, pages_off, (1UL << order)); in vb_alloc()
2849 if (vb->free == 0) { in vb_alloc()
2850 spin_lock(&vbq->lock); in vb_alloc()
2851 list_del_rcu(&vb->free_list); in vb_alloc()
2852 spin_unlock(&vbq->lock); in vb_alloc()
2855 spin_unlock(&vb->lock); in vb_alloc()
2863 vaddr = new_vmap_block(order, gfp_mask); in vb_alloc()
2871 unsigned int order; in vb_free() local
2880 order = get_order(size); in vb_free()
2881 offset = (addr & (VMAP_BLOCK_SIZE - 1)) >> PAGE_SHIFT; in vb_free()
2886 spin_lock(&vb->lock); in vb_free()
2887 bitmap_clear(vb->used_map, offset, (1UL << order)); in vb_free()
2888 spin_unlock(&vb->lock); in vb_free()
2895 spin_lock(&vb->lock); in vb_free()
2898 vb->dirty_min = min(vb->dirty_min, offset); in vb_free()
2899 vb->dirty_max = max(vb->dirty_max, offset + (1UL << order)); in vb_free()
2901 WRITE_ONCE(vb->dirty, vb->dirty + (1UL << order)); in vb_free()
2902 if (vb->dirty == VMAP_BBMAP_BITS) { in vb_free()
2903 BUG_ON(vb->free); in vb_free()
2904 spin_unlock(&vb->lock); in vb_free()
2907 spin_unlock(&vb->lock); in vb_free()
2926 xa_for_each(&vbq->vmap_blocks, idx, vb) { in _vm_unmap_aliases()
2927 spin_lock(&vb->lock); in _vm_unmap_aliases()
2935 vb->dirty_max && vb->dirty != VMAP_BBMAP_BITS) { in _vm_unmap_aliases()
2936 unsigned long va_start = vb->va->va_start; in _vm_unmap_aliases()
2939 s = va_start + (vb->dirty_min << PAGE_SHIFT); in _vm_unmap_aliases()
2940 e = va_start + (vb->dirty_max << PAGE_SHIFT); in _vm_unmap_aliases()
2946 vb->dirty_min = VMAP_BBMAP_BITS; in _vm_unmap_aliases()
2947 vb->dirty_max = 0; in _vm_unmap_aliases()
2951 spin_unlock(&vb->lock); in _vm_unmap_aliases()
2963 * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
2982 * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
3010 debug_check_no_locks_freed((void *)va->va_start, va_size(va)); in vm_unmap_ram()
3016 * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
3022 * faster than vmap so it's good. But if you mix long-life and short-life
3025 * the end. Please use this function for short-lived objects.
3049 addr = va->va_start; in vm_map_ram()
3061 * With hardware tag-based KASAN, marking is skipped for in vm_map_ram()
3062 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). in vm_map_ram()
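
The vm_map_ram() kernel-doc above recommends this interface only for short-lived mappings. A minimal sketch of that pattern; the pages array and count come from a hypothetical caller:

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/numa.h>
#include <linux/string.h>
#include <linux/vmalloc.h>

static int example_scratch_map(struct page **pages, unsigned int nr)
{
	void *va = vm_map_ram(pages, nr, NUMA_NO_NODE);

	if (!va)
		return -ENOMEM;

	memset(va, 0, nr * PAGE_SIZE);	/* touch the pages through the linear mapping */
	vm_unmap_ram(va, nr);		/* the count must match the vm_map_ram() call */
	return 0;
}
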
3075 return vm->page_order; in vm_area_page_order()
3086 static inline void set_vm_area_page_order(struct vm_struct *vm, unsigned int order) in set_vm_area_page_order() argument
3089 vm->page_order = order; in set_vm_area_page_order()
3091 BUG_ON(order != 0); in set_vm_area_page_order()
3096 * vm_area_add_early - add vmap area early during boot
3100 * vmalloc_init() is called. @vm->addr, @vm->size, and @vm->flags
3110 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { in vm_area_add_early()
3111 if (tmp->addr >= vm->addr) { in vm_area_add_early()
3112 BUG_ON(tmp->addr < vm->addr + vm->size); in vm_area_add_early()
3115 BUG_ON(tmp->addr + tmp->size > vm->addr); in vm_area_add_early()
3117 vm->next = *p; in vm_area_add_early()
3122 * vm_area_register_early - register vmap area early during boot
3127 * vmalloc_init() is called. @vm->size and @vm->flags should contain
3129 * vm->addr contains the allocated address.
3140 for (p = &vmlist; (cur = *p) != NULL; p = &cur->next) { in vm_area_register_early()
3141 if ((unsigned long)cur->addr - addr >= vm->size) in vm_area_register_early()
3143 addr = ALIGN((unsigned long)cur->addr + cur->size, align); in vm_area_register_early()
3146 BUG_ON(addr > VMALLOC_END - vm->size); in vm_area_register_early()
3147 vm->addr = (void *)addr; in vm_area_register_early()
3148 vm->next = *p; in vm_area_register_early()
3150 kasan_populate_early_vm_area_shadow(vm->addr, vm->size); in vm_area_register_early()
3161 vm->flags &= ~VM_UNINITIALIZED; in clear_vm_uninitialized_flag()
3189 area->flags = flags; in __get_vm_area_node()
3190 area->caller = caller; in __get_vm_area_node()
3191 area->requested_size = requested_size; in __get_vm_area_node()
3200 * Mark pages for non-VM_ALLOC mappings as accessible. Do it now as a in __get_vm_area_node()
3201 * best-effort approach, as they can be mapped outside of vmalloc code. in __get_vm_area_node()
3204 * With hardware tag-based KASAN, marking is skipped for in __get_vm_area_node()
3205 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). in __get_vm_area_node()
3208 area->addr = kasan_unpoison_vmalloc(area->addr, requested_size, in __get_vm_area_node()
3223 * get_vm_area - reserve a contiguous kernel virtual area
3250 * find_vm_area - find a continuous kernel virtual area
3267 return va->vm; in find_vm_area()
3271 * remove_vm_area - find and remove a continuous kernel virtual area
3292 if (!va || !va->vm) in remove_vm_area()
3294 vm = va->vm; in remove_vm_area()
3296 debug_check_no_locks_freed(vm->addr, get_vm_area_size(vm)); in remove_vm_area()
3297 debug_check_no_obj_freed(vm->addr, get_vm_area_size(vm)); in remove_vm_area()
3299 kasan_poison_vmalloc(vm->addr, get_vm_area_size(vm)); in remove_vm_area()
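
get_vm_area(), find_vm_area() and remove_vm_area() above form the low-level reservation API for vmalloc address space. A sketch that reserves a range without backing pages and releases it via free_vm_area() (which appears further down in this listing); the size and flag choice are hypothetical:

#include <linux/bug.h>
#include <linux/sizes.h>
#include <linux/vmalloc.h>

static void example_reserve_va_space(void)
{
	struct vm_struct *area = get_vm_area(SZ_1M, VM_IOREMAP);

	if (!area)
		return;

	/* The reservation can be looked up by any address inside it. */
	WARN_ON(find_vm_area(area->addr) != area);

	/* Removes the area from the vmap trees and frees the vm_struct. */
	free_vm_area(area);
}
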
3311 for (i = 0; i < area->nr_pages; i++) in set_area_direct_map()
3312 if (page_address(area->pages[i])) in set_area_direct_map()
3313 set_direct_map(area->pages[i]); in set_area_direct_map()
3330 for (i = 0; i < area->nr_pages; i += 1U << page_order) { in vm_reset_perms()
3331 unsigned long addr = (unsigned long)page_address(area->pages[i]); in vm_reset_perms()
3358 llist_for_each_safe(llnode, t, llist_del_all(&p->list)) in delayed_vfree_work()
3363 * vfree_atomic - release memory allocated by vmalloc()
3382 if (addr && llist_add((struct llist_node *)addr, &p->list)) in vfree_atomic()
3383 schedule_work(&p->wq); in vfree_atomic()
3387 * vfree - Release memory allocated by vmalloc()
3401 * conventions for vfree() arch-dependent would be a really bad idea).
3427 if (unlikely(vm->flags & VM_FLUSH_RESET_PERMS)) in vfree()
3430 if (vm->nr_pages && !(vm->flags & VM_MAP_PUT_PAGES)) in vfree()
3431 mod_memcg_page_state(vm->pages[0], MEMCG_VMALLOC, -vm->nr_pages); in vfree()
3432 for (i = 0; i < vm->nr_pages; i++) { in vfree()
3433 struct page *page = vm->pages[i]; in vfree()
3437 * High-order allocs for huge vmallocs are split, so in vfree()
3438 * can be freed as an array of order-0 allocations in vfree()
3443 if (!(vm->flags & VM_MAP_PUT_PAGES)) in vfree()
3444 atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages); in vfree()
3445 kvfree(vm->pages); in vfree()
3451 * vunmap - release virtual mapping obtained by vmap()
3479 * vmap - map an array of pages into virtually contiguous space
3482 * @flags: vm_area->flags
3520 addr = (unsigned long)area->addr; in vmap()
3523 vunmap(area->addr); in vmap()
3528 area->pages = pages; in vmap()
3529 area->nr_pages = count; in vmap()
3531 return area->addr; in vmap()
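
vmap() above stitches caller-owned pages into one contiguous kernel mapping, and vunmap() (see the hits a little earlier) drops that mapping without freeing the pages. A minimal sketch with hypothetical wrappers:

#include <linux/mm.h>
#include <linux/vmalloc.h>

static void *example_vmap_pages(struct page **pages, unsigned int count)
{
	/* One contiguous kernel mapping over possibly scattered pages. */
	return vmap(pages, count, VM_MAP, PAGE_KERNEL);
}

static void example_vunmap_pages(void *addr)
{
	/* Drops the mapping only; the pages stay with the caller. */
	vunmap(addr);
}
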
3545 unsigned long pfn = data->pfns[data->idx]; in vmap_pfn_apply()
3549 return -EINVAL; in vmap_pfn_apply()
3551 ptent = pte_mkspecial(pfn_pte(pfn, data->prot)); in vmap_pfn_apply()
3554 data->idx++; in vmap_pfn_apply()
3559 * vmap_pfn - map an array of PFNs into virtually contiguous space
3576 if (apply_to_page_range(&init_mm, (unsigned long)area->addr, in vmap_pfn()
3582 flush_cache_vmap((unsigned long)area->addr, in vmap_pfn()
3583 (unsigned long)area->addr + count * PAGE_SIZE); in vmap_pfn()
3585 return area->addr; in vmap_pfn()
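
vmap_pfn() above covers memory that has no struct page behind it (device memory, for instance). A sketch, assuming the PFN array comes from a hypothetical caller, that plain PAGE_KERNEL protections are acceptable, and that the kernel is built with vmap_pfn() support:

#include <linux/mm.h>
#include <linux/vmalloc.h>

static void *example_map_pfn_array(unsigned long *pfns, unsigned int count)
{
	/* Returns a contiguous kernel mapping over the PFNs, or NULL. */
	return vmap_pfn(pfns, count, PAGE_KERNEL);
}
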
3592 unsigned int order, unsigned int nr_pages, struct page **pages) in vm_area_alloc_pages() argument
3599 * For order-0 pages we make use of bulk allocator, if in vm_area_alloc_pages()
3604 if (!order) { in vm_area_alloc_pages()
3609 * A maximum allowed request is hard-coded and is 100 in vm_area_alloc_pages()
3610 * pages per call. That is done in order to prevent a in vm_area_alloc_pages()
3611 * long preemption off scenario in the bulk-allocator in vm_area_alloc_pages()
3614 nr_pages_request = min(100U, nr_pages - nr_allocated); in vm_area_alloc_pages()
3641 /* High-order pages or fallback path if "bulk" fails. */ in vm_area_alloc_pages()
3647 page = alloc_pages_noprof(gfp, order); in vm_area_alloc_pages()
3649 page = alloc_pages_node_noprof(nid, gfp, order); in vm_area_alloc_pages()
3655 * High-order allocations must be able to be treated as in vm_area_alloc_pages()
3657 * small-page vmallocs). Some drivers do their own refcounting in vm_area_alloc_pages()
3658 * on vmalloc_to_page() pages, some use page->mapping, in vm_area_alloc_pages()
3659 * page->lru, etc. in vm_area_alloc_pages()
3661 if (order) in vm_area_alloc_pages()
3662 split_page(page, order); in vm_area_alloc_pages()
3665 * Careful, we allocate and map page-order pages, but in vm_area_alloc_pages()
3669 for (i = 0; i < (1U << order); i++) in vm_area_alloc_pages()
3672 nr_allocated += 1U << order; in vm_area_alloc_pages()
3684 unsigned long addr = (unsigned long)area->addr; in __vmalloc_area_node()
3699 area->pages = __vmalloc_node_noprof(array_size, 1, nested_gfp, node, in __vmalloc_area_node()
3700 area->caller); in __vmalloc_area_node()
3702 area->pages = kmalloc_node_noprof(array_size, nested_gfp, node); in __vmalloc_area_node()
3705 if (!area->pages) { in __vmalloc_area_node()
3713 set_vm_area_page_order(area, page_shift - PAGE_SHIFT); in __vmalloc_area_node()
3717 * High-order nofail allocations are really expensive and in __vmalloc_area_node()
3718 * potentially dangerous (pre-mature OOM, disruptive reclaim in __vmalloc_area_node()
3721 * Please note, the __vmalloc_node_range_noprof() falls-back in __vmalloc_area_node()
3722 * to order-0 pages if high-order attempt is unsuccessful. in __vmalloc_area_node()
3724 area->nr_pages = vm_area_alloc_pages((page_order ? in __vmalloc_area_node()
3726 node, page_order, nr_small_pages, area->pages); in __vmalloc_area_node()
3728 atomic_long_add(area->nr_pages, &nr_vmalloc_pages); in __vmalloc_area_node()
3730 if (gfp_mask & __GFP_ACCOUNT && area->nr_pages) in __vmalloc_area_node()
3731 mod_memcg_page_state(area->pages[0], MEMCG_VMALLOC, in __vmalloc_area_node()
3732 area->nr_pages); in __vmalloc_area_node()
3738 if (area->nr_pages != nr_small_pages) { in __vmalloc_area_node()
3741 * also:- in __vmalloc_area_node()
3743 * - a pending fatal signal in __vmalloc_area_node()
3744 * - insufficient huge page-order pages in __vmalloc_area_node()
3746 * Since we always retry allocations at order-0 in the huge page in __vmalloc_area_node()
3752 area->nr_pages * PAGE_SIZE); in __vmalloc_area_node()
3766 ret = vmap_pages_range(addr, addr + size, prot, area->pages, in __vmalloc_area_node()
3780 area->nr_pages * PAGE_SIZE); in __vmalloc_area_node()
3784 return area->addr; in __vmalloc_area_node()
3787 vfree(area->addr); in __vmalloc_area_node()
3792 * __vmalloc_node_range - allocate virtually contiguous memory
3906 * Tag-based KASAN modes only assign tags to normal non-executable in __vmalloc_node_range_noprof()
3914 area->addr = kasan_unpoison_vmalloc(area->addr, size, kasan_flags); in __vmalloc_node_range_noprof()
3926 return area->addr; in __vmalloc_node_range_noprof()
3939 * __vmalloc_node - allocate virtually contiguous memory
3949 * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL
3980 * vmalloc - allocate virtually contiguous memory
3999 * vmalloc_huge_node - allocate virtually contiguous memory, allow huge pages
4020 * vzalloc - allocate virtually contiguous memory with zero fill
4040 * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
4058 * vmalloc_node - allocate memory on a specific node
4078 * vzalloc_node - allocate memory on a specific node with zero fill
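
The kernel-doc headers above cover the main allocator entry points. A minimal sketch of the common pairing, with a hypothetical table buffer; vmalloc_node()/vzalloc_node() take an extra NUMA node argument, and vmalloc_user() additionally zeroes the buffer and marks it mappable to userspace:

#include <linux/vmalloc.h>

static void *example_alloc_table(unsigned long nbytes)
{
	/* Virtually contiguous and zero-filled; pages may be scattered. */
	return vzalloc(nbytes);
}

static void example_free_table(void *table)
{
	vfree(table);	/* vfree(NULL) is a no-op */
}
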
4096 * vrealloc_node_align_noprof - reallocate virtually contiguous memory; contents
4149 old_size = vm->requested_size; in vrealloc_node_align_noprof()
4168 memset((void *)p + size, 0, old_size - size); in vrealloc_node_align_noprof()
4169 vm->requested_size = size; in vrealloc_node_align_noprof()
4170 kasan_poison_vmalloc(p + size, old_size - size); in vrealloc_node_align_noprof()
4178 kasan_unpoison_vmalloc(p + old_size, size - old_size, in vrealloc_node_align_noprof()
4185 vm->requested_size = size; in vrealloc_node_align_noprof()
4217 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
4233 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
4264 remains -= copied; in zero_iter()
4270 return count - remains; in zero_iter()
4290 length = PAGE_SIZE - offset; in aligned_vread_iter()
4309 remains -= copied; in aligned_vread_iter()
4315 return count - remains; in aligned_vread_iter()
4352 spin_lock(&vb->lock); in vmap_ram_vread_iter()
4353 if (bitmap_empty(vb->used_map, VMAP_BBMAP_BITS)) { in vmap_ram_vread_iter()
4354 spin_unlock(&vb->lock); in vmap_ram_vread_iter()
4358 for_each_set_bitrange(rs, re, vb->used_map, VMAP_BBMAP_BITS) { in vmap_ram_vread_iter()
4364 start = vmap_block_vaddr(vb->va->va_start, rs); in vmap_ram_vread_iter()
4367 size_t to_zero = min_t(size_t, start - addr, remains); in vmap_ram_vread_iter()
4371 remains -= zeroed; in vmap_ram_vread_iter()
4379 n = ((re - rs + 1) << PAGE_SHIFT) - offset; in vmap_ram_vread_iter()
4386 remains -= copied; in vmap_ram_vread_iter()
4392 spin_unlock(&vb->lock); in vmap_ram_vread_iter()
4395 /* zero-fill the left dirty or free regions */ in vmap_ram_vread_iter()
4396 return count - remains + zero_iter(iter, remains); in vmap_ram_vread_iter()
4399 spin_unlock(&vb->lock); in vmap_ram_vread_iter()
4400 return count - remains; in vmap_ram_vread_iter()
4404 * vread_iter() - read vmalloc area in a safe way to an iterator.
4412 * proper area of @buf. If there are memory holes, they'll be zero-filled.
4440 count = -(unsigned long) addr; in vread_iter()
4449 if ((unsigned long)addr + remains <= va->va_start) in vread_iter()
4458 vm = va->vm; in vread_iter()
4459 flags = va->flags & VMAP_FLAGS_MASK; in vread_iter()
4461 * VMAP_BLOCK indicates a sub-type of vm_map_ram area, need in vread_iter()
4469 if (vm && (vm->flags & VM_UNINITIALIZED)) in vread_iter()
4475 vaddr = (char *) va->va_start; in vread_iter()
4482 size_t to_zero = min_t(size_t, vaddr - addr, remains); in vread_iter()
4486 remains -= zeroed; in vread_iter()
4492 n = vaddr + size - addr; in vread_iter()
4498 else if (!(vm && (vm->flags & (VM_IOREMAP | VM_SPARSE)))) in vread_iter()
4504 remains -= copied; in vread_iter()
4510 next = va->va_end; in vread_iter()
4511 spin_unlock(&vn->busy.lock); in vread_iter()
4516 spin_unlock(&vn->busy.lock); in vread_iter()
4518 /* zero-fill memory holes */ in vread_iter()
4519 return count - remains + zero_iter(iter, remains); in vread_iter()
4523 spin_unlock(&vn->busy.lock); in vread_iter()
4525 return count - remains; in vread_iter()
4529 * remap_vmalloc_range_partial - map vmalloc pages to userspace
4536 * Returns: 0 for success, -Exxx on failure
4554 return -EINVAL; in remap_vmalloc_range_partial()
4559 return -EINVAL; in remap_vmalloc_range_partial()
4563 return -EINVAL; in remap_vmalloc_range_partial()
4565 if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT))) in remap_vmalloc_range_partial()
4566 return -EINVAL; in remap_vmalloc_range_partial()
4570 return -EINVAL; in remap_vmalloc_range_partial()
4583 size -= PAGE_SIZE; in remap_vmalloc_range_partial()
4592 * remap_vmalloc_range - map vmalloc pages to userspace
4597 * Returns: 0 for success, -Exxx on failure
4608 return remap_vmalloc_range_partial(vma, vma->vm_start, in remap_vmalloc_range()
4610 vma->vm_end - vma->vm_start); in remap_vmalloc_range()
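
remap_vmalloc_range() above is what a ->mmap handler typically uses to expose a kernel buffer to userspace. A sketch; the buffer stored in file->private_data is hypothetical and must have been allocated with vmalloc_user() (or otherwise carry VM_USERMAP, as checked in remap_vmalloc_range_partial() above) and be large enough for the requested VMA:

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
	void *buf = file->private_data;	/* hypothetical vmalloc_user() buffer */

	return remap_vmalloc_range(vma, buf, vma->vm_pgoff);
}
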
4617 ret = remove_vm_area(area->addr); in free_vm_area()
4630 * pvm_find_va_enclose_addr - find the vmap_area @addr belongs to
4634 * the first highest(reverse order) vmap_area is returned
4635 * i.e. va->va_start < addr && va->va_end < addr or NULL
4649 if (tmp->va_start <= addr) { in pvm_find_va_enclose_addr()
4651 if (tmp->va_end >= addr) in pvm_find_va_enclose_addr()
4654 n = n->rb_right; in pvm_find_va_enclose_addr()
4656 n = n->rb_left; in pvm_find_va_enclose_addr()
4664 * pvm_determine_end_from_reverse - find the highest aligned address
4667 * in - the VA we start the search(reverse order);
4668 * out - the VA with the highest aligned end address.
4676 unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); in pvm_determine_end_from_reverse()
4682 addr = min((*va)->va_end & ~(align - 1), vmalloc_end); in pvm_determine_end_from_reverse()
4683 if ((*va)->va_start < addr) in pvm_determine_end_from_reverse()
4692 * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
4709 * does everything top-down and scans free blocks from the end looking
4720 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); in pcpu_get_vm_areas()
4750 if (vmalloc_end - vmalloc_start < last_end) { in pcpu_get_vm_areas()
4769 /* start scanning - we scan from the top, begin with the last area */ in pcpu_get_vm_areas()
4775 base = pvm_determine_end_from_reverse(&va, align) - end; in pcpu_get_vm_areas()
4795 if (base + end > va->va_end) { in pcpu_get_vm_areas()
4796 base = pvm_determine_end_from_reverse(&va, align) - end; in pcpu_get_vm_areas()
4804 if (base + start < va->va_start) { in pcpu_get_vm_areas()
4805 va = node_to_va(rb_prev(&va->rb_node)); in pcpu_get_vm_areas()
4806 base = pvm_determine_end_from_reverse(&va, align) - end; in pcpu_get_vm_areas()
4815 area = (area + nr_vms - 1) % nr_vms; in pcpu_get_vm_areas()
4833 /* It is a BUG(), but trigger recovery instead. */ in pcpu_get_vm_areas()
4839 /* It is a BUG(), but trigger recovery instead. */ in pcpu_get_vm_areas()
4844 va->va_start = start; in pcpu_get_vm_areas()
4845 va->va_end = start + size; in pcpu_get_vm_areas()
4852 if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area], GFP_KERNEL)) in pcpu_get_vm_areas()
4858 struct vmap_node *vn = addr_to_node(vas[area]->va_start); in pcpu_get_vm_areas()
4860 spin_lock(&vn->busy.lock); in pcpu_get_vm_areas()
4861 insert_vmap_area(vas[area], &vn->busy.root, &vn->busy.head); in pcpu_get_vm_areas()
4864 spin_unlock(&vn->busy.lock); in pcpu_get_vm_areas()
4868 * Mark allocated areas as accessible. Do it now as a best-effort in pcpu_get_vm_areas()
4870 * With hardware tag-based KASAN, marking is skipped for in pcpu_get_vm_areas()
4871 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). in pcpu_get_vm_areas()
4874 vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr, in pcpu_get_vm_areas()
4875 vms[area]->size, KASAN_VMALLOC_PROT_NORMAL); in pcpu_get_vm_areas()
4887 while (area--) { in pcpu_get_vm_areas()
4888 orig_start = vas[area]->va_start; in pcpu_get_vm_areas()
4889 orig_end = vas[area]->va_end; in pcpu_get_vm_areas()
4894 va->va_start, va->va_end, in pcpu_get_vm_areas()
4939 orig_start = vas[area]->va_start; in pcpu_get_vm_areas()
4940 orig_end = vas[area]->va_end; in pcpu_get_vm_areas()
4945 va->va_start, va->va_end, in pcpu_get_vm_areas()
4957 * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
4986 if (!spin_trylock(&vn->busy.lock)) in vmalloc_dump_obj()
4989 va = __find_vmap_area(addr, &vn->busy.root); in vmalloc_dump_obj()
4990 if (!va || !va->vm) { in vmalloc_dump_obj()
4991 spin_unlock(&vn->busy.lock); in vmalloc_dump_obj()
4995 vm = va->vm; in vmalloc_dump_obj()
4996 addr = (unsigned long) vm->addr; in vmalloc_dump_obj()
4997 caller = vm->caller; in vmalloc_dump_obj()
4998 nr_pages = vm->nr_pages; in vmalloc_dump_obj()
4999 spin_unlock(&vn->busy.lock); in vmalloc_dump_obj()
5001 pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n", in vmalloc_dump_obj()
5014 * and VM_UNINITIALIZED bit in v->flags is disabled.
5027 for (nr = 0; nr < v->nr_pages; nr += step) in show_numa_info()
5028 counters[page_to_nid(v->pages[nr])] += step; in show_numa_info()
5040 spin_lock(&vn->lazy.lock); in show_purge_info()
5041 list_for_each_entry(va, &vn->lazy.head, list) { in show_purge_info()
5042 seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n", in show_purge_info()
5043 (void *)va->va_start, (void *)va->va_end, in show_purge_info()
5046 spin_unlock(&vn->lazy.lock); in show_purge_info()
5061 spin_lock(&vn->busy.lock); in vmalloc_info_show()
5062 list_for_each_entry(va, &vn->busy.head, list) { in vmalloc_info_show()
5063 if (!va->vm) { in vmalloc_info_show()
5064 if (va->flags & VMAP_RAM) in vmalloc_info_show()
5065 seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n", in vmalloc_info_show()
5066 (void *)va->va_start, (void *)va->va_end, in vmalloc_info_show()
5072 v = va->vm; in vmalloc_info_show()
5073 if (v->flags & VM_UNINITIALIZED) in vmalloc_info_show()
5079 seq_printf(m, "0x%pK-0x%pK %7ld", in vmalloc_info_show()
5080 v->addr, v->addr + v->size, v->size); in vmalloc_info_show()
5082 if (v->caller) in vmalloc_info_show()
5083 seq_printf(m, " %pS", v->caller); in vmalloc_info_show()
5085 if (v->nr_pages) in vmalloc_info_show()
5086 seq_printf(m, " pages=%d", v->nr_pages); in vmalloc_info_show()
5088 if (v->phys_addr) in vmalloc_info_show()
5089 seq_printf(m, " phys=%pa", &v->phys_addr); in vmalloc_info_show()
5091 if (v->flags & VM_IOREMAP) in vmalloc_info_show()
5094 if (v->flags & VM_SPARSE) in vmalloc_info_show()
5097 if (v->flags & VM_ALLOC) in vmalloc_info_show()
5100 if (v->flags & VM_MAP) in vmalloc_info_show()
5103 if (v->flags & VM_USERMAP) in vmalloc_info_show()
5106 if (v->flags & VM_DMA_COHERENT) in vmalloc_info_show()
5107 seq_puts(m, " dma-coherent"); in vmalloc_info_show()
5109 if (is_vmalloc_addr(v->pages)) in vmalloc_info_show()
5117 spin_unlock(&vn->busy.lock); in vmalloc_info_show()
5147 * -|-----|.....|-----|-----|-----|.....|- in vmap_init_free_space()
5149 * |<--------------------------------->| in vmap_init_free_space()
5151 for (busy = vmlist; busy; busy = busy->next) { in vmap_init_free_space()
5152 if ((unsigned long) busy->addr - vmap_start > 0) { in vmap_init_free_space()
5155 free->va_start = vmap_start; in vmap_init_free_space()
5156 free->va_end = (unsigned long) busy->addr; in vmap_init_free_space()
5164 vmap_start = (unsigned long) busy->addr + busy->size; in vmap_init_free_space()
5167 if (vmap_end - vmap_start > 0) { in vmap_init_free_space()
5170 free->va_start = vmap_start; in vmap_init_free_space()
5171 free->va_end = vmap_end; in vmap_init_free_space()
5191 * As for NUMA-aware notes. For bigger systems, for example in vmap_init_nodes()
5192 * NUMA with multi-sockets, where we can end-up with thousands in vmap_init_nodes()
5193 * of cores in total, a "sub-numa-clustering" should be added. in vmap_init_nodes()
5196 * with dedicated sub-nodes in it which describe one group or in vmap_init_nodes()
5197 * set of cores. Therefore a per-domain purging is supposed to in vmap_init_nodes()
5198 * be added as well as a per-domain balancing. in vmap_init_nodes()
5216 vn->busy.root = RB_ROOT; in vmap_init_nodes()
5217 INIT_LIST_HEAD(&vn->busy.head); in vmap_init_nodes()
5218 spin_lock_init(&vn->busy.lock); in vmap_init_nodes()
5220 vn->lazy.root = RB_ROOT; in vmap_init_nodes()
5221 INIT_LIST_HEAD(&vn->lazy.head); in vmap_init_nodes()
5222 spin_lock_init(&vn->lazy.lock); in vmap_init_nodes()
5225 INIT_LIST_HEAD(&vn->pool[i].head); in vmap_init_nodes()
5226 WRITE_ONCE(vn->pool[i].len, 0); in vmap_init_nodes()
5229 spin_lock_init(&vn->pool_lock); in vmap_init_nodes()
5242 count += READ_ONCE(vn->pool[i].len); in vmap_node_shrink_count()
5277 spin_lock_init(&vbq->lock); in vmalloc_init()
5278 INIT_LIST_HEAD(&vbq->free); in vmalloc_init()
5280 init_llist_head(&p->list); in vmalloc_init()
5281 INIT_WORK(&p->wq, delayed_vfree_work); in vmalloc_init()
5282 xa_init(&vbq->vmap_blocks); in vmalloc_init()
5291 for (tmp = vmlist; tmp; tmp = tmp->next) { in vmalloc_init()
5296 va->va_start = (unsigned long)tmp->addr; in vmalloc_init()
5297 va->va_end = va->va_start + tmp->size; in vmalloc_init()
5298 va->vm = tmp; in vmalloc_init()
5300 vn = addr_to_node(va->va_start); in vmalloc_init()
5301 insert_vmap_area(va, &vn->busy.root, &vn->busy.head); in vmalloc_init()
5310 vmap_node_shrinker = shrinker_alloc(0, "vmap-node"); in vmalloc_init()
5312 pr_err("Failed to allocate vmap-node shrinker!\n"); in vmalloc_init()
5316 vmap_node_shrinker->count_objects = vmap_node_shrink_count; in vmalloc_init()
5317 vmap_node_shrinker->scan_objects = vmap_node_shrink_scan; in vmalloc_init()