Lines Matching +full:current +full:- +full:mode
1 // SPDX-License-Identifier: GPL-2.0-only
24 * a set of weights (per-node), with normal fallback if it
26 * Example: nodeset(0,1) & weights (2,1) - 2 pages allocated
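The example in the comment above can be worked through directly: with nodeset(0,1) and weights (2,1), successive allocations land on nodes 0,0,1,0,0,1,... The following is a tiny userspace model of that weighted round-robin, illustrative only and not the kernel's allocator path; the nodeset and weights are the ones from the comment.

/*
 * Illustration only: weighted round-robin over nodeset(0,1) with
 * per-node weights (2,1), as described in the comment above.
 */
#include <stdio.h>

int main(void)
{
        int nodes[] = { 0, 1 };         /* nodeset(0,1) */
        int weights[] = { 2, 1 };       /* per-node weights (2,1) */
        int cur = 0, left = weights[0];
        int page;

        for (page = 0; page < 9; page++) {
                printf("page %d -> node %d\n", page, nodes[cur]);
                if (--left == 0) {
                        cur = (cur + 1) % 2;    /* advance to the next node */
                        left = weights[cur];    /* reload its weight */
                }
        }
        return 0;
}

Running it prints two allocations on node 0 for every one on node 1, matching the "2 pages allocated on node 0 for every 1 page allocated on node 1" example.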
122 #define MPOL_MF_WRLOCK (MPOL_MF_INTERNAL << 2) /* Write-lock walked vmas */
132 * run-time system-wide default policy => local allocation
136 .mode = MPOL_LOCAL,
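The default policy above means a task with no explicit mempolicy allocates locally. From userspace, a task can drop back to this default with set_mempolicy(2); a minimal sketch, assuming libnuma's <numaif.h> is available (link with -lnuma):

/*
 * Sketch: reset the calling task to the default policy from userspace.
 */
#include <numaif.h>
#include <stdio.h>

int main(void)
{
        /* MPOL_DEFAULT with an empty nodemask => back to local allocation */
        if (set_mempolicy(MPOL_DEFAULT, NULL, 0) != 0)
                perror("set_mempolicy");
        return 0;
}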
142 * iw_table is the sysfs-set interleave weight table, a value of 0 denotes
143 * system-default value should be used. A NULL iw_table also denotes that
144 * system-default values should be used. Until the system-default table
145 * is implemented, the system-default is always 1.
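A small helper mirroring the fallback rule described above: no table, or a zero entry, means "use the system default", which is currently 1. The name and the direct array access are illustrative only; the kernel dereferences iw_table under RCU.

/* Hypothetical accessor, not the kernel's: 0 or no table => default of 1. */
unsigned char example_il_weight(const unsigned char *table, int nid)
{
        unsigned char w = table ? table[nid] : 0;

        return w ? w : 1;
}

The per-node weights themselves come in through the weighted_interleave sysfs attributes handled by node_show()/node_store() near the end of this listing.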
168 * numa_nearest_node - Find nearest node by state
181 return -EINVAL; in numa_nearest_node()
201 struct mempolicy *pol = p->mempolicy; in get_task_policy()
211 if (pol->mode) in get_task_policy()
225 return pol->flags & MPOL_MODE_FLAGS; in mpol_store_user_nodemask()
239 return -EINVAL; in mpol_new_nodemask()
240 pol->nodes = *nodes; in mpol_new_nodemask()
247 return -EINVAL; in mpol_new_preferred()
249 nodes_clear(pol->nodes); in mpol_new_preferred()
250 node_set(first_node(*nodes), pol->nodes); in mpol_new_preferred()
257 * parameter with respect to the policy mode and flags.
272 if (!pol || pol->mode == MPOL_LOCAL) in mpol_set_nodemask()
276 nodes_and(nsc->mask1, in mpol_set_nodemask()
281 if (pol->flags & MPOL_F_RELATIVE_NODES) in mpol_set_nodemask()
282 mpol_relative_nodemask(&nsc->mask2, nodes, &nsc->mask1); in mpol_set_nodemask()
284 nodes_and(nsc->mask2, *nodes, nsc->mask1); in mpol_set_nodemask()
287 pol->w.user_nodemask = *nodes; in mpol_set_nodemask()
289 pol->w.cpuset_mems_allowed = cpuset_current_mems_allowed; in mpol_set_nodemask()
291 ret = mpol_ops[pol->mode].create(pol, &nsc->mask2); in mpol_set_nodemask()
299 static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, in mpol_new() argument
304 if (mode == MPOL_DEFAULT) { in mpol_new()
306 return ERR_PTR(-EINVAL); in mpol_new()
314 * All other modes require a valid pointer to a non-empty nodemask. in mpol_new()
316 if (mode == MPOL_PREFERRED) { in mpol_new()
320 return ERR_PTR(-EINVAL); in mpol_new()
322 mode = MPOL_LOCAL; in mpol_new()
324 } else if (mode == MPOL_LOCAL) { in mpol_new()
328 return ERR_PTR(-EINVAL); in mpol_new()
330 return ERR_PTR(-EINVAL); in mpol_new()
334 return ERR_PTR(-ENOMEM); in mpol_new()
335 atomic_set(&policy->refcnt, 1); in mpol_new()
336 policy->mode = mode; in mpol_new()
337 policy->flags = flags; in mpol_new()
338 policy->home_node = NUMA_NO_NODE; in mpol_new()
346 if (!atomic_dec_and_test(&pol->refcnt)) in __mpol_put()
359 if (pol->flags & MPOL_F_STATIC_NODES) in mpol_rebind_nodemask()
360 nodes_and(tmp, pol->w.user_nodemask, *nodes); in mpol_rebind_nodemask()
361 else if (pol->flags & MPOL_F_RELATIVE_NODES) in mpol_rebind_nodemask()
362 mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes); in mpol_rebind_nodemask()
364 nodes_remap(tmp, pol->nodes, pol->w.cpuset_mems_allowed, in mpol_rebind_nodemask()
366 pol->w.cpuset_mems_allowed = *nodes; in mpol_rebind_nodemask()
372 pol->nodes = tmp; in mpol_rebind_nodemask()
378 pol->w.cpuset_mems_allowed = *nodes; in mpol_rebind_preferred()
382 * mpol_rebind_policy - Migrate a policy to a different set of nodes
384 * Per-vma policies are protected by mmap_lock. Allocations using per-task
385 * policies are protected by task->mems_allowed_seq to prevent a premature
390 if (!pol || pol->mode == MPOL_LOCAL) in mpol_rebind_policy()
393 nodes_equal(pol->w.cpuset_mems_allowed, *newmask)) in mpol_rebind_policy()
396 mpol_ops[pol->mode].rebind(pol, newmask); in mpol_rebind_policy()
407 mpol_rebind_policy(tsk->mempolicy, new); in mpol_rebind_task()
413 * Call holding a reference to mm. Takes mm->mmap_lock during call.
423 mpol_rebind_policy(vma->vm_policy, new); in mpol_rebind_mm()
465 * STRICT without MOVE flags lets do_mbind() fail immediately with -EIO in strictly_unmovable()
489 * Check if the folio's nid is in qp->nmask.
491 * If MPOL_MF_INVERT is set in qp->flags, check if the nid is
492 * in the invert of qp->nmask.
498 unsigned long flags = qp->flags; in queue_folio_required()
500 return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT); in queue_folio_required()
506 struct queue_pages *qp = walk->private; in queue_folios_pmd()
509 qp->nr_failed++; in queue_folios_pmd()
514 walk->action = ACTION_CONTINUE; in queue_folios_pmd()
519 if (!(qp->flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) || in queue_folios_pmd()
520 !vma_migratable(walk->vma) || in queue_folios_pmd()
521 !migrate_folio_add(folio, qp->pagelist, qp->flags)) in queue_folios_pmd()
522 qp->nr_failed++; in queue_folios_pmd()
530 * 0 - continue walking to scan for more, even if an existing folio on the
532 * -EIO - only MPOL_MF_STRICT was specified, without MPOL_MF_MOVE or ..._ALL,
538 struct vm_area_struct *vma = walk->vma; in queue_folios_pte_range()
540 struct queue_pages *qp = walk->private; in queue_folios_pte_range()
541 unsigned long flags = qp->flags; in queue_folios_pte_range()
553 mapped_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); in queue_folios_pte_range()
555 walk->action = ACTION_AGAIN; in queue_folios_pte_range()
564 qp->nr_failed++; in queue_folios_pte_range()
581 * but may be mapped by many PTEs (and Copy-On-Write may in queue_folios_pte_range()
584 * there can be other cases of multi-mapped pages which in queue_folios_pte_range()
585 * this quick check does not help to filter out - and a in queue_folios_pte_range()
595 if (folio == qp->large) in queue_folios_pte_range()
597 qp->large = folio; in queue_folios_pte_range()
601 !migrate_folio_add(folio, qp->pagelist, flags)) { in queue_folios_pte_range()
602 qp->nr_failed++; in queue_folios_pte_range()
610 if (qp->nr_failed && strictly_unmovable(flags)) in queue_folios_pte_range()
611 return -EIO; in queue_folios_pte_range()
620 struct queue_pages *qp = walk->private; in queue_folios_hugetlb()
621 unsigned long flags = qp->flags; in queue_folios_hugetlb()
626 ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte); in queue_folios_hugetlb()
627 entry = huge_ptep_get(walk->mm, addr, pte); in queue_folios_hugetlb()
630 qp->nr_failed++; in queue_folios_hugetlb()
637 !vma_migratable(walk->vma)) { in queue_folios_hugetlb()
638 qp->nr_failed++; in queue_folios_hugetlb()
650 if (!folio_isolate_hugetlb(folio, qp->pagelist)) in queue_folios_hugetlb()
651 qp->nr_failed++; in queue_folios_hugetlb()
654 if (qp->nr_failed && strictly_unmovable(flags)) in queue_folios_hugetlb()
655 return -EIO; in queue_folios_hugetlb()
676 tlb_gather_mmu(&tlb, vma->vm_mm); in change_prot_numa()
681 count_memcg_events_mm(vma->vm_mm, NUMA_PTE_UPDATES, nr_updated); in change_prot_numa()
693 struct vm_area_struct *next, *vma = walk->vma; in queue_pages_test_walk()
694 struct queue_pages *qp = walk->private; in queue_pages_test_walk()
695 unsigned long flags = qp->flags; in queue_pages_test_walk()
700 if (!qp->first) { in queue_pages_test_walk()
701 qp->first = vma; in queue_pages_test_walk()
703 (qp->start < vma->vm_start)) in queue_pages_test_walk()
705 return -EFAULT; in queue_pages_test_walk()
707 next = find_vma(vma->vm_mm, vma->vm_end); in queue_pages_test_walk()
709 ((vma->vm_end < qp->end) && in queue_pages_test_walk()
710 (!next || vma->vm_end < next->vm_start))) in queue_pages_test_walk()
712 return -EFAULT; in queue_pages_test_walk()
715 * Need to check MPOL_MF_STRICT to return -EIO if possible in queue_pages_test_walk()
723 * Check page nodes, and queue pages to move, in the current vma. in queue_pages_test_walk()
752 * 0 - all pages already on the right node, or successfully queued for moving
754 * >0 - this number of misplaced folios could not be queued for moving
756 * -EIO - a misplaced page found, when MPOL_MF_STRICT specified without MOVEs.
757 * -EFAULT - a hole in the memory range, when MPOL_MF_DISCONTIG_OK unspecified.
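How these cases surface to userspace depends on the mbind(2) flags. Below is a hedged sketch that requests strict placement on node 0 plus migration of anything already misplaced; per the comment above, MPOL_MF_STRICT without a MOVE flag would instead fail with -EIO on a misplaced page. Assumes libnuma's <numaif.h> and that node 0 exists (link with -lnuma).

#include <numaif.h>
#include <sys/mman.h>
#include <stdio.h>

int main(void)
{
        unsigned long nodemask = 1UL << 0;      /* node 0 only */
        size_t len = 16 * 4096;
        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED)
                return 1;
        /* Strictly bind the range to node 0 and move misplaced pages. */
        if (mbind(p, len, MPOL_BIND, &nodemask, 8 * sizeof(nodemask),
                  MPOL_MF_STRICT | MPOL_MF_MOVE) != 0)
                perror("mbind");
        return 0;
}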
780 err = -EFAULT; in queue_pages_range()
802 if (vma->vm_ops && vma->vm_ops->set_policy) { in vma_replace_policy()
803 err = vma->vm_ops->set_policy(vma, new); in vma_replace_policy()
808 old = vma->vm_policy; in vma_replace_policy()
809 vma->vm_policy = new; /* protected by mmap_lock */ in vma_replace_policy()
825 vmend = min(end, vma->vm_end); in mbind_range()
826 if (start > vma->vm_start) { in mbind_range()
830 vmstart = vma->vm_start; in mbind_range()
833 if (mpol_equal(vma->vm_policy, new_pol)) { in mbind_range()
847 static long do_set_mempolicy(unsigned short mode, unsigned short flags, in do_set_mempolicy() argument
855 return -ENOMEM; in do_set_mempolicy()
857 new = mpol_new(mode, flags, nodes); in do_set_mempolicy()
863 task_lock(current); in do_set_mempolicy()
866 task_unlock(current); in do_set_mempolicy()
871 old = current->mempolicy; in do_set_mempolicy()
872 current->mempolicy = new; in do_set_mempolicy()
873 if (new && (new->mode == MPOL_INTERLEAVE || in do_set_mempolicy()
874 new->mode == MPOL_WEIGHTED_INTERLEAVE)) { in do_set_mempolicy()
875 current->il_prev = MAX_NUMNODES-1; in do_set_mempolicy()
876 current->il_weight = 0; in do_set_mempolicy()
878 task_unlock(current); in do_set_mempolicy()
897 switch (pol->mode) { in get_policy_nodemask()
903 *nodes = pol->nodes; in get_policy_nodemask()
931 struct mm_struct *mm = current->mm; in do_get_mempolicy()
933 struct mempolicy *pol = current->mempolicy, *pol_refcount = NULL; in do_get_mempolicy()
937 return -EINVAL; in do_get_mempolicy()
941 return -EINVAL; in do_get_mempolicy()
943 task_lock(current); in do_get_mempolicy()
945 task_unlock(current); in do_get_mempolicy()
960 return -EFAULT; in do_get_mempolicy()
964 return -EINVAL; in do_get_mempolicy()
984 } else if (pol == current->mempolicy && in do_get_mempolicy()
985 pol->mode == MPOL_INTERLEAVE) { in do_get_mempolicy()
986 *policy = next_node_in(current->il_prev, pol->nodes); in do_get_mempolicy()
987 } else if (pol == current->mempolicy && in do_get_mempolicy()
988 pol->mode == MPOL_WEIGHTED_INTERLEAVE) { in do_get_mempolicy()
989 if (current->il_weight) in do_get_mempolicy()
990 *policy = current->il_prev; in do_get_mempolicy()
992 *policy = next_node_in(current->il_prev, in do_get_mempolicy()
993 pol->nodes); in do_get_mempolicy()
995 err = -EINVAL; in do_get_mempolicy()
1000 pol->mode; in do_get_mempolicy()
1005 *policy |= (pol->flags & MPOL_MODE_FLAGS); in do_get_mempolicy()
1011 *nmask = pol->w.user_nodemask; in do_get_mempolicy()
1013 task_lock(current); in do_get_mempolicy()
1015 task_unlock(current); in do_get_mempolicy()
1041 list_add_tail(&folio->lru, foliolist); in migrate_folio_add()
1047 * A non-movable folio may reach here, and there may be in migrate_folio_add()
1048 * folios temporarily off the LRU, or non-LRU movable folios. in migrate_folio_add()
1094 nr_failed = queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask, in migrate_to_node()
1175 * Example: [2,3,4] -> [3,4,5] moves everything. in do_migrate_pages()
1176 * [0-7] -> [3,4,5] moves only 0,1,2,6,7. in do_migrate_pages()
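The same remap semantics are reachable from userspace through migrate_pages(2). A sketch of the [2,3,4] -> [3,4,5] case via libnuma, assuming <numa.h>, that nodes 2-5 exist, and that the caller may migrate its own pages (link with -lnuma):

#include <numa.h>
#include <stdio.h>

int main(void)
{
        struct bitmask *from, *to;

        if (numa_available() < 0)
                return 1;
        from = numa_parse_nodestring("2-4");
        to = numa_parse_nodestring("3-5");
        if (!from || !to)
                return 1;
        /* pid 0 == calling process; negative return means the call failed */
        if (numa_migrate_pages(0, from, to) < 0)
                perror("numa_migrate_pages");
        numa_bitmask_free(from);
        numa_bitmask_free(to);
        return 0;
}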
1218 struct mempolicy *pol = mmpol->pol; in alloc_migration_target_by_mpol()
1219 pgoff_t ilx = mmpol->ilx; in alloc_migration_target_by_mpol()
1225 ilx += src->index >> order; in alloc_migration_target_by_mpol()
1256 return -ENOSYS; in do_migrate_pages()
1267 unsigned short mode, unsigned short mode_flags, in do_mbind() argument
1270 struct mm_struct *mm = current->mm; in do_mbind()
1281 return -EINVAL; in do_mbind()
1283 return -EPERM; in do_mbind()
1286 return -EINVAL; in do_mbind()
1288 if (mode == MPOL_DEFAULT) in do_mbind()
1295 return -EINVAL; in do_mbind()
1299 new = mpol_new(mode, mode_flags, nmask); in do_mbind()
1320 err = -ENOMEM; in do_mbind()
1349 new = get_task_policy(current); in do_mbind()
1361 if (new->mode == MPOL_INTERLEAVE || in do_mbind()
1362 new->mode == MPOL_WEIGHTED_INTERLEAVE) { in do_mbind()
1365 unsigned long addr = -EFAULT; in do_mbind()
1376 if (addr != -EFAULT) in do_mbind()
1380 if (addr != -EFAULT) { in do_mbind()
1386 mmpol.ilx -= folio->index >> order; in do_mbind()
1401 err = -EIO; in do_mbind()
1429 return -EFAULT; in get_bitmap()
1432 mask[nlongs - 1] &= (1UL << (maxnode % BITS_PER_LONG)) - 1; in get_bitmap()
1441 --maxnode; in get_nodes()
1446 return -EINVAL; in get_nodes()
1457 if (get_bitmap(&t, &nmask[(maxnode - 1) / BITS_PER_LONG], bits)) in get_nodes()
1458 return -EFAULT; in get_nodes()
1460 if (maxnode - bits >= MAX_NUMNODES) { in get_nodes()
1461 maxnode -= bits; in get_nodes()
1464 t &= ~((1UL << (MAX_NUMNODES % BITS_PER_LONG)) - 1); in get_nodes()
1467 return -EINVAL; in get_nodes()
1477 unsigned long copy = ALIGN(maxnode-1, 64) / 8; in copy_nodes_to_user()
1486 return -EINVAL; in copy_nodes_to_user()
1487 if (clear_user((char __user *)mask + nbytes, copy - nbytes)) in copy_nodes_to_user()
1488 return -EFAULT; in copy_nodes_to_user()
1497 return copy_to_user(mask, nodes_addr(*nodes), copy) ? -EFAULT : 0; in copy_nodes_to_user()
1501 static inline int sanitize_mpol_flags(int *mode, unsigned short *flags) in sanitize_mpol_flags() argument
1503 *flags = *mode & MPOL_MODE_FLAGS; in sanitize_mpol_flags()
1504 *mode &= ~MPOL_MODE_FLAGS; in sanitize_mpol_flags()
1506 if ((unsigned int)(*mode) >= MPOL_MAX) in sanitize_mpol_flags()
1507 return -EINVAL; in sanitize_mpol_flags()
1509 return -EINVAL; in sanitize_mpol_flags()
1511 if (*mode == MPOL_BIND || *mode == MPOL_PREFERRED_MANY) in sanitize_mpol_flags()
1514 return -EINVAL; in sanitize_mpol_flags()
1520 unsigned long mode, const unsigned long __user *nmask, in kernel_mbind() argument
1525 int lmode = mode; in kernel_mbind()
1543 struct mm_struct *mm = current->mm; in SYSCALL_DEFINE4()
1547 int err = -ENOENT; in SYSCALL_DEFINE4()
1552 return -EINVAL; in SYSCALL_DEFINE4()
1557 return -EINVAL; in SYSCALL_DEFINE4()
1564 return -EINVAL; in SYSCALL_DEFINE4()
1570 return -EINVAL; in SYSCALL_DEFINE4()
1586 if (old->mode != MPOL_BIND && old->mode != MPOL_PREFERRED_MANY) { in SYSCALL_DEFINE4()
1587 err = -EOPNOTSUPP; in SYSCALL_DEFINE4()
1597 new->home_node = home_node; in SYSCALL_DEFINE4()
1608 unsigned long, mode, const unsigned long __user *, nmask, in SYSCALL_DEFINE6() argument
1611 return kernel_mbind(start, len, mode, nmask, maxnode, flags); in SYSCALL_DEFINE6()
1615 static long kernel_set_mempolicy(int mode, const unsigned long __user *nmask, in kernel_set_mempolicy() argument
1620 int lmode = mode; in kernel_set_mempolicy()
1634 SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask, in SYSCALL_DEFINE3() argument
1637 return kernel_set_mempolicy(mode, nmask, maxnode); in SYSCALL_DEFINE3()
1653 return -ENOMEM; in kernel_migrate_pages()
1655 old = &scratch->mask1; in kernel_migrate_pages()
1656 new = &scratch->mask2; in kernel_migrate_pages()
1668 task = pid ? find_task_by_vpid(pid) : current; in kernel_migrate_pages()
1671 err = -ESRCH; in kernel_migrate_pages()
1676 err = -EINVAL; in kernel_migrate_pages()
1684 err = -EPERM; in kernel_migrate_pages()
1692 err = -EPERM; in kernel_migrate_pages()
1696 task_nodes = cpuset_mems_allowed(current); in kernel_migrate_pages()
1709 err = -EINVAL; in kernel_migrate_pages()
1746 return -EINVAL; in kernel_get_mempolicy()
1756 return -EFAULT; in kernel_get_mempolicy()
1773 if (vma->vm_flags & (VM_IO | VM_PFNMAP)) in vma_migratable()
1792 if (vma->vm_file && in vma_migratable()
1793 gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping)) in vma_migratable()
1803 return (vma->vm_ops && vma->vm_ops->get_policy) ? in __get_vma_policy()
1804 vma->vm_ops->get_policy(vma, addr, ilx) : vma->vm_policy; in __get_vma_policy()
1816 * Falls back to current->mempolicy or system default policy, as necessary.
1818 * count--added by the get_policy() vm_op, as appropriate--to protect against
1829 pol = get_task_policy(current); in get_vma_policy()
1830 if (pol->mode == MPOL_INTERLEAVE || in get_vma_policy()
1831 pol->mode == MPOL_WEIGHTED_INTERLEAVE) { in get_vma_policy()
1832 *ilx += vma->vm_pgoff >> order; in get_vma_policy()
1833 *ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order); in get_vma_policy()
1842 if (vma->vm_ops && vma->vm_ops->get_policy) { in vma_policy_mof()
1846 pol = vma->vm_ops->get_policy(vma, vma->vm_start, &ilx); in vma_policy_mof()
1847 if (pol && (pol->flags & MPOL_F_MOF)) in vma_policy_mof()
1854 pol = vma->vm_policy; in vma_policy_mof()
1856 pol = get_task_policy(current); in vma_policy_mof()
1858 return pol->flags & MPOL_F_MOF; in vma_policy_mof()
1868 * if policy->nodes has movable memory only, in apply_policy_zone()
1871 * policy->nodes is intersect with node_states[N_MEMORY]. in apply_policy_zone()
1873 * policy->nodes has movable memory only. in apply_policy_zone()
1875 if (!nodes_intersects(policy->nodes, node_states[N_HIGH_MEMORY])) in apply_policy_zone()
1887 /* to prevent miscount, use tsk->mems_allowed_seq to detect rebind */ in weighted_interleave_nodes()
1889 node = current->il_prev; in weighted_interleave_nodes()
1890 if (!current->il_weight || !node_isset(node, policy->nodes)) { in weighted_interleave_nodes()
1891 node = next_node_in(node, policy->nodes); in weighted_interleave_nodes()
1896 current->il_prev = node; in weighted_interleave_nodes()
1897 current->il_weight = get_il_weight(node); in weighted_interleave_nodes()
1899 current->il_weight--; in weighted_interleave_nodes()
1909 /* to prevent miscount, use tsk->mems_allowed_seq to detect rebind */ in interleave_nodes()
1912 nid = next_node_in(current->il_prev, policy->nodes); in interleave_nodes()
1916 current->il_prev = nid; in interleave_nodes()
1932 policy = current->mempolicy; in mempolicy_slab_node()
1936 switch (policy->mode) { in mempolicy_slab_node()
1938 return first_node(policy->nodes); in mempolicy_slab_node()
1957 zonelist = &NODE_DATA(node)->node_zonelists[ZONELIST_FALLBACK]; in mempolicy_slab_node()
1959 &policy->nodes); in mempolicy_slab_node()
1979 memcpy(mask, &pol->nodes, sizeof(nodemask_t)); in read_once_policy_nodemask()
2016 target -= weight; in weighted_interleave_nid()
2025 * node in pol->nodes (starting from ilx=0), wrapping around if ilx
2054 switch (pol->mode) { in policy_nodemask()
2057 *nid = first_node(pol->nodes); in policy_nodemask()
2060 nodemask = &pol->nodes; in policy_nodemask()
2061 if (pol->home_node != NUMA_NO_NODE) in policy_nodemask()
2062 *nid = pol->home_node; in policy_nodemask()
2067 cpuset_nodemask_valid_mems_allowed(&pol->nodes)) in policy_nodemask()
2068 nodemask = &pol->nodes; in policy_nodemask()
2069 if (pol->home_node != NUMA_NO_NODE) in policy_nodemask()
2070 *nid = pol->home_node; in policy_nodemask()
2100 * @nodemask: pointer to nodemask pointer for 'bind' and 'prefer-many' policy
2104 * If the effective policy is 'bind' or 'prefer-many', returns a pointer
2114 *mpol = get_vma_policy(vma, addr, hstate_vma(vma)->order, &ilx); in huge_node()
2122 * If the current task's mempolicy is "default" [NULL], return 'false'
2127 * of non-default mempolicy.
2130 * because the current task is examining its own mempolicy and a task's
2139 if (!(mask && current->mempolicy)) in init_nodemask_of_mempolicy()
2142 task_lock(current); in init_nodemask_of_mempolicy()
2143 mempolicy = current->mempolicy; in init_nodemask_of_mempolicy()
2144 switch (mempolicy->mode) { in init_nodemask_of_mempolicy()
2150 *mask = mempolicy->nodes; in init_nodemask_of_mempolicy()
2160 task_unlock(current); in init_nodemask_of_mempolicy()
2186 mempolicy = tsk->mempolicy; in mempolicy_in_oom_domain()
2187 if (mempolicy && mempolicy->mode == MPOL_BIND) in mempolicy_in_oom_domain()
2188 ret = nodes_intersects(mempolicy->nodes, *mask); in mempolicy_in_oom_domain()
2216 * alloc_pages_mpol - Allocate pages according to NUMA mempolicy.
2233 if (pol->mode == MPOL_PREFERRED_MANY) in alloc_pages_mpol()
2240 * For hugepage allocation and non-interleave policy which in alloc_pages_mpol()
2241 * allows the current node (or other explicitly preferred in alloc_pages_mpol()
2242 * node) we only try to allocate from the current/preferred in alloc_pages_mpol()
2246 * If the policy is interleave or does not allow the current in alloc_pages_mpol()
2249 if (pol->mode != MPOL_INTERLEAVE && in alloc_pages_mpol()
2250 pol->mode != MPOL_WEIGHTED_INTERLEAVE && in alloc_pages_mpol()
2272 if (unlikely(pol->mode == MPOL_INTERLEAVE || in alloc_pages_mpol()
2273 pol->mode == MPOL_WEIGHTED_INTERLEAVE) && page) { in alloc_pages_mpol()
2299 * vma_alloc_folio - Allocate a folio for a VMA.
2320 if (vma->vm_flags & VM_DROPPABLE) in vma_alloc_folio_noprof()
2335 * No reference counting needed for current->mempolicy in alloc_frozen_pages_noprof()
2339 pol = get_task_policy(current); in alloc_frozen_pages_noprof()
2346 * alloc_pages - Allocate pages.
2351 * first page is naturally aligned (e.g. an order-3 allocation will be aligned
2352 * to a multiple of 8 * PAGE_SIZE bytes). The NUMA policy of the current
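A generic usage sketch of the alignment guarantee described above (ordinary alloc_pages() use, not code from this file): an order-3 allocation returns 8 contiguous pages whose first page is aligned to 8 * PAGE_SIZE.

#include <linux/gfp.h>

static struct page *example_order3_alloc(void)
{
        struct page *page = alloc_pages(GFP_KERNEL, 3);

        if (!page)
                return NULL;
        /* ... use the 8 pages, then release them with __free_pages(page, 3) */
        return page;
}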
2386 nodes = nodes_weight(pol->nodes); in alloc_pages_bulk_interleave()
2388 delta = nr_pages - nodes * nr_pages_per_node; in alloc_pages_bulk_interleave()
2396 delta--; in alloc_pages_bulk_interleave()
2414 struct task_struct *me = current; in alloc_pages_bulk_weighted_interleave()
2425 int resume_node = MAX_NUMNODES - 1; in alloc_pages_bulk_weighted_interleave()
2444 node = me->il_prev; in alloc_pages_bulk_weighted_interleave()
2445 weight = me->il_weight; in alloc_pages_bulk_weighted_interleave()
2454 me->il_weight -= rem_pages; in alloc_pages_bulk_weighted_interleave()
2458 rem_pages -= weight; in alloc_pages_bulk_weighted_interleave()
2461 me->il_weight = 0; in alloc_pages_bulk_weighted_interleave()
2500 delta -= weight; in alloc_pages_bulk_weighted_interleave()
2505 resume_weight = weight - delta; in alloc_pages_bulk_weighted_interleave()
2519 me->il_prev = resume_node; in alloc_pages_bulk_weighted_interleave()
2520 me->il_weight = resume_weight; in alloc_pages_bulk_weighted_interleave()
2535 nr_allocated = alloc_pages_bulk_noprof(preferred_gfp, nid, &pol->nodes, in alloc_pages_bulk_preferred_many()
2540 nr_pages - nr_allocated, in alloc_pages_bulk_preferred_many()
2559 pol = get_task_policy(current); in alloc_pages_bulk_mempolicy_noprof()
2561 if (pol->mode == MPOL_INTERLEAVE) in alloc_pages_bulk_mempolicy_noprof()
2565 if (pol->mode == MPOL_WEIGHTED_INTERLEAVE) in alloc_pages_bulk_mempolicy_noprof()
2569 if (pol->mode == MPOL_PREFERRED_MANY) in alloc_pages_bulk_mempolicy_noprof()
2581 struct mempolicy *pol = mpol_dup(src->vm_policy); in vma_dup_policy()
2585 dst->vm_policy = pol; in vma_dup_policy()
2590 * If mpol_dup() sees current->cpuset == cpuset_being_rebound, then it
2596 * current's mempolicy may be rebinded by the other task(the task that changes
2597 * cpuset's mems), so we needn't do rebind work for current task.
2606 return ERR_PTR(-ENOMEM); in __mpol_dup()
2609 if (old == current->mempolicy) { in __mpol_dup()
2610 task_lock(current); in __mpol_dup()
2612 task_unlock(current); in __mpol_dup()
2617 nodemask_t mems = cpuset_mems_allowed(current); in __mpol_dup()
2620 atomic_set(&new->refcnt, 1); in __mpol_dup()
2629 if (a->mode != b->mode) in __mpol_equal()
2631 if (a->flags != b->flags) in __mpol_equal()
2633 if (a->home_node != b->home_node) in __mpol_equal()
2636 if (!nodes_equal(a->w.user_nodemask, b->w.user_nodemask)) in __mpol_equal()
2639 switch (a->mode) { in __mpol_equal()
2645 return !!nodes_equal(a->nodes, b->nodes); in __mpol_equal()
2658 * The policies are kept in Red-Black tree linked from the inode.
2659 * They are protected by the sp->lock rwlock, which should be held
2664 * lookup first element intersecting start-end. Caller holds sp->lock for
2670 struct rb_node *n = sp->root.rb_node; in sp_lookup()
2675 if (start >= p->end) in sp_lookup()
2676 n = n->rb_right; in sp_lookup()
2677 else if (end <= p->start) in sp_lookup()
2678 n = n->rb_left; in sp_lookup()
2690 if (w->end <= start) in sp_lookup()
2698 * Insert a new shared policy into the list. Caller holds sp->lock for
2703 struct rb_node **p = &sp->root.rb_node; in sp_insert()
2710 if (new->start < nd->start) in sp_insert()
2711 p = &(*p)->rb_left; in sp_insert()
2712 else if (new->end > nd->end) in sp_insert()
2713 p = &(*p)->rb_right; in sp_insert()
2717 rb_link_node(&new->nd, parent, p); in sp_insert()
2718 rb_insert_color(&new->nd, &sp->root); in sp_insert()
2728 if (!sp->root.rb_node) in mpol_shared_policy_lookup()
2730 read_lock(&sp->lock); in mpol_shared_policy_lookup()
2733 mpol_get(sn->policy); in mpol_shared_policy_lookup()
2734 pol = sn->policy; in mpol_shared_policy_lookup()
2736 read_unlock(&sp->lock); in mpol_shared_policy_lookup()
2742 mpol_put(n->policy); in sp_free()
2747 * mpol_misplaced - check whether current folio node is valid in policy
2753 * Lookup current policy node id for vma,addr and "compare to" folio's
2767 struct vm_area_struct *vma = vmf->vma; in mpol_misplaced()
2777 lockdep_assert_held(vmf->ptl); in mpol_misplaced()
2779 if (!(pol->flags & MPOL_F_MOF)) in mpol_misplaced()
2782 switch (pol->mode) { in mpol_misplaced()
2792 if (node_isset(curnid, pol->nodes)) in mpol_misplaced()
2794 polnid = first_node(pol->nodes); in mpol_misplaced()
2812 if (pol->flags & MPOL_F_MORON) { in mpol_misplaced()
2817 if (node_isset(thisnid, pol->nodes)) in mpol_misplaced()
2823 * use current page if in policy nodemask, in mpol_misplaced()
2825 * If no allowed nodes, use current [!misplaced]. in mpol_misplaced()
2827 if (node_isset(curnid, pol->nodes)) in mpol_misplaced()
2832 &pol->nodes); in mpol_misplaced()
2841 if (pol->flags & MPOL_F_MORON) { in mpol_misplaced()
2844 if (!should_numa_migrate_memory(current, folio, curnid, in mpol_misplaced()
2858 * Drop the (possibly final) reference to task->mempolicy. It needs to be
2859 * dropped after task->mempolicy is set to NULL so that any allocation done as
2868 pol = task->mempolicy; in mpol_put_task_policy()
2869 task->mempolicy = NULL; in mpol_put_task_policy()
2876 rb_erase(&n->nd, &sp->root); in sp_delete()
2883 node->start = start; in sp_node_init()
2884 node->end = end; in sp_node_init()
2885 node->policy = pol; in sp_node_init()
2903 newpol->flags |= MPOL_F_SHARED; in sp_alloc()
2919 write_lock(&sp->lock); in shared_policy_replace()
2922 while (n && n->start < end) { in shared_policy_replace()
2923 struct rb_node *next = rb_next(&n->nd); in shared_policy_replace()
2924 if (n->start >= start) { in shared_policy_replace()
2925 if (n->end <= end) in shared_policy_replace()
2928 n->start = end; in shared_policy_replace()
2931 if (n->end > end) { in shared_policy_replace()
2935 *mpol_new = *n->policy; in shared_policy_replace()
2936 atomic_set(&mpol_new->refcnt, 1); in shared_policy_replace()
2937 sp_node_init(n_new, end, n->end, mpol_new); in shared_policy_replace()
2938 n->end = start; in shared_policy_replace()
2944 n->end = start; in shared_policy_replace()
2952 write_unlock(&sp->lock); in shared_policy_replace()
2964 write_unlock(&sp->lock); in shared_policy_replace()
2965 ret = -ENOMEM; in shared_policy_replace()
2972 atomic_set(&mpol_new->refcnt, 1); in shared_policy_replace()
2977 * mpol_shared_policy_init - initialize shared policy for inode
2981 * Install non-NULL @mpol in inode's shared policy rb-tree.
2982 * On entry, the current task has a reference on a non-NULL @mpol.
2990 sp->root = RB_ROOT; /* empty tree == default mempolicy */ in mpol_shared_policy_init()
2991 rwlock_init(&sp->lock); in mpol_shared_policy_init()
3002 npol = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); in mpol_shared_policy_init()
3006 task_lock(current); in mpol_shared_policy_init()
3007 ret = mpol_set_nodemask(npol, &mpol->w.user_nodemask, scratch); in mpol_shared_policy_init()
3008 task_unlock(current); in mpol_shared_policy_init()
3033 new = sp_alloc(vma->vm_pgoff, vma->vm_pgoff + sz, pol); in mpol_set_shared_policy()
3035 return -ENOMEM; in mpol_set_shared_policy()
3037 err = shared_policy_replace(sp, vma->vm_pgoff, vma->vm_pgoff + sz, new); in mpol_set_shared_policy()
3049 if (!sp->root.rb_node) in mpol_free_shared_policy()
3051 write_lock(&sp->lock); in mpol_free_shared_policy()
3052 next = rb_first(&sp->root); in mpol_free_shared_policy()
3055 next = rb_next(&n->nd); in mpol_free_shared_policy()
3058 write_unlock(&sp->lock); in mpol_free_shared_policy()
3071 /* Parsed by setup_numabalancing. override == 1 enables, -1 disables */ in check_numabalancing_enable()
3092 numabalancing_override = -1; in setup_numabalancing()
3125 .mode = MPOL_PREFERRED, in numa_policy_init()
3161 /* Reset policy of current process to default */
3183 * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option.
3188 * <mode>[=<flags>][:<nodelist>]
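For tmpfs, a string in that format arrives as the mpol= mount option. A sketch driving it from C via mount(2); the mount point and nodelist are made up, and the call needs CAP_SYS_ADMIN.

#include <sys/mount.h>
#include <stdio.h>

int main(void)
{
        /* <mode>[:<nodelist>] => interleave page allocations over nodes 0-3 */
        if (mount("tmpfs", "/mnt/inter", "tmpfs", 0,
                  "mpol=interleave:0-3") != 0)
                perror("mount");
        return 0;
}

The =<flags> piece (e.g. the "static" or "relative" strings formatted by mpol_to_str() later in this listing) slots into the same <mode>[=<flags>][:<nodelist>] shape.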
3199 int err = 1, mode; in mpol_parse_str() local
3202 *flags++ = '\0'; /* terminate mode string */ in mpol_parse_str()
3205 /* NUL-terminate mode or flags string */ in mpol_parse_str()
3214 mode = match_string(policy_modes, MPOL_MAX, str); in mpol_parse_str()
3215 if (mode < 0) in mpol_parse_str()
3218 switch (mode) { in mpol_parse_str()
3270 * mode flags. in mpol_parse_str()
3280 new = mpol_new(mode, mode_flags, &nodes); in mpol_parse_str()
3288 if (mode != MPOL_PREFERRED) { in mpol_parse_str()
3289 new->nodes = nodes; in mpol_parse_str()
3291 nodes_clear(new->nodes); in mpol_parse_str()
3292 node_set(first_node(nodes), new->nodes); in mpol_parse_str()
3294 new->mode = MPOL_LOCAL; in mpol_parse_str()
3301 new->w.user_nodemask = nodes; in mpol_parse_str()
3308 *--nodelist = ':'; in mpol_parse_str()
3310 *--flags = '='; in mpol_parse_str()
3318 * mpol_to_str - format a mempolicy structure for printing
3324 * Recommend a @maxlen of at least 51 for the longest mode, "weighted
3332 unsigned short mode = MPOL_DEFAULT; in mpol_to_str() local
3338 pol <= &preferred_node_policy[ARRAY_SIZE(preferred_node_policy) - 1])) { in mpol_to_str()
3339 mode = pol->mode; in mpol_to_str()
3340 flags = pol->flags; in mpol_to_str()
3343 switch (mode) { in mpol_to_str()
3352 nodes = pol->nodes; in mpol_to_str()
3360 p += snprintf(p, maxlen, "%s", policy_modes[mode]); in mpol_to_str()
3363 p += snprintf(p, buffer + maxlen - p, "="); in mpol_to_str()
3369 p += snprintf(p, buffer + maxlen - p, "static"); in mpol_to_str()
3371 p += snprintf(p, buffer + maxlen - p, "relative"); in mpol_to_str()
3375 p += snprintf(p, buffer + maxlen - p, "|"); in mpol_to_str()
3376 p += snprintf(p, buffer + maxlen - p, "balancing"); in mpol_to_str()
3381 p += scnprintf(p, buffer + maxlen - p, ":%*pbl", in mpol_to_str()
3398 weight = get_il_weight(node_attr->nid); in node_show()
3414 return -EINVAL; in node_store()
3418 return -ENOMEM; in node_store()
3425 new[node_attr->nid] = weight; in node_store()
3440 sysfs_remove_file(parent, &node_attr->kobj_attr.attr); in sysfs_wi_node_release()
3441 kfree(node_attr->kobj_attr.attr.name); in sysfs_wi_node_release()
3466 return -ENOMEM; in add_weight_node()
3471 return -ENOMEM; in add_weight_node()
3474 sysfs_attr_init(&node_attr->kobj_attr.attr); in add_weight_node()
3475 node_attr->kobj_attr.attr.name = name; in add_weight_node()
3476 node_attr->kobj_attr.attr.mode = 0644; in add_weight_node()
3477 node_attr->kobj_attr.show = node_show; in add_weight_node()
3478 node_attr->kobj_attr.store = node_store; in add_weight_node()
3479 node_attr->nid = nid; in add_weight_node()
3481 if (sysfs_create_file(wi_kobj, &node_attr->kobj_attr.attr)) { in add_weight_node()
3482 kfree(node_attr->kobj_attr.attr.name); in add_weight_node()
3485 return -ENOMEM; in add_weight_node()
3499 return -ENOMEM; in add_weighted_interleave_group()
3546 err = -ENOMEM; in mempolicy_sysfs_init()
3553 err = -ENOMEM; in mempolicy_sysfs_init()