Lines matching "+full:d +full:-tlb +full:-sets": a search-result listing of fragments from mm/madvise.c in the Linux kernel

1 // SPDX-License-Identifier: GPL-2.0
13 #include <linux/page-isolation.h>
28 #include <linux/backing-dev.h>
35 #include <asm/tlb.h>
42 * and return -ERESTARTNOINTR to have userspace try again.
47 struct mmu_gather *tlb; member
52 * Any behaviour which results in changes to the vma->vm_flags needs to
84 /* Add 1 for NUL terminator at the end of the anon_name->name */ in anon_vma_name_alloc()
88 kref_init(&anon_name->kref); in anon_vma_name_alloc()
89 memcpy(anon_name->name, name, count); in anon_vma_name_alloc()
104 mmap_assert_locked(vma->vm_mm); in anon_vma_name()
106 return vma->anon_name; in anon_vma_name()
109 /* mmap_lock should be write-locked */
116 vma->anon_name = NULL; in replace_anon_vma_name()
124 vma->anon_name = anon_vma_name_reuse(anon_name); in replace_anon_vma_name()
134 return -EINVAL; in replace_anon_vma_name()
150 struct mm_struct *mm = vma->vm_mm; in madvise_update_vma()
154 if (new_flags == vma->vm_flags && anon_vma_name_eq(anon_vma_name(vma), anon_name)) { in madvise_update_vma()
169 if (!vma->vm_file || vma_is_anon_shmem(vma)) { in madvise_update_vma()
182 struct vm_area_struct *vma = walk->private; in swapin_walk_pmd_entry()
194 ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); in swapin_walk_pmd_entry()
232 XA_STATE(xas, &mapping->i_pages, linear_page_index(vma, start)); in shmem_swapin_range()
233 pgoff_t end_index = linear_page_index(vma, end) - 1; in shmem_swapin_range()
249 addr = vma->vm_start + in shmem_swapin_range()
250 ((xas.xa_index - vma->vm_pgoff) << PAGE_SHIFT); in shmem_swapin_range()
273 struct mm_struct *mm = vma->vm_mm; in madvise_willneed()
274 struct file *file = vma->vm_file; in madvise_willneed()
280 walk_page_range(vma->vm_mm, start, end, &swapin_walk_ops, vma); in madvise_willneed()
285 if (shmem_mapping(file->f_mapping)) { in madvise_willneed()
286 shmem_swapin_range(vma, start, end, file->f_mapping); in madvise_willneed()
292 return -EBADF; in madvise_willneed()
308 offset = (loff_t)(start - vma->vm_start) in madvise_willneed()
309 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); in madvise_willneed()
311 vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED); in madvise_willneed()
319 if (!vma->vm_file) in can_do_file_pageout()
322 * paging out pagecache only for non-anonymous mappings that correspond in can_do_file_pageout()
324 * otherwise we'd be including shared non-exclusive mappings, which in can_do_file_pageout()
328 file_inode(vma->vm_file)) || in can_do_file_pageout()
329 file_permission(vma->vm_file, MAY_WRITE) == 0; in can_do_file_pageout()
338 int max_nr = (end - addr) / PAGE_SIZE; in madvise_folio_pte_batch()
348 struct madvise_walk_private *private = walk->private; in madvise_cold_or_pageout_pte_range()
349 struct mmu_gather *tlb = private->tlb; in madvise_cold_or_pageout_pte_range() local
350 bool pageout = private->pageout; in madvise_cold_or_pageout_pte_range()
351 struct mm_struct *mm = tlb->mm; in madvise_cold_or_pageout_pte_range()
352 struct vm_area_struct *vma = walk->vma; in madvise_cold_or_pageout_pte_range()
362 return -EINTR; in madvise_cold_or_pageout_pte_range()
372 tlb_change_page_size(tlb, HPAGE_PMD_SIZE); in madvise_cold_or_pageout_pte_range()
396 if (next - addr != HPAGE_PMD_SIZE) { in madvise_cold_or_pageout_pte_range()
415 tlb_remove_pmd_tlb_entry(tlb, pmd, addr); in madvise_cold_or_pageout_pte_range()
427 list_add(&folio->lru, &folio_list); in madvise_cold_or_pageout_pte_range()
440 tlb_change_page_size(tlb, PAGE_SIZE); in madvise_cold_or_pageout_pte_range()
442 start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); in madvise_cold_or_pageout_pte_range()
515 * non-LRU folio. If we have a large folio at this point, we in madvise_cold_or_pageout_pte_range()
529 tlb_remove_tlb_entries(tlb, pte, nr, addr); in madvise_cold_or_pageout_pte_range()
535 * As a side effect, it confuses idle-page tracking in madvise_cold_or_pageout_pte_range()
547 list_add(&folio->lru, &folio_list); in madvise_cold_or_pageout_pte_range()
569 static void madvise_cold_page_range(struct mmu_gather *tlb, in madvise_cold_page_range() argument
575 .tlb = tlb, in madvise_cold_page_range()
578 tlb_start_vma(tlb, vma); in madvise_cold_page_range()
579 walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private); in madvise_cold_page_range()
580 tlb_end_vma(tlb, vma); in madvise_cold_page_range()
585 return !(vma->vm_flags & (VM_LOCKED|VM_PFNMAP|VM_HUGETLB)); in can_madv_lru_vma()
592 struct mm_struct *mm = vma->vm_mm; in madvise_cold()
593 struct mmu_gather tlb; in madvise_cold() local
597 return -EINVAL; in madvise_cold()
600 tlb_gather_mmu(&tlb, mm); in madvise_cold()
601 madvise_cold_page_range(&tlb, vma, start_addr, end_addr); in madvise_cold()
602 tlb_finish_mmu(&tlb); in madvise_cold()
607 static void madvise_pageout_page_range(struct mmu_gather *tlb, in madvise_pageout_page_range() argument
613 .tlb = tlb, in madvise_pageout_page_range()
616 tlb_start_vma(tlb, vma); in madvise_pageout_page_range()
617 walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private); in madvise_pageout_page_range()
618 tlb_end_vma(tlb, vma); in madvise_pageout_page_range()
625 struct mm_struct *mm = vma->vm_mm; in madvise_pageout()
626 struct mmu_gather tlb; in madvise_pageout() local
630 return -EINVAL; in madvise_pageout()
639 (vma->vm_flags & VM_MAYSHARE))) in madvise_pageout()
643 tlb_gather_mmu(&tlb, mm); in madvise_pageout()
644 madvise_pageout_page_range(&tlb, vma, start_addr, end_addr); in madvise_pageout()
645 tlb_finish_mmu(&tlb); in madvise_pageout()
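
The fragments above show the kernel side of MADV_COLD and MADV_PAGEOUT: a per-VMA mmu_gather is set up with tlb_gather_mmu(), the range is walked with cold_walk_ops, and the batched flush is completed with tlb_finish_mmu(). For orientation, here is a minimal userspace sketch that exercises these paths; it assumes a libc new enough to define MADV_COLD and MADV_PAGEOUT (Linux 5.4+), and the 64 MiB size is an arbitrary example value.

    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>

    int main(void)
    {
        size_t len = 64 * 1024 * 1024;          /* arbitrary example size */
        char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (buf == MAP_FAILED)
            return 1;

        memset(buf, 0xaa, len);                 /* fault all pages in */

        /* MADV_COLD: deactivate the pages but keep their contents. */
        if (madvise(buf, len, MADV_COLD))
            perror("madvise(MADV_COLD)");

        /* MADV_PAGEOUT: reclaim the pages now (swap out / drop clean ones). */
        if (madvise(buf, len, MADV_PAGEOUT))
            perror("madvise(MADV_PAGEOUT)");

        munmap(buf, len);
        return 0;
    }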
655 struct mmu_gather *tlb = walk->private; in madvise_free_pte_range() local
656 struct mm_struct *mm = tlb->mm; in madvise_free_pte_range()
657 struct vm_area_struct *vma = walk->vma; in madvise_free_pte_range()
667 if (madvise_free_huge_pmd(tlb, vma, pmd, addr, next)) in madvise_free_pte_range()
670 tlb_change_page_size(tlb, PAGE_SIZE); in madvise_free_pte_range()
684 * prevent swap-in which is more expensive rather than in madvise_free_pte_range()
692 max_nr = (end - addr) / PAGE_SIZE; in madvise_free_pte_range()
694 nr_swap -= nr; in madvise_free_pte_range()
696 clear_not_present_full_ptes(mm, addr, pte, nr, tlb->fullmm); in madvise_free_pte_range()
699 pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); in madvise_free_pte_range()
776 tlb_remove_tlb_entries(tlb, pte, nr, addr); in madvise_free_pte_range()
800 struct mm_struct *mm = vma->vm_mm; in madvise_free_single_vma()
802 struct mmu_gather tlb; in madvise_free_single_vma() local
806 return -EINVAL; in madvise_free_single_vma()
808 range.start = max(vma->vm_start, start_addr); in madvise_free_single_vma()
809 if (range.start >= vma->vm_end) in madvise_free_single_vma()
810 return -EINVAL; in madvise_free_single_vma()
811 range.end = min(vma->vm_end, end_addr); in madvise_free_single_vma()
812 if (range.end <= vma->vm_start) in madvise_free_single_vma()
813 return -EINVAL; in madvise_free_single_vma()
818 tlb_gather_mmu(&tlb, mm); in madvise_free_single_vma()
822 tlb_start_vma(&tlb, vma); in madvise_free_single_vma()
823 walk_page_range(vma->vm_mm, range.start, range.end, in madvise_free_single_vma()
824 &madvise_free_walk_ops, &tlb); in madvise_free_single_vma()
825 tlb_end_vma(&tlb, vma); in madvise_free_single_vma()
827 tlb_finish_mmu(&tlb); in madvise_free_single_vma()
836 * zap_page_range_single call sets things up for shrink_active_list to actually
859 zap_page_range_single(vma, start, end - start, &details); in madvise_dontneed_single_vma()
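
madvise_dontneed_single_vma() above reaches zap_page_range_single(), which tears down the PTEs immediately, while the MADV_FREE path (madvise_free_single_vma()) only marks pages lazy-free for later reclaim. A hedged userspace sketch contrasting the two hints follows; MADV_FREE needs Linux 4.5+, and the sizes are arbitrary.

    #include <string.h>
    #include <sys/mman.h>

    int main(void)
    {
        size_t len = 8 * 1024 * 1024;           /* arbitrary example size */
        char *a = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (a == MAP_FAILED)
            return 1;
        memset(a, 1, len);

        /* Immediate: PTEs are zapped now; the next read returns zeroes. */
        madvise(a, len / 2, MADV_DONTNEED);

        /* Lazy: pages stay intact until memory pressure reclaims them,
         * and a write before that cancels the hint. */
        madvise(a + len / 2, len / 2, MADV_FREE);

        munmap(a, len);
        return 0;
    }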
874 return !(vma->vm_flags & forbidden); in madvise_dontneed_free_valid_vma()
898 struct mm_struct *mm = vma->vm_mm; in madvise_dontneed_free()
902 return -EINVAL; in madvise_dontneed_free()
913 return -ENOMEM; in madvise_dontneed_free()
920 return -EINVAL; in madvise_dontneed_free()
921 if (end > vma->vm_end) { in madvise_dontneed_free()
923 * Don't fail if end > vma->vm_end. If the old in madvise_dontneed_free()
931 * end-vma->vm_end range, but the manager can in madvise_dontneed_free()
934 end = vma->vm_end; in madvise_dontneed_free()
953 return -EINVAL; in madvise_dontneed_free()
972 case -EINTR: in madvise_populate()
973 return -EINTR; in madvise_populate()
974 case -EINVAL: /* Incompatible mappings / permissions. */ in madvise_populate()
975 return -EINVAL; in madvise_populate()
976 case -EHWPOISON: in madvise_populate()
977 return -EHWPOISON; in madvise_populate()
978 case -EFAULT: /* VM_FAULT_SIGBUS or VM_FAULT_SIGSEGV */ in madvise_populate()
979 return -EFAULT; in madvise_populate()
984 case -ENOMEM: /* No VMA or out of memory. */ in madvise_populate()
985 return -ENOMEM; in madvise_populate()
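
madvise_populate() above translates fault-in failures into the errno values a caller of MADV_POPULATE_READ/MADV_POPULATE_WRITE will see. A small sketch of the userspace side, assuming headers new enough to define MADV_POPULATE_WRITE (Linux 5.14+):

    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
        size_t len = 32 * 1024 * 1024;          /* arbitrary example size */
        char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (buf == MAP_FAILED)
            return 1;

        /* Prefault every page writable up front instead of taking minor
         * faults later; on failure errno is one of the values mapped by
         * madvise_populate() above (EINVAL, EHWPOISON, EFAULT, ENOMEM). */
        if (madvise(buf, len, MADV_POPULATE_WRITE)) {
            perror("MADV_POPULATE_WRITE");
            munmap(buf, len);
            return 1;
        }

        /* buf is now fully populated; writes will not fault. */
        munmap(buf, len);
        return 0;
    }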
1004 struct mm_struct *mm = vma->vm_mm; in madvise_remove()
1008 if (vma->vm_flags & VM_LOCKED) in madvise_remove()
1009 return -EINVAL; in madvise_remove()
1011 f = vma->vm_file; in madvise_remove()
1013 if (!f || !f->f_mapping || !f->f_mapping->host) { in madvise_remove()
1014 return -EINVAL; in madvise_remove()
1018 return -EACCES; in madvise_remove()
1020 offset = (loff_t)(start - vma->vm_start) in madvise_remove()
1021 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); in madvise_remove()
1036 offset, end - start); in madvise_remove()
1048 * they'd not be able to fault in. The issue arises when we try to zap in is_valid_guard_vma()
1057 if ((vma->vm_flags & (VM_MAYWRITE | disallowed)) != VM_MAYWRITE) in is_valid_guard_vma()
1091 unsigned long *nr_pages = (unsigned long *)walk->private; in guard_install_pte_entry()
1107 unsigned long *nr_pages = (unsigned long *)walk->private; in guard_install_set_pte()
1133 return -EINVAL; in madvise_guard_install()
1148 * non-guard pages are encountered, give up and zap the range before in madvise_guard_install()
1162 err = walk_page_range_mm(vma->vm_mm, start, end, in madvise_guard_install()
1168 unsigned long nr_expected_pages = PHYS_PFN(end - start); in madvise_guard_install()
1175 * OK, some of the range has non-guard pages mapped, zap in madvise_guard_install()
1178 zap_page_range_single(vma, start, end - start, NULL); in madvise_guard_install()
1194 /* If huge, cannot have guard pages present, so no-op - skip. */ in guard_remove_pud_entry()
1196 walk->action = ACTION_CONTINUE; in guard_remove_pud_entry()
1206 /* If huge, cannot have guard pages present, so no-op - skip. */ in guard_remove_pmd_entry()
1208 walk->action = ACTION_CONTINUE; in guard_remove_pmd_entry()
1220 pte_clear_not_present_full(walk->mm, addr, pte, false); in guard_remove_pte_entry()
1221 update_mmu_cache(walk->vma, addr, pte); in guard_remove_pte_entry()
1240 * We're ok with removing guards in mlock()'d ranges, as this is a in madvise_guard_remove()
1241 * non-destructive action. in madvise_guard_remove()
1244 return -EINVAL; in madvise_guard_remove()
1246 return walk_page_range(vma->vm_mm, start, end, in madvise_guard_remove()
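
The guard_install/guard_remove walkers above back MADV_GUARD_INSTALL and MADV_GUARD_REMOVE, which were merged in Linux 6.13. The sketch below shows the intended userspace use; the fallback numeric values (102/103) are an assumption taken from the uapi headers, for libcs that do not define the constants yet.

    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    #ifndef MADV_GUARD_INSTALL
    #define MADV_GUARD_INSTALL 102      /* assumption: uapi value, Linux 6.13+ */
    #define MADV_GUARD_REMOVE  103
    #endif

    int main(void)
    {
        long pagesz = sysconf(_SC_PAGESIZE);
        size_t len = 16 * (size_t)pagesz;
        char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (buf == MAP_FAILED)
            return 1;

        /* Make the first page a guard page: touching it raises SIGSEGV,
         * but no backing memory is consumed and no PROT_NONE VMA split
         * is needed. */
        if (madvise(buf, pagesz, MADV_GUARD_INSTALL))
            perror("MADV_GUARD_INSTALL");

        /* Later, remove the guard so the page becomes usable again. */
        if (madvise(buf, pagesz, MADV_GUARD_REMOVE))
            perror("MADV_GUARD_REMOVE");

        munmap(buf, len);
        return 0;
    }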
1262 unsigned long new_flags = vma->vm_flags; in madvise_vma_behavior()
1265 return -EPERM; in madvise_vma_behavior()
1293 if (vma->vm_flags & VM_IO) in madvise_vma_behavior()
1294 return -EINVAL; in madvise_vma_behavior()
1299 if (vma->vm_file || vma->vm_flags & VM_SHARED) in madvise_vma_behavior()
1300 return -EINVAL; in madvise_vma_behavior()
1304 if (vma->vm_flags & VM_DROPPABLE) in madvise_vma_behavior()
1305 return -EINVAL; in madvise_vma_behavior()
1313 (vma->vm_flags & VM_DROPPABLE)) in madvise_vma_behavior()
1314 return -EINVAL; in madvise_vma_behavior()
1348 if (error == -ENOMEM) in madvise_vma_behavior()
1349 error = -EAGAIN; in madvise_vma_behavior()
1363 return -EPERM; in madvise_inject_error()
1391 if (ret == -EOPNOTSUPP) in madvise_inject_error()
1465 * original range will result in this function returning -ENOMEM while still
1483 * ranges, just ignore them, but return -ENOMEM at the end. in madvise_walk_vmas()
1484 * - different from the way of handling in mlock etc. in madvise_walk_vmas()
1487 if (vma && start > vma->vm_start) in madvise_walk_vmas()
1495 return -ENOMEM; in madvise_walk_vmas()
1497 /* Here start < (end|vma->vm_end). */ in madvise_walk_vmas()
1498 if (start < vma->vm_start) { in madvise_walk_vmas()
1499 unmapped_error = -ENOMEM; in madvise_walk_vmas()
1500 start = vma->vm_start; in madvise_walk_vmas()
1505 /* Here vma->vm_start <= start < (end|vma->vm_end) */ in madvise_walk_vmas()
1506 tmp = vma->vm_end; in madvise_walk_vmas()
1510 /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ in madvise_walk_vmas()
1515 if (prev && start < prev->vm_end) in madvise_walk_vmas()
1516 start = prev->vm_end; in madvise_walk_vmas()
1520 vma = find_vma(mm, prev->vm_end); in madvise_walk_vmas()
1537 if (vma->vm_file && !vma_is_anon_shmem(vma)) in madvise_vma_anon_name()
1538 return -EBADF; in madvise_vma_anon_name()
1540 error = madvise_update_vma(vma, prev, start, end, vma->vm_flags, in madvise_vma_anon_name()
1547 if (error == -ENOMEM) in madvise_vma_anon_name()
1548 error = -EAGAIN; in madvise_vma_anon_name()
1559 return -EINVAL; in madvise_set_anon_name()
1562 /* Check to see whether len was rounded up from small -ve to zero */ in madvise_set_anon_name()
1564 return -EINVAL; in madvise_set_anon_name()
1568 return -EINVAL; in madvise_set_anon_name()
1582 * use appropriate read-ahead and caching techniques. The information
1587 * MADV_NORMAL - the default behavior is to read clusters. This
1588 * results in some read-ahead and read-behind.
1589 * MADV_RANDOM - the system should read the minimum amount of data
1590 * on any access, since it is unlikely that the appli-
1592 * MADV_SEQUENTIAL - pages in the given range will probably be accessed
1595 * MADV_WILLNEED - the application is notifying the system to read
1597 * MADV_DONTNEED - the application is finished with the given range,
1599 * MADV_FREE - the application marks pages in the given range as lazy free,
1601 * MADV_REMOVE - the application wants to free up the given range of
1603 * MADV_DONTFORK - omit this area from child's address space when forking:
1605 * MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
1606 * MADV_WIPEONFORK - present the child process with zero-filled memory in this
1608 * MADV_KEEPONFORK - undo the effect of MADV_WIPEONFORK
1609 * MADV_HWPOISON - trigger memory error handler as if the given memory range
1611 * MADV_SOFT_OFFLINE - try to soft-offline the given range of memory.
1612 * MADV_MERGEABLE - the application recommends that KSM try to merge pages in
1614 * MADV_UNMERGEABLE - cancel MADV_MERGEABLE: no longer merge pages with others.
1615 * MADV_HUGEPAGE - the application wants to back the given range by transparent
1618 * MADV_NOHUGEPAGE - mark the given range as not worth being backed by
1621 * MADV_COLLAPSE - synchronously coalesce pages into new THP.
1622 * MADV_DONTDUMP - the application wants to prevent pages in the given range
1624 * MADV_DODUMP - cancel MADV_DONTDUMP: no longer exclude from core dump.
1625 * MADV_COLD - the application is not expected to use this memory soon,
1628 * MADV_PAGEOUT - the application is not expected to use this memory soon,
1630 * MADV_POPULATE_READ - populate (prefault) page tables readable by
1632 * MADV_POPULATE_WRITE - populate (prefault) page tables writable by
1636 * zero - success
1637 * -EINVAL - start + len < 0, start is not page-aligned,
1640 * or the specified address range includes file, Huge TLB,
1642 * -ENOMEM - addresses in the specified range are not currently
1644 * -EIO - an I/O error occurred while paging in data.
1645 * -EBADF - map exists, but area maps something that isn't a file.
1646 * -EAGAIN - a kernel resource was temporarily unavailable.
1647 * -EPERM - memory is sealed.
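
As a rough userspace counterpart to the behaviours documented above, the sketch below maps a file, asks for sequential read-ahead, starts paging the range in, and finally tells the kernel the mapping's pages are no longer needed. The file name is a placeholder, and error handling is reduced to madvise()'s -1/errno contract described in the return-value list.

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("data.bin", O_RDONLY);    /* placeholder file */
        struct stat st;

        if (fd < 0 || fstat(fd, &st) < 0)
            return 1;

        char *p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
        if (p == MAP_FAILED)
            return 1;

        /* MADV_SEQUENTIAL: expect a linear scan, so read ahead aggressively. */
        madvise(p, st.st_size, MADV_SEQUENTIAL);
        /* MADV_WILLNEED: start paging the range in now. */
        madvise(p, st.st_size, MADV_WILLNEED);

        /* ... scan the mapping ... */

        /* MADV_DONTNEED: discard this mapping's pages; later accesses
         * refault from the file. On failure madvise() returns -1 with
         * errno set to one of the values listed above. */
        if (madvise(p, st.st_size, MADV_DONTNEED))
            perror("madvise");

        munmap(p, st.st_size);
        close(fd);
        return 0;
    }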
1658 return -EINVAL; in do_madvise()
1661 return -EINVAL; in do_madvise()
1664 /* Check to see whether len was rounded up from small -ve to zero */ in do_madvise()
1666 return -EINVAL; in do_madvise()
1670 return -EINVAL; in do_madvise()
1683 return -EINTR; in do_madvise()
1714 return do_madvise(current->mm, start, len_in, behavior); in SYSCALL_DEFINE3()
1739 if (ret == -ERESTARTNOINTR) { in vector_madvise()
1741 ret = -EINTR; in vector_madvise()
1751 ret = (total_len - iov_iter_count(iter)) ? : ret; in vector_madvise()
1768 ret = -EINVAL; in SYSCALL_DEFINE5()
1793 if (mm != current->mm && !process_madvise_remote_valid(behavior)) { in SYSCALL_DEFINE5()
1794 ret = -EINVAL; in SYSCALL_DEFINE5()
1800 * only non-destructive hints are currently supported for remote in SYSCALL_DEFINE5()
1803 if (mm != current->mm && !capable(CAP_SYS_NICE)) { in SYSCALL_DEFINE5()
1804 ret = -EPERM; in SYSCALL_DEFINE5()
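
The tail of the listing is the process_madvise() syscall path: a pidfd names the target process, the iovec ranges are advised in a loop, and a remote caller needs CAP_SYS_NICE plus a non-destructive hint. A hedged sketch using raw syscall() follows; the pid, address and length are placeholders, SYS_process_madvise/SYS_pidfd_open require reasonably recent kernel headers, and the MADV_PAGEOUT fallback value is taken from the uapi headers.

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/mman.h>
    #include <sys/syscall.h>
    #include <sys/uio.h>
    #include <unistd.h>

    #ifndef MADV_PAGEOUT
    #define MADV_PAGEOUT 21                     /* assumption: uapi value */
    #endif

    int main(void)
    {
        pid_t target = 1234;                    /* placeholder pid */
        void *addr = (void *)0x7f0000000000UL;  /* placeholder remote address */
        size_t len = 2 * 1024 * 1024;           /* placeholder length */

        int pidfd = syscall(SYS_pidfd_open, target, 0);
        if (pidfd < 0) {
            perror("pidfd_open");
            return 1;
        }

        struct iovec iov = { .iov_base = addr, .iov_len = len };

        /* Only non-destructive advice (e.g. MADV_COLD, MADV_PAGEOUT) is
         * accepted for another process's mm, matching the checks above. */
        if (syscall(SYS_process_madvise, pidfd, &iov, 1, MADV_PAGEOUT, 0) < 0)
            perror("process_madvise");

        close(pidfd);
        return 0;
    }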