Lines Matching +full:io +full:- +full:backends
1 // SPDX-License-Identifier: GPL-2.0-only
9 * demand-loading started 01.12.91 - seems it is high on the list of
10 * things wanted, and it should be easy to implement. - Linus
14 * Ok, demand-loading was easy, shared pages a little bit trickier. Shared
15 * pages started 02.12.91, seems to work. - Linus.
21 * Also corrected some "invalidate()"s - I wasn't doing enough of them.
27 * 19.12.91 - works, somewhat. Sometimes I get faults, don't know why.
29 * 20.12.91 - Ok, making the swap-device changeable like the root.
33 * 05.04.94 - Multi-page memory management added for v1.1.
36 * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG
69 #include <linux/memory-tiers.h>
82 #include <asm/io.h>
89 #include "pgalloc-track.h"
94 #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
110 * Return true if the original pte was a uffd-wp pte marker (so the pte was
111 * wr-protected).
115 if (!userfaultfd_wp(vmf->vma)) in vmf_orig_pte_uffd_wp()
117 if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID)) in vmf_orig_pte_uffd_wp()
120 return pte_marker_uffd_wp(vmf->orig_pte); in vmf_orig_pte_uffd_wp()
193 mm_dec_nr_ptes(tlb->mm); in free_pte_range()
221 if (end - 1 > ceiling - 1) in free_pmd_range()
227 mm_dec_nr_pmds(tlb->mm); in free_pmd_range()
255 if (end - 1 > ceiling - 1) in free_pud_range()
261 mm_dec_nr_puds(tlb->mm); in free_pud_range()
289 if (end - 1 > ceiling - 1) in free_p4d_range()
298 * This function frees user-level page tables of a process.
314 * Why all these "- 1"s? Because 0 represents both the bottom in free_pgd_range()
315 * of the address space and the top of it (using -1 for the in free_pgd_range()
319 * Comparisons need to use "end - 1" and "ceiling - 1" (though in free_pgd_range()
330 * bother to round floor or end up - the tests don't need that. in free_pgd_range()
344 if (end - 1 > ceiling - 1) in free_pgd_range()
345 end -= PMD_SIZE; in free_pgd_range()
346 if (addr > end - 1) in free_pgd_range()
353 pgd = pgd_offset(tlb->mm, addr); in free_pgd_range()
369 unsigned long addr = vma->vm_start; in free_pgtables()
376 next = mas_find(mas, ceiling - 1); in free_pgtables()
390 hugetlb_free_pgd_range(tlb, addr, vma->vm_end, in free_pgtables()
391 floor, next ? next->vm_start : ceiling); in free_pgtables()
399 while (next && next->vm_start <= vma->vm_end + PMD_SIZE in free_pgtables()
402 next = mas_find(mas, ceiling - 1); in free_pgtables()
411 free_pgd_range(tlb, addr, vma->vm_end, in free_pgtables()
412 floor, next ? next->vm_start : ceiling); in free_pgtables()
432 * of a chain of data-dependent loads, meaning most CPUs (alpha in pmd_install()
434 * seen in-order. See the alpha page table accessors for the in pmd_install()
448 return -ENOMEM; in __pte_alloc()
460 return -ENOMEM; in __pte_alloc_kernel()
490 * is found. For example, we might have a PFN-mapped pte in
498 pgd_t *pgd = pgd_offset(vma->vm_mm, addr); in print_bad_pte()
527 mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL; in print_bad_pte()
531 current->comm, in print_bad_pte()
536 (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); in print_bad_pte()
538 vma->vm_file, in print_bad_pte()
539 vma->vm_ops ? vma->vm_ops->fault : NULL, in print_bad_pte()
540 vma->vm_file ? vma->vm_file->f_op->mmap : NULL, in print_bad_pte()
541 mapping ? mapping->a_ops->read_folio : NULL); in print_bad_pte()
547 * vm_normal_page -- This function gets the "struct page" associated with a pte.
567 * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
599 if (vma->vm_ops && vma->vm_ops->find_special_page) in vm_normal_page()
600 return vma->vm_ops->find_special_page(vma, addr); in vm_normal_page()
601 if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) in vm_normal_page()
622 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { in vm_normal_page()
623 if (vma->vm_flags & VM_MIXEDMAP) { in vm_normal_page()
631 off = (addr - vma->vm_start) >> PAGE_SHIFT; in vm_normal_page()
632 if (pfn == vma->vm_pgoff + off) in vm_normal_page()
634 if (!is_cow_mapping(vma->vm_flags)) in vm_normal_page()
677 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { in vm_normal_page_pmd()
678 if (vma->vm_flags & VM_MIXEDMAP) { in vm_normal_page_pmd()
684 off = (addr - vma->vm_start) >> PAGE_SHIFT; in vm_normal_page_pmd()
685 if (pfn == vma->vm_pgoff + off) in vm_normal_page_pmd()
687 if (!is_cow_mapping(vma->vm_flags)) in vm_normal_page_pmd()
728 pte = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot))); in restore_exclusive_pte()
754 set_pte_at(vma->vm_mm, address, ptep, pte); in restore_exclusive_pte()
757 * No need to invalidate - it was non-present before. However in restore_exclusive_pte()
780 return -EBUSY; in try_restore_exclusive_pte()
794 unsigned long vm_flags = dst_vma->vm_flags; in copy_nonpresent_pte()
803 return -EIO; in copy_nonpresent_pte()
806 if (unlikely(list_empty(&dst_mm->mmlist))) { in copy_nonpresent_pte()
808 if (list_empty(&dst_mm->mmlist)) in copy_nonpresent_pte()
809 list_add(&dst_mm->mmlist, in copy_nonpresent_pte()
810 &src_mm->mmlist); in copy_nonpresent_pte()
859 * We do not preserve soft-dirty information, because so in copy_nonpresent_pte()
881 VM_BUG_ON(!is_cow_mapping(src_vma->vm_flags)); in copy_nonpresent_pte()
883 return -EBUSY; in copy_nonpresent_pte()
884 return -ENOENT; in copy_nonpresent_pte()
904 * and re-use the pte the traditional way.
906 * And if we need a pre-allocated page but don't yet have
921 return -EAGAIN; in copy_present_page()
928 if (copy_mc_user_highpage(&new_folio->page, page, addr, src_vma)) in copy_present_page()
929 return -EHWPOISON; in copy_present_page()
938 pte = mk_pte(&new_folio->page, dst_vma->vm_page_prot); in copy_present_page()
941 /* Uffd-wp needs to be delivered to dest pte as well */ in copy_present_page()
943 set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); in copy_present_page()
951 struct mm_struct *src_mm = src_vma->vm_mm; in __copy_present_ptes()
954 if (is_cow_mapping(src_vma->vm_flags) && pte_write(pte)) { in __copy_present_ptes()
960 if (src_vma->vm_flags & VM_SHARED) in __copy_present_ptes()
967 set_ptes(dst_vma->vm_mm, addr, dst_pte, pte, nr); in __copy_present_ptes()
971 * Copy one present PTE, trying to batch-process subsequent PTEs that map
974 * Returns -EAGAIN if one preallocated page is required to copy the next PTE.
1000 if (src_vma->vm_flags & VM_SHARED) in copy_present_ptes()
1012 return -EAGAIN; in copy_present_ptes()
1081 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pte_range()
1082 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pte_range()
1102 * protected by mmap_lock-less collapse skipping areas with anon_vma in copy_pte_range()
1108 ret = -ENOMEM; in copy_pte_range()
1114 * retract_page_tables() are using vma->anon_vma to be exclusive, so in copy_pte_range()
1134 * We are holding two locks at this point - either of them in copy_pte_range()
1153 if (ret == -EIO) { in copy_pte_range()
1156 } else if (ret == -EBUSY) { in copy_pte_range()
1169 WARN_ON_ONCE(ret != -ENOENT); in copy_pte_range()
1172 max_nr = (end - addr) / PAGE_SIZE; in copy_pte_range()
1176 * If we need a pre-allocated page for this pte, drop the in copy_pte_range()
1180 if (unlikely(ret == -EAGAIN || ret == -EHWPOISON)) in copy_pte_range()
1184 * pre-alloc page cannot be reused by next time so as in copy_pte_range()
1203 if (ret == -EIO) { in copy_pte_range()
1206 ret = -ENOMEM; in copy_pte_range()
1210 } else if (ret == -EBUSY || unlikely(ret == -EHWPOISON)) { in copy_pte_range()
1212 } else if (ret == -EAGAIN) { in copy_pte_range()
1215 return -ENOMEM; in copy_pte_range()
1236 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pmd_range()
1237 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pmd_range()
1243 return -ENOMEM; in copy_pmd_range()
1250 VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma); in copy_pmd_range()
1253 if (err == -ENOMEM) in copy_pmd_range()
1254 return -ENOMEM; in copy_pmd_range()
1263 return -ENOMEM; in copy_pmd_range()
1273 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pud_range()
1274 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pud_range()
1280 return -ENOMEM; in copy_pud_range()
1287 VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, src_vma); in copy_pud_range()
1290 if (err == -ENOMEM) in copy_pud_range()
1291 return -ENOMEM; in copy_pud_range()
1300 return -ENOMEM; in copy_pud_range()
1310 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_p4d_range()
1316 return -ENOMEM; in copy_p4d_range()
1324 return -ENOMEM; in copy_p4d_range()
1338 * Always copy pgtables when dst_vma has uffd-wp enabled even if it's in vma_needs_copy()
1339 * file-backed (e.g. shmem). Because when uffd-wp is enabled, pgtable in vma_needs_copy()
1340 * contains uffd-wp protection information, that's something we can't in vma_needs_copy()
1346 if (src_vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) in vma_needs_copy()
1349 if (src_vma->anon_vma) in vma_needs_copy()
1366 unsigned long addr = src_vma->vm_start; in copy_page_range()
1367 unsigned long end = src_vma->vm_end; in copy_page_range()
1368 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_page_range()
1369 struct mm_struct *src_mm = src_vma->vm_mm; in copy_page_range()
1380 if (unlikely(src_vma->vm_flags & VM_PFNMAP)) { in copy_page_range()
1396 is_cow = is_cow_mapping(src_vma->vm_flags); in copy_page_range()
1410 raw_write_seqcount_begin(&src_mm->write_protect_seq); in copy_page_range()
1423 ret = -ENOMEM; in copy_page_range()
1429 raw_write_seqcount_end(&src_mm->write_protect_seq); in copy_page_range()
1439 if (!details || details->reclaim_pt) in should_zap_cows()
1443 return details->even_cows; in should_zap_cows()
1454 /* Otherwise we should only zap non-anon folios */ in should_zap_folio()
1463 return details->zap_flags & ZAP_FLAG_DROP_MARKER; in zap_drop_markers()
1467 * This function makes sure that we'll replace the none pte with an uffd-wp
1470 * Returns true if an uffd-wp pte was installed, false otherwise.
1491 if (--nr == 0) in zap_install_uffd_wp_if_needed()
1506 struct mm_struct *mm = tlb->mm; in zap_present_folio_ptes()
1510 ptent = get_and_clear_full_ptes(mm, addr, pte, nr, tlb->fullmm); in zap_present_folio_ptes()
1520 rss[mm_counter(folio)] -= nr; in zap_present_folio_ptes()
1522 /* We don't need up-to-date accessed/dirty bits. */ in zap_present_folio_ptes()
1523 clear_full_ptes(mm, addr, pte, nr, tlb->fullmm); in zap_present_folio_ptes()
1524 rss[MM_ANONPAGES] -= nr; in zap_present_folio_ptes()
1546 * Zap or skip at least one present PTE, trying to batch-process subsequent
1558 struct mm_struct *mm = tlb->mm; in zap_present_ptes()
1565 /* We don't need up-to-date accessed/dirty bits. */ in zap_present_ptes()
1566 ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); in zap_present_ptes()
1620 * consider uffd-wp bit when zap. For more information, in zap_nonpresent_ptes()
1624 rss[mm_counter(folio)]--; in zap_nonpresent_ptes()
1634 rss[MM_SWAPENTS] -= nr; in zap_nonpresent_ptes()
1641 rss[mm_counter(folio)]--; in zap_nonpresent_ptes()
1665 clear_not_present_full_ptes(vma->vm_mm, addr, pte, nr, tlb->fullmm); in zap_nonpresent_ptes()
1679 int max_nr = (end - addr) / PAGE_SIZE; in do_zap_pte_range()
1689 max_nr -= nr; in do_zap_pte_range()
1713 struct mm_struct *mm = tlb->mm; in zap_pte_range()
1811 if (next - addr != HPAGE_PMD_SIZE) in zap_pmd_range()
1818 } else if (details && details->single_folio && in zap_pmd_range()
1819 folio_test_pmd_mappable(details->single_folio) && in zap_pmd_range()
1820 next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { in zap_pmd_range()
1821 spinlock_t *ptl = pmd_lock(tlb->mm, pmd); in zap_pmd_range()
1835 pmd--; in zap_pmd_range()
1853 if (next - addr != HPAGE_PUD_SIZE) { in zap_pud_range()
1854 mmap_assert_locked(tlb->mm); in zap_pud_range()
1899 pgd = pgd_offset(vma->vm_mm, addr); in unmap_page_range()
1915 unsigned long start = max(vma->vm_start, start_addr); in unmap_single_vma()
1918 if (start >= vma->vm_end) in unmap_single_vma()
1920 end = min(vma->vm_end, end_addr); in unmap_single_vma()
1921 if (end <= vma->vm_start) in unmap_single_vma()
1924 if (vma->vm_file) in unmap_single_vma()
1927 if (unlikely(vma->vm_flags & VM_PFNMAP)) in unmap_single_vma()
1933 * It is undesirable to test vma->vm_file as it in unmap_single_vma()
1934 * should be non-null for valid hugetlb area. in unmap_single_vma()
1937 * hugetlbfs ->mmap method fails, in unmap_single_vma()
1938 * mmap_region() nullifies vma->vm_file in unmap_single_vma()
1943 if (vma->vm_file) { in unmap_single_vma()
1945 details->zap_flags : 0; in unmap_single_vma()
1955 * unmap_vmas - unmap a range of memory covered by a list of vma's
1972 * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
1983 /* Careful - we need to zap private pages too! */ in unmap_vmas()
1987 mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm, in unmap_vmas()
1997 vma = mas_find(mas, tree_end - 1); in unmap_vmas()
2003 * zap_page_range_single - remove user pages in a given range
2018 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, in zap_page_range_single()
2021 tlb_gather_mmu(&tlb, vma->vm_mm); in zap_page_range_single()
2022 update_hiwater_rss(vma->vm_mm); in zap_page_range_single()
2025 * unmap 'address-end' not 'range.start-range.end' as range in zap_page_range_single()
2035 * zap_vma_ptes - remove ptes mapping the vma
2049 !(vma->vm_flags & VM_PFNMAP)) in zap_vma_ptes()
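A minimal usage sketch for zap_vma_ptes() (hypothetical driver teardown path; the mydrv_* name is invented): the helper only acts on VM_PFNMAP VMAs, and the zapped range must lie inside the VMA.

	#include <linux/mm.h>

	/* Drop every PTE previously established in one of the driver's
	 * VM_PFNMAP mappings, e.g. before revoking device access. */
	static void mydrv_revoke_mapping(struct vm_area_struct *vma)
	{
		zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
	}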
2090 VM_WARN_ON_ONCE(vma->vm_flags & VM_PFNMAP); in vm_mixed_zeropage_allowed()
2097 if (mm_forbids_zeropage(vma->vm_mm)) in vm_mixed_zeropage_allowed()
2100 if (is_cow_mapping(vma->vm_flags)) in vm_mixed_zeropage_allowed()
2103 if (!(vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) in vm_mixed_zeropage_allowed()
2106 * Why not allow any VMA that has vm_ops->pfn_mkwrite? GUP could in vm_mixed_zeropage_allowed()
2107 * find the shared zeropage and longterm-pin it, which would in vm_mixed_zeropage_allowed()
2109 * page due to vma->vm_ops->pfn_mkwrite, because what's mapped would in vm_mixed_zeropage_allowed()
2114 return vma->vm_ops && vma->vm_ops->pfn_mkwrite && in vm_mixed_zeropage_allowed()
2115 (vma_is_fsdax(vma) || vma->vm_flags & VM_IO); in vm_mixed_zeropage_allowed()
2124 return -EINVAL; in validate_page_before_insert()
2127 return -EINVAL; in validate_page_before_insert()
2132 return -EINVAL; in validate_page_before_insert()
2144 return -EBUSY; in insert_page_into_pte_locked()
2151 inc_mm_counter(vma->vm_mm, mm_counter_file(folio)); in insert_page_into_pte_locked()
2154 set_pte_at(vma->vm_mm, addr, pte, pteval); in insert_page_into_pte_locked()
2168 retval = -ENOMEM; in insert_page()
2169 pte = get_locked_pte(vma->vm_mm, addr, &ptl); in insert_page()
2198 struct mm_struct *const mm = vma->vm_mm; in insert_pages()
2204 ret = -EFAULT; in insert_pages()
2210 remaining_pages_total, PTRS_PER_PTE - pte_index(addr)); in insert_pages()
2213 ret = -ENOMEM; in insert_pages()
2223 ret = -EFAULT; in insert_pages()
2232 remaining_pages_total -= pte_idx; in insert_pages()
2239 pages_to_write_in_pmd -= batch_size; in insert_pages()
2240 remaining_pages_total -= batch_size; in insert_pages()
2251 * vm_insert_pages - insert multiple pages into user vma, batching the pmd lock.
2268 const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1; in vm_insert_pages()
2270 if (addr < vma->vm_start || end_addr >= vma->vm_end) in vm_insert_pages()
2271 return -EFAULT; in vm_insert_pages()
2272 if (!(vma->vm_flags & VM_MIXEDMAP)) { in vm_insert_pages()
2273 BUG_ON(mmap_read_trylock(vma->vm_mm)); in vm_insert_pages()
2274 BUG_ON(vma->vm_flags & VM_PFNMAP); in vm_insert_pages()
2278 return insert_pages(vma, addr, pages, num, vma->vm_page_prot); in vm_insert_pages()
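A rough sketch of the batched interface (hypothetical context; mydrv_map_batch() and its caller are invented): on return, *num holds how many of the pages were not inserted.

	#include <linux/mm.h>
	#include <linux/printk.h>

	/* Map 'count' already-allocated pages back-to-back from the start of
	 * the VMA; the pmd lock is taken once per batch inside the helper. */
	static int mydrv_map_batch(struct vm_area_struct *vma,
				   struct page **pages, unsigned long count)
	{
		unsigned long num = count;
		int err = vm_insert_pages(vma, vma->vm_start, pages, &num);

		if (err)
			pr_warn("mydrv: %lu of %lu pages not inserted (%d)\n",
				num, count, err);
		return err;
	}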
2283 * vm_insert_page - insert single page into user vma
2305 * Usually this function is called from f_op->mmap() handler
2306 * under mm->mmap_lock write-lock, so it can change vma->vm_flags.
2308 * function from other places, for example from page-fault handler.
2315 if (addr < vma->vm_start || addr >= vma->vm_end) in vm_insert_page()
2316 return -EFAULT; in vm_insert_page()
2317 if (!(vma->vm_flags & VM_MIXEDMAP)) { in vm_insert_page()
2318 BUG_ON(mmap_read_trylock(vma->vm_mm)); in vm_insert_page()
2319 BUG_ON(vma->vm_flags & VM_PFNMAP); in vm_insert_page()
2322 return insert_page(vma, addr, page, vma->vm_page_prot); in vm_insert_page()
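A minimal f_op->mmap sketch for vm_insert_page() (hypothetical driver; mydrv_page is assumed to be a page the driver allocated earlier): called under mmap_lock held for write, so the helper may set VM_MIXEDMAP on the VMA as described above.

	#include <linux/fs.h>
	#include <linux/mm.h>

	static struct page *mydrv_page;	/* assumed allocated at probe time */

	static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
	{
		if (vma->vm_end - vma->vm_start != PAGE_SIZE)
			return -EINVAL;

		/* Insert a single refcounted kernel page at the start of the VMA. */
		return vm_insert_page(vma, vma->vm_start, mydrv_page);
	}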
2327 * __vm_map_pages - maps range of kernel pages into user vma
2343 unsigned long uaddr = vma->vm_start; in __vm_map_pages()
2348 return -ENXIO; in __vm_map_pages()
2351 if (count > num - offset) in __vm_map_pages()
2352 return -ENXIO; in __vm_map_pages()
2365 * vm_map_pages - maps a range of kernel pages starting at a non-zero offset
2385 return __vm_map_pages(vma, pages, num, vma->vm_pgoff); in vm_map_pages()
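A short sketch of vm_map_pages() from an mmap handler (hypothetical buffer object; struct mydrv_buf is invented): vma->vm_pgoff selects the starting page inside the array, and ranges running past nr_pages are rejected.

	#include <linux/fs.h>
	#include <linux/mm.h>

	struct mydrv_buf {			/* hypothetical buffer object */
		struct page **pages;
		unsigned long nr_pages;
	};

	static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
	{
		struct mydrv_buf *buf = file->private_data;

		return vm_map_pages(vma, buf->pages, buf->nr_pages);
	}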
2390 * vm_map_pages_zero - maps a range of kernel pages starting at offset zero
2412 struct mm_struct *mm = vma->vm_mm; in insert_pfn()
2464 * vmf_insert_pfn_prot - insert single pfn into user vma with specified pgprot
2471 * to override pgprot on a per-page basis.
2473 * This only makes sense for IO mappings, and it makes no sense for
2478 * pgprot typically only differs from @vma->vm_page_prot when drivers set
2479 * caching- and encryption bits different than those of @vma->vm_page_prot,
2480 * because the caching- or encryption mode may not be known at mmap() time.
2482 * This is ok as long as @vma->vm_page_prot is not used by the core vm
2485 * functions that don't touch caching- or encryption bits, using pte_modify()
2488 * Also when new page-table entries are created, this is only done using the
2489 * fault() callback, and never using the value of vma->vm_page_prot,
2490 * except for page-table entries that point to anonymous pages as the result
2505 BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); in vmf_insert_pfn_prot()
2506 BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == in vmf_insert_pfn_prot()
2508 BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); in vmf_insert_pfn_prot()
2509 BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); in vmf_insert_pfn_prot()
2511 if (addr < vma->vm_start || addr >= vma->vm_end) in vmf_insert_pfn_prot()
2525 * vmf_insert_pfn - insert single pfn into user vma
2533 * This function should only be called from a vm_ops->fault handler, and
2547 return vmf_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); in vmf_insert_pfn()
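A minimal vm_ops->fault sketch (hypothetical; mydrv_bar_pfn() is an invented helper that translates the faulting offset to a device pfn). vmf_insert_pfn_prot() would be used instead if the entry needs a pgprot differing from vma->vm_page_prot.

	#include <linux/mm.h>

	static unsigned long mydrv_bar_pfn(struct vm_area_struct *vma,
					   pgoff_t pgoff);	/* assumed helper */

	static vm_fault_t mydrv_fault(struct vm_fault *vmf)
	{
		unsigned long pfn = mydrv_bar_pfn(vmf->vma, vmf->pgoff);

		/* Install a single special PTE for the faulting address. */
		return vmf_insert_pfn(vmf->vma, vmf->address, pfn);
	}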
2557 if (vma->vm_flags & VM_MIXEDMAP) in vm_mixed_ok()
2571 pgprot_t pgprot = vma->vm_page_prot; in __vm_insert_mixed()
2577 if (addr < vma->vm_start || addr >= vma->vm_end) in __vm_insert_mixed()
2607 if (err == -ENOMEM) in __vm_insert_mixed()
2609 if (err < 0 && err != -EBUSY) in __vm_insert_mixed()
2636 * in null mappings (currently treated as "copy-on-access")
2648 return -ENOMEM; in remap_pte_range()
2653 err = -EACCES; in remap_pte_range()
2672 pfn -= addr >> PAGE_SHIFT; in remap_pmd_range()
2675 return -ENOMEM; in remap_pmd_range()
2695 pfn -= addr >> PAGE_SHIFT; in remap_pud_range()
2698 return -ENOMEM; in remap_pud_range()
2717 pfn -= addr >> PAGE_SHIFT; in remap_p4d_range()
2720 return -ENOMEM; in remap_p4d_range()
2737 struct mm_struct *mm = vma->vm_mm; in remap_pfn_range_internal()
2741 return -EINVAL; in remap_pfn_range_internal()
2756 * There's a horrible special case to handle copy-on-write in remap_pfn_range_internal()
2758 * un-COW'ed pages by matching them up with "vma->vm_pgoff". in remap_pfn_range_internal()
2761 if (is_cow_mapping(vma->vm_flags)) { in remap_pfn_range_internal()
2762 if (addr != vma->vm_start || end != vma->vm_end) in remap_pfn_range_internal()
2763 return -EINVAL; in remap_pfn_range_internal()
2764 vma->vm_pgoff = pfn; in remap_pfn_range_internal()
2770 pfn -= addr >> PAGE_SHIFT; in remap_pfn_range_internal()
2786 * must have pre-validated the caching bits of the pgprot_t.
2806 * remap_pfn_range - remap kernel memory to userspace
2824 return -EINVAL; in remap_pfn_range()
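A typical f_op->mmap sketch for remap_pfn_range() (hypothetical; mydrv_phys and MYDRV_REGION_SIZE are invented placeholders for a physically contiguous region the driver owns): the whole range is mapped up front at mmap() time.

	#include <linux/fs.h>
	#include <linux/mm.h>

	#define MYDRV_REGION_SIZE	(1UL << 20)	/* hypothetical: 1 MiB */
	static phys_addr_t mydrv_phys;			/* assumed set at probe time */

	static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
	{
		unsigned long size = vma->vm_end - vma->vm_start;

		if (vma->vm_pgoff + (size >> PAGE_SHIFT) >
		    (MYDRV_REGION_SIZE >> PAGE_SHIFT))
			return -EINVAL;

		return remap_pfn_range(vma, vma->vm_start,
				       (mydrv_phys >> PAGE_SHIFT) + vma->vm_pgoff,
				       size, vma->vm_page_prot);
	}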
2834 * vm_iomap_memory - remap memory to userspace
2843 * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get
2844 * whatever write-combining details or similar.
2854 return -EINVAL; in vm_iomap_memory()
2856 * You *really* shouldn't map things that aren't page-aligned, in vm_iomap_memory()
2857 * but we've historically allowed it because IO memory might in vm_iomap_memory()
2864 return -EINVAL; in vm_iomap_memory()
2867 if (vma->vm_pgoff > pages) in vm_iomap_memory()
2868 return -EINVAL; in vm_iomap_memory()
2869 pfn += vma->vm_pgoff; in vm_iomap_memory()
2870 pages -= vma->vm_pgoff; in vm_iomap_memory()
2873 vm_len = vma->vm_end - vma->vm_start; in vm_iomap_memory()
2875 return -EINVAL; in vm_iomap_memory()
2878 return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); in vm_iomap_memory()
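A simpler variant of the previous sketch using vm_iomap_memory() (same hypothetical mydrv_phys / MYDRV_REGION_SIZE placeholders): the helper derives the pfn and size checks from the VMA itself, and, as the note above says, the driver may tweak vma->vm_page_prot first, e.g. for write-combining.

	#include <linux/fs.h>
	#include <linux/mm.h>

	static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
	{
		/* Optional: request write-combining for the whole mapping. */
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);

		return vm_iomap_memory(vma, mydrv_phys, MYDRV_REGION_SIZE);
	}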
2896 return -ENOMEM; in apply_to_pte_range()
2902 return -EINVAL; in apply_to_pte_range()
2939 return -ENOMEM; in apply_to_pmd_range()
2948 return -EINVAL; in apply_to_pmd_range()
2975 return -ENOMEM; in apply_to_pud_range()
2984 return -EINVAL; in apply_to_pud_range()
3011 return -ENOMEM; in apply_to_p4d_range()
3020 return -EINVAL; in apply_to_p4d_range()
3046 return -EINVAL; in __apply_to_page_range()
3054 err = -EINVAL; in __apply_to_page_range()
3100 * read non-atomically. Before making any commitment, on those architectures
3111 spin_lock(vmf->ptl); in pte_unmap_same()
3112 same = pte_same(ptep_get(vmf->pte), vmf->orig_pte); in pte_unmap_same()
3113 spin_unlock(vmf->ptl); in pte_unmap_same()
3116 pte_unmap(vmf->pte); in pte_unmap_same()
3117 vmf->pte = NULL; in pte_unmap_same()
3124 * -EHWPOISON: copy failed due to hwpoison in source page
3125 * -EAGAIN: copy failed (some other reason)
3133 struct vm_area_struct *vma = vmf->vma; in __wp_page_copy_user()
3134 struct mm_struct *mm = vma->vm_mm; in __wp_page_copy_user()
3135 unsigned long addr = vmf->address; in __wp_page_copy_user()
3139 return -EHWPOISON; in __wp_page_copy_user()
3145 * a "struct page" for it. We do a best-effort copy by in __wp_page_copy_user()
3147 * fails, we just zero-fill it. Live with it. in __wp_page_copy_user()
3157 vmf->pte = NULL; in __wp_page_copy_user()
3158 if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) { in __wp_page_copy_user()
3161 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); in __wp_page_copy_user()
3162 if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in __wp_page_copy_user()
3167 if (vmf->pte) in __wp_page_copy_user()
3168 update_mmu_tlb(vma, addr, vmf->pte); in __wp_page_copy_user()
3169 ret = -EAGAIN; in __wp_page_copy_user()
3173 entry = pte_mkyoung(vmf->orig_pte); in __wp_page_copy_user()
3174 if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0)) in __wp_page_copy_user()
3175 update_mmu_cache_range(vmf, vma, addr, vmf->pte, 1); in __wp_page_copy_user()
3185 if (vmf->pte) in __wp_page_copy_user()
3188 /* Re-validate under PTL if the page is still mapped */ in __wp_page_copy_user()
3189 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); in __wp_page_copy_user()
3190 if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in __wp_page_copy_user()
3192 if (vmf->pte) in __wp_page_copy_user()
3193 update_mmu_tlb(vma, addr, vmf->pte); in __wp_page_copy_user()
3194 ret = -EAGAIN; in __wp_page_copy_user()
3205 * use-case in __wp_page_copy_user()
3216 if (vmf->pte) in __wp_page_copy_user()
3217 pte_unmap_unlock(vmf->pte, vmf->ptl); in __wp_page_copy_user()
3227 struct file *vm_file = vma->vm_file; in __get_fault_gfp_mask()
3230 return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO; in __get_fault_gfp_mask()
3248 unsigned int old_flags = vmf->flags; in do_page_mkwrite()
3250 vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; in do_page_mkwrite()
3252 if (vmf->vma->vm_file && in do_page_mkwrite()
3253 IS_SWAPFILE(vmf->vma->vm_file->f_mapping->host)) in do_page_mkwrite()
3256 ret = vmf->vma->vm_ops->page_mkwrite(vmf); in do_page_mkwrite()
3258 vmf->flags = old_flags; in do_page_mkwrite()
3263 if (!folio->mapping) { in do_page_mkwrite()
3280 struct vm_area_struct *vma = vmf->vma; in fault_dirty_shared_page()
3282 struct folio *folio = page_folio(vmf->page); in fault_dirty_shared_page()
3284 bool page_mkwrite = vma->vm_ops && vma->vm_ops->page_mkwrite; in fault_dirty_shared_page()
3289 * Take a local copy of the address_space - folio.mapping may be zeroed in fault_dirty_shared_page()
3291 * pinned by vma->vm_file's reference. We rely on folio_unlock()'s in fault_dirty_shared_page()
3298 file_update_time(vma->vm_file); in fault_dirty_shared_page()
3306 * Drop the mmap_lock before waiting on IO, if we can. The file in fault_dirty_shared_page()
3329 * any related book-keeping.
3332 __releases(vmf->ptl) in wp_page_reuse()
3334 struct vm_area_struct *vma = vmf->vma; in wp_page_reuse()
3337 VM_BUG_ON(!(vmf->flags & FAULT_FLAG_WRITE)); in wp_page_reuse()
3338 VM_WARN_ON(is_zero_pfn(pte_pfn(vmf->orig_pte))); in wp_page_reuse()
3342 !PageAnonExclusive(vmf->page)); in wp_page_reuse()
3348 folio_xchg_last_cpupid(folio, (1 << LAST_CPUPID_SHIFT) - 1); in wp_page_reuse()
3351 flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); in wp_page_reuse()
3352 entry = pte_mkyoung(vmf->orig_pte); in wp_page_reuse()
3354 if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) in wp_page_reuse()
3355 update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); in wp_page_reuse()
3356 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_reuse()
3362 * vm_ops that have a ->map_pages have been audited and don't need
3367 struct vm_area_struct *vma = vmf->vma; in vmf_can_call_fault()
3369 if (vma->vm_ops->map_pages || !(vmf->flags & FAULT_FLAG_VMA_LOCK)) in vmf_can_call_fault()
3376 * __vmf_anon_prepare - Prepare to handle an anonymous fault.
3382 * only protected by the per-VMA lock, the caller must retry with the
3385 * do with only the per-VMA lock held for this VMA.
3392 struct vm_area_struct *vma = vmf->vma; in __vmf_anon_prepare()
3395 if (likely(vma->anon_vma)) in __vmf_anon_prepare()
3397 if (vmf->flags & FAULT_FLAG_VMA_LOCK) { in __vmf_anon_prepare()
3398 if (!mmap_read_trylock(vma->vm_mm)) in __vmf_anon_prepare()
3403 if (vmf->flags & FAULT_FLAG_VMA_LOCK) in __vmf_anon_prepare()
3404 mmap_read_unlock(vma->vm_mm); in __vmf_anon_prepare()
3417 * - Allocate a page, copy the content of the old page to the new one.
3418 * - Handle book keeping and accounting - cgroups, mmu-notifiers, etc.
3419 * - Take the PTL. If the pte changed, bail out and release the allocated page
3420 * - If the pte is still the way we remember it, update the page table and all
3421 * relevant references. This includes dropping the reference the page-table
3423 * - In any case, unlock the PTL and drop the reference we took to the old page.
3427 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in wp_page_copy()
3428 struct vm_area_struct *vma = vmf->vma; in wp_page_copy()
3429 struct mm_struct *mm = vma->vm_mm; in wp_page_copy()
3440 if (vmf->page) in wp_page_copy()
3441 old_folio = page_folio(vmf->page); in wp_page_copy()
3446 pfn_is_zero = is_zero_pfn(pte_pfn(vmf->orig_pte)); in wp_page_copy()
3447 new_folio = folio_prealloc(mm, vma, vmf->address, pfn_is_zero); in wp_page_copy()
3454 err = __wp_page_copy_user(&new_folio->page, vmf->page, vmf); in wp_page_copy()
3458 * it's fine. If not, userspace would re-fault on in wp_page_copy()
3461 * The -EHWPOISON case will not be retried. in wp_page_copy()
3468 return err == -EHWPOISON ? VM_FAULT_HWPOISON : 0; in wp_page_copy()
3470 kmsan_copy_page_meta(&new_folio->page, vmf->page); in wp_page_copy()
3476 vmf->address & PAGE_MASK, in wp_page_copy()
3477 (vmf->address & PAGE_MASK) + PAGE_SIZE); in wp_page_copy()
3481 * Re-check the pte - we dropped the lock in wp_page_copy()
3483 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl); in wp_page_copy()
3484 if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in wp_page_copy()
3491 ksm_might_unmap_zero_page(mm, vmf->orig_pte); in wp_page_copy()
3494 flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); in wp_page_copy()
3495 entry = mk_pte(&new_folio->page, vma->vm_page_prot); in wp_page_copy()
3498 if (pte_soft_dirty(vmf->orig_pte)) in wp_page_copy()
3500 if (pte_uffd_wp(vmf->orig_pte)) in wp_page_copy()
3513 ptep_clear_flush(vma, vmf->address, vmf->pte); in wp_page_copy()
3514 folio_add_new_anon_rmap(new_folio, vma, vmf->address, RMAP_EXCLUSIVE); in wp_page_copy()
3517 set_pte_at(mm, vmf->address, vmf->pte, entry); in wp_page_copy()
3518 update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); in wp_page_copy()
3542 folio_remove_rmap_pte(old_folio, vmf->page, vma); in wp_page_copy()
3548 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_copy()
3549 } else if (vmf->pte) { in wp_page_copy()
3550 update_mmu_tlb(vma, vmf->address, vmf->pte); in wp_page_copy()
3551 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_copy()
3577 * finish_mkwrite_fault - finish page fault for a shared mapping, making PTE
3581 * @folio: the folio of vmf->page
3584 * shared mapping due to PTE being read-only once the mapped page is prepared.
3595 WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED)); in finish_mkwrite_fault()
3596 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, in finish_mkwrite_fault()
3597 &vmf->ptl); in finish_mkwrite_fault()
3598 if (!vmf->pte) in finish_mkwrite_fault()
3604 if (!pte_same(ptep_get(vmf->pte), vmf->orig_pte)) { in finish_mkwrite_fault()
3605 update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); in finish_mkwrite_fault()
3606 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_mkwrite_fault()
3619 struct vm_area_struct *vma = vmf->vma; in wp_pfn_shared()
3621 if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { in wp_pfn_shared()
3624 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_pfn_shared()
3629 vmf->flags |= FAULT_FLAG_MKWRITE; in wp_pfn_shared()
3630 ret = vma->vm_ops->pfn_mkwrite(vmf); in wp_pfn_shared()
3640 __releases(vmf->ptl) in wp_page_shared()
3642 struct vm_area_struct *vma = vmf->vma; in wp_page_shared()
3647 if (vma->vm_ops && vma->vm_ops->page_mkwrite) { in wp_page_shared()
3650 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_shared()
3734 * shared-page counter for the old page.
3737 * done by the caller (the low-level page fault routine in most cases).
3745 * We enter with non-exclusive mmap_lock (to exclude vma changes,
3750 __releases(vmf->ptl) in do_wp_page()
3752 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in do_wp_page()
3753 struct vm_area_struct *vma = vmf->vma; in do_wp_page()
3758 if (userfaultfd_pte_wp(vma, ptep_get(vmf->pte))) { in do_wp_page()
3760 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3766 * etc.) because we're only removing the uffd-wp bit, in do_wp_page()
3769 pte = pte_clear_uffd_wp(ptep_get(vmf->pte)); in do_wp_page()
3771 set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); in do_wp_page()
3776 vmf->orig_pte = pte; in do_wp_page()
3780 * Userfaultfd write-protect can defer flushes. Ensure the TLB in do_wp_page()
3783 if (unlikely(userfaultfd_wp(vmf->vma) && in do_wp_page()
3784 mm_tlb_flush_pending(vmf->vma->vm_mm))) in do_wp_page()
3785 flush_tlb_page(vmf->vma, vmf->address); in do_wp_page()
3788 vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); in do_wp_page()
3790 if (vmf->page) in do_wp_page()
3791 folio = page_folio(vmf->page); in do_wp_page()
3797 if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { in do_wp_page()
3803 * Just mark the pages writable and/or call ops->pfn_mkwrite. in do_wp_page()
3805 if (!vmf->page) in do_wp_page()
3818 (PageAnonExclusive(vmf->page) || wp_can_reuse_anon_folio(folio, vma))) { in do_wp_page()
3819 if (!PageAnonExclusive(vmf->page)) in do_wp_page()
3820 SetPageAnonExclusive(vmf->page); in do_wp_page()
3822 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3834 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3846 zap_page_range_single(vma, start_addr, end_addr - start_addr, details); in unmap_mapping_range_vma()
3858 vba = vma->vm_pgoff; in unmap_mapping_range_tree()
3859 vea = vba + vma_pages(vma) - 1; in unmap_mapping_range_tree()
3864 ((zba - vba) << PAGE_SHIFT) + vma->vm_start, in unmap_mapping_range_tree()
3865 ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, in unmap_mapping_range_tree()
3871 * unmap_mapping_folio() - Unmap single folio from processes.
3883 struct address_space *mapping = folio->mapping; in unmap_mapping_folio()
3890 first_index = folio->index; in unmap_mapping_folio()
3891 last_index = folio_next_index(folio) - 1; in unmap_mapping_folio()
3898 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) in unmap_mapping_folio()
3899 unmap_mapping_range_tree(&mapping->i_mmap, first_index, in unmap_mapping_folio()
3905 * unmap_mapping_pages() - Unmap pages from processes.
3921 pgoff_t last_index = start + nr - 1; in unmap_mapping_pages()
3928 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) in unmap_mapping_pages()
3929 unmap_mapping_range_tree(&mapping->i_mmap, first_index, in unmap_mapping_pages()
3936 * unmap_mapping_range - unmap the portion of all mmaps in the specified
3956 pgoff_t hlen = ((pgoff_t)(holelen) + PAGE_SIZE - 1) >> PAGE_SHIFT; in unmap_mapping_range()
3961 (holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; in unmap_mapping_range()
3963 hlen = ULONG_MAX - hba + 1; in unmap_mapping_range()
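A minimal sketch of the exported interface (hypothetical hole-punch helper; names invented): drop every user mapping of a byte range of an inode, including private COWed copies, before the backing pages go away.

	#include <linux/fs.h>
	#include <linux/mm.h>

	static void mydrv_punch_hole(struct inode *inode, loff_t start, loff_t len)
	{
		/* even_cows == 1: also zap private COW copies of file pages. */
		unmap_mapping_range(inode->i_mapping, start, len, 1);
	}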
3975 struct folio *folio = page_folio(vmf->page); in remove_device_exclusive_entry()
3976 struct vm_area_struct *vma = vmf->vma; in remove_device_exclusive_entry()
3982 * the PTL so a racing thread can remove the device-exclusive in remove_device_exclusive_entry()
3985 * been re-allocated after being freed all we do is lock and in remove_device_exclusive_entry()
3997 vma->vm_mm, vmf->address & PAGE_MASK, in remove_device_exclusive_entry()
3998 (vmf->address & PAGE_MASK) + PAGE_SIZE, NULL); in remove_device_exclusive_entry()
4001 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in remove_device_exclusive_entry()
4002 &vmf->ptl); in remove_device_exclusive_entry()
4003 if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte))) in remove_device_exclusive_entry()
4004 restore_exclusive_pte(vma, vmf->page, vmf->address, vmf->pte); in remove_device_exclusive_entry()
4006 if (vmf->pte) in remove_device_exclusive_entry()
4007 pte_unmap_unlock(vmf->pte, vmf->ptl); in remove_device_exclusive_entry()
4021 if (mem_cgroup_swap_full(folio) || (vma->vm_flags & VM_LOCKED) || in should_try_to_free_swap()
4036 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, in pte_marker_clear()
4037 vmf->address, &vmf->ptl); in pte_marker_clear()
4038 if (!vmf->pte) in pte_marker_clear()
4041 * Be careful so that we will only recover a special uffd-wp pte into a in pte_marker_clear()
4048 if (pte_same(vmf->orig_pte, ptep_get(vmf->pte))) in pte_marker_clear()
4049 pte_clear(vmf->vma->vm_mm, vmf->address, vmf->pte); in pte_marker_clear()
4050 pte_unmap_unlock(vmf->pte, vmf->ptl); in pte_marker_clear()
4056 if (vma_is_anonymous(vmf->vma)) in do_pte_missing()
4063 * This is actually a page-missing access, but with uffd-wp special pte
4064 * installed. It means this pte was wr-protected before being unmapped.
4070 * got unregistered - we can simply clear them. in pte_marker_handle_uffd_wp()
4072 if (unlikely(!userfaultfd_wp(vmf->vma))) in pte_marker_handle_uffd_wp()
4080 swp_entry_t entry = pte_to_swp_entry(vmf->orig_pte); in handle_pte_marker()
4090 /* Higher priority than uffd-wp when data corrupted */ in handle_pte_marker()
4107 struct vm_area_struct *vma = vmf->vma; in __alloc_swap_folio()
4111 folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vmf->address); in __alloc_swap_folio()
4115 entry = pte_to_swp_entry(vmf->orig_pte); in __alloc_swap_folio()
4116 if (mem_cgroup_swapin_charge_folio(folio, vma->vm_mm, in __alloc_swap_folio()
4139 if ((si->swap_map[offset + i] & SWAP_HAS_CACHE)) in non_swapcache_batch()
4157 addr = ALIGN_DOWN(vmf->address, nr_pages * PAGE_SIZE); in can_swapin_thp()
4158 idx = (vmf->address - addr) / PAGE_SIZE; in can_swapin_thp()
4161 if (!pte_same(pte, pte_move_swp_offset(vmf->orig_pte, -idx))) in can_swapin_thp()
4169 * from different backends. And they are likely corner cases. Similar in can_swapin_thp()
4205 struct vm_area_struct *vma = vmf->vma; in alloc_swap_folio()
4216 * If uffd is active for the vma we need per-page fault fidelity to in alloc_swap_folio()
4224 * lack handling for such cases, so fallback to swapping in order-0 in alloc_swap_folio()
4230 entry = pte_to_swp_entry(vmf->orig_pte); in alloc_swap_folio()
4235 orders = thp_vma_allowable_orders(vma, vma->vm_flags, in alloc_swap_folio()
4236 TVA_IN_PF | TVA_ENFORCE_SYSFS, BIT(PMD_ORDER) - 1); in alloc_swap_folio()
4237 orders = thp_vma_suitable_orders(vma, vmf->address, orders); in alloc_swap_folio()
4239 vmf->address, orders); in alloc_swap_folio()
4244 pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, in alloc_swap_folio()
4245 vmf->address & PMD_MASK, &ptl); in alloc_swap_folio()
4255 addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); in alloc_swap_folio()
4266 addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); in alloc_swap_folio()
4269 if (!mem_cgroup_swapin_charge_folio(folio, vma->vm_mm, in alloc_swap_folio()
4292 * We enter with non-exclusive mmap_lock (to exclude vma changes,
4301 struct vm_area_struct *vma = vmf->vma; in do_swap_page()
4321 entry = pte_to_swp_entry(vmf->orig_pte); in do_swap_page()
4324 migration_entry_wait(vma->vm_mm, vmf->pmd, in do_swap_page()
4325 vmf->address); in do_swap_page()
4327 vmf->page = pfn_swap_entry_to_page(entry); in do_swap_page()
4330 if (vmf->flags & FAULT_FLAG_VMA_LOCK) { in do_swap_page()
4340 vmf->page = pfn_swap_entry_to_page(entry); in do_swap_page()
4341 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_swap_page()
4342 vmf->address, &vmf->ptl); in do_swap_page()
4343 if (unlikely(!vmf->pte || in do_swap_page()
4344 !pte_same(ptep_get(vmf->pte), in do_swap_page()
4345 vmf->orig_pte))) in do_swap_page()
4352 get_page(vmf->page); in do_swap_page()
4353 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
4354 ret = vmf->page->pgmap->ops->migrate_to_ram(vmf); in do_swap_page()
4355 put_page(vmf->page); in do_swap_page()
4361 print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL); in do_swap_page()
4372 folio = swap_cache_get_folio(entry, vma, vmf->address); in do_swap_page()
4378 if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && in do_swap_page()
4418 folio->swap = entry; in do_swap_page()
4420 folio->private = NULL; in do_swap_page()
4433 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_swap_page()
4434 vmf->address, &vmf->ptl); in do_swap_page()
4435 if (likely(vmf->pte && in do_swap_page()
4436 pte_same(ptep_get(vmf->pte), vmf->orig_pte))) in do_swap_page()
4444 count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); in do_swap_page()
4473 * page->index of !PageKSM() pages would be nonlinear inside the in do_swap_page()
4474 * anon VMA -- PageKSM() is lost on actual swapout. in do_swap_page()
4476 folio = ksm_might_need_to_copy(folio, vma, vmf->address); in do_swap_page()
4481 } else if (unlikely(folio == ERR_PTR(-EHWPOISON))) { in do_swap_page()
4495 if ((vmf->flags & FAULT_FLAG_WRITE) && folio == swapcache && in do_swap_page()
4505 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in do_swap_page()
4506 &vmf->ptl); in do_swap_page()
4507 if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) in do_swap_page()
4518 unsigned long folio_start = ALIGN_DOWN(vmf->address, nr * PAGE_SIZE); in do_swap_page()
4519 unsigned long idx = (vmf->address - folio_start) / PAGE_SIZE; in do_swap_page()
4520 pte_t *folio_ptep = vmf->pte - idx; in do_swap_page()
4523 if (!pte_same(folio_pte, pte_move_swp_offset(vmf->orig_pte, -idx)) || in do_swap_page()
4535 address = vmf->address; in do_swap_page()
4536 ptep = vmf->pte; in do_swap_page()
4540 unsigned long folio_start = address - idx * PAGE_SIZE; in do_swap_page()
4545 if (unlikely(folio_start < max(address & PMD_MASK, vma->vm_start))) in do_swap_page()
4547 if (unlikely(folio_end > pmd_addr_end(address, vma->vm_end))) in do_swap_page()
4550 folio_ptep = vmf->pte - idx; in do_swap_page()
4552 if (!pte_same(folio_pte, pte_move_swp_offset(vmf->orig_pte, -idx)) || in do_swap_page()
4560 entry = folio->swap; in do_swap_page()
4561 page = &folio->page; in do_swap_page()
4581 exclusive = pte_swp_exclusive(vmf->orig_pte); in do_swap_page()
4585 * swapcache -> certainly exclusive. in do_swap_page()
4589 data_race(si->flags & SWP_STABLE_WRITES)) { in do_swap_page()
4591 * This is tricky: not all swap backends support in do_swap_page()
4599 * For these problematic swap backends, simply drop the in do_swap_page()
4625 if (should_try_to_free_swap(folio, vma, vmf->flags)) in do_swap_page()
4628 add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr_pages); in do_swap_page()
4629 add_mm_counter(vma->vm_mm, MM_SWAPENTS, -nr_pages); in do_swap_page()
4630 pte = mk_pte(page, vma->vm_page_prot); in do_swap_page()
4631 if (pte_swp_soft_dirty(vmf->orig_pte)) in do_swap_page()
4633 if (pte_swp_uffd_wp(vmf->orig_pte)) in do_swap_page()
4644 if ((vma->vm_flags & VM_WRITE) && !userfaultfd_pte_wp(vma, pte) && in do_swap_page()
4647 if (vmf->flags & FAULT_FLAG_WRITE) { in do_swap_page()
4649 vmf->flags &= ~FAULT_FLAG_WRITE; in do_swap_page()
4654 folio_ref_add(folio, nr_pages - 1); in do_swap_page()
4656 vmf->orig_pte = pte_advance_pfn(pte, page_idx); in do_swap_page()
4679 set_ptes(vma->vm_mm, address, ptep, pte, nr_pages); in do_swap_page()
4680 arch_do_swap_page_nr(vma->vm_mm, vma, address, in do_swap_page()
4697 if (vmf->flags & FAULT_FLAG_WRITE) { in do_swap_page()
4704 /* No need to invalidate - it was non-present before */ in do_swap_page()
4707 if (vmf->pte) in do_swap_page()
4708 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
4720 if (vmf->pte) in do_swap_page()
4721 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
4754 struct vm_area_struct *vma = vmf->vma; in alloc_anon_folio()
4764 * If uffd is active for the vma we need per-page fault fidelity to in alloc_anon_folio()
4775 orders = thp_vma_allowable_orders(vma, vma->vm_flags, in alloc_anon_folio()
4776 TVA_IN_PF | TVA_ENFORCE_SYSFS, BIT(PMD_ORDER) - 1); in alloc_anon_folio()
4777 orders = thp_vma_suitable_orders(vma, vmf->address, orders); in alloc_anon_folio()
4782 pte = pte_offset_map(vmf->pmd, vmf->address & PMD_MASK); in alloc_anon_folio()
4784 return ERR_PTR(-EAGAIN); in alloc_anon_folio()
4793 addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); in alloc_anon_folio()
4807 addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); in alloc_anon_folio()
4810 if (mem_cgroup_charge(folio, vma->vm_mm, gfp)) { in alloc_anon_folio()
4824 folio_zero_user(folio, vmf->address); in alloc_anon_folio()
4834 return folio_prealloc(vma->vm_mm, vma, vmf->address, true); in alloc_anon_folio()
4838 * We enter with non-exclusive mmap_lock (to exclude vma changes,
4844 struct vm_area_struct *vma = vmf->vma; in do_anonymous_page()
4845 unsigned long addr = vmf->address; in do_anonymous_page()
4851 /* File mapping without ->vm_ops ? */ in do_anonymous_page()
4852 if (vma->vm_flags & VM_SHARED) in do_anonymous_page()
4859 if (pte_alloc(vma->vm_mm, vmf->pmd)) in do_anonymous_page()
4862 /* Use the zero-page for reads */ in do_anonymous_page()
4863 if (!(vmf->flags & FAULT_FLAG_WRITE) && in do_anonymous_page()
4864 !mm_forbids_zeropage(vma->vm_mm)) { in do_anonymous_page()
4865 entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), in do_anonymous_page()
4866 vma->vm_page_prot)); in do_anonymous_page()
4867 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_anonymous_page()
4868 vmf->address, &vmf->ptl); in do_anonymous_page()
4869 if (!vmf->pte) in do_anonymous_page()
4872 update_mmu_tlb(vma, vmf->address, vmf->pte); in do_anonymous_page()
4875 ret = check_stable_address_space(vma->vm_mm); in do_anonymous_page()
4880 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
4890 /* Returns NULL on OOM or ERR_PTR(-EAGAIN) if we must retry the fault */ in do_anonymous_page()
4898 addr = ALIGN_DOWN(vmf->address, nr_pages * PAGE_SIZE); in do_anonymous_page()
4907 entry = mk_pte(&folio->page, vma->vm_page_prot); in do_anonymous_page()
4909 if (vma->vm_flags & VM_WRITE) in do_anonymous_page()
4912 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl); in do_anonymous_page()
4913 if (!vmf->pte) in do_anonymous_page()
4916 update_mmu_tlb(vma, addr, vmf->pte); in do_anonymous_page()
4918 } else if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) { in do_anonymous_page()
4919 update_mmu_tlb_range(vma, addr, vmf->pte, nr_pages); in do_anonymous_page()
4923 ret = check_stable_address_space(vma->vm_mm); in do_anonymous_page()
4929 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
4934 folio_ref_add(folio, nr_pages - 1); in do_anonymous_page()
4935 add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr_pages); in do_anonymous_page()
4942 set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr_pages); in do_anonymous_page()
4944 /* No need to invalidate - it was non-present before */ in do_anonymous_page()
4945 update_mmu_cache_range(vmf, vma, addr, vmf->pte, nr_pages); in do_anonymous_page()
4947 if (vmf->pte) in do_anonymous_page()
4948 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
4959 * released depending on flags and vma->vm_ops->fault() return value.
4964 struct vm_area_struct *vma = vmf->vma; in __do_fault()
4983 if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) { in __do_fault()
4984 vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); in __do_fault()
4985 if (!vmf->prealloc_pte) in __do_fault()
4989 ret = vma->vm_ops->fault(vmf); in __do_fault()
4994 folio = page_folio(vmf->page); in __do_fault()
4995 if (unlikely(PageHWPoison(vmf->page))) { in __do_fault()
4998 if (page_mapped(vmf->page)) in __do_fault()
5001 if (mapping_evict_folio(folio->mapping, folio)) in __do_fault()
5006 vmf->page = NULL; in __do_fault()
5013 VM_BUG_ON_PAGE(!folio_test_locked(folio), vmf->page); in __do_fault()
5021 struct vm_area_struct *vma = vmf->vma; in deposit_prealloc_pte()
5023 pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); in deposit_prealloc_pte()
5028 mm_inc_nr_ptes(vma->vm_mm); in deposit_prealloc_pte()
5029 vmf->prealloc_pte = NULL; in deposit_prealloc_pte()
5035 struct vm_area_struct *vma = vmf->vma; in do_set_pmd()
5036 bool write = vmf->flags & FAULT_FLAG_WRITE; in do_set_pmd()
5037 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in do_set_pmd()
5044 * PMD mappings, but PTE-mapped THP are fine. So let's simply refuse any in do_set_pmd()
5047 if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags)) in do_set_pmd()
5055 page = &folio->page; in do_set_pmd()
5070 if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) { in do_set_pmd()
5071 vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); in do_set_pmd()
5072 if (!vmf->prealloc_pte) in do_set_pmd()
5076 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in do_set_pmd()
5077 if (unlikely(!pmd_none(*vmf->pmd))) in do_set_pmd()
5082 entry = mk_huge_pmd(page, vma->vm_page_prot); in do_set_pmd()
5086 add_mm_counter(vma->vm_mm, mm_counter_file(folio), HPAGE_PMD_NR); in do_set_pmd()
5095 set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); in do_set_pmd()
5097 update_mmu_cache_pmd(vma, haddr, vmf->pmd); in do_set_pmd()
5103 spin_unlock(vmf->ptl); in do_set_pmd()
5114 * set_pte_range - Set a range of PTEs to point to pages in a folio.
5124 struct vm_area_struct *vma = vmf->vma; in set_pte_range()
5125 bool write = vmf->flags & FAULT_FLAG_WRITE; in set_pte_range()
5126 bool prefault = !in_range(vmf->address, addr, nr * PAGE_SIZE); in set_pte_range()
5130 entry = mk_pte(page, vma->vm_page_prot); in set_pte_range()
5141 /* copy-on-write page */ in set_pte_range()
5142 if (write && !(vma->vm_flags & VM_SHARED)) { in set_pte_range()
5149 set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr); in set_pte_range()
5151 /* no need to invalidate: a not-present page won't be cached */ in set_pte_range()
5152 update_mmu_cache_range(vmf, vma, addr, vmf->pte, nr); in set_pte_range()
5157 if (vmf->flags & FAULT_FLAG_ORIG_PTE_VALID) in vmf_pte_changed()
5158 return !pte_same(ptep_get(vmf->pte), vmf->orig_pte); in vmf_pte_changed()
5160 return !pte_none(ptep_get(vmf->pte)); in vmf_pte_changed()
5164 * finish_fault - finish page fault once we have prepared the page to fault
5180 struct vm_area_struct *vma = vmf->vma; in finish_fault()
5184 bool is_cow = (vmf->flags & FAULT_FLAG_WRITE) && in finish_fault()
5185 !(vma->vm_flags & VM_SHARED); in finish_fault()
5191 addr = vmf->address; in finish_fault()
5195 page = vmf->cow_page; in finish_fault()
5197 page = vmf->page; in finish_fault()
5203 if (!(vma->vm_flags & VM_SHARED)) { in finish_fault()
5204 ret = check_stable_address_space(vma->vm_mm); in finish_fault()
5209 if (pmd_none(*vmf->pmd)) { in finish_fault()
5216 if (vmf->prealloc_pte) in finish_fault()
5217 pmd_install(vma->vm_mm, vmf->pmd, &vmf->prealloc_pte); in finish_fault()
5218 else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) in finish_fault()
5226 * Using per-page fault to maintain the uffd semantics, and same in finish_fault()
5227 * approach also applies to non-anonymous-shmem faults to avoid in finish_fault()
5235 /* The page offset of vmf->address within the VMA. */ in finish_fault()
5236 pgoff_t vma_off = vmf->pgoff - vmf->vma->vm_pgoff; in finish_fault()
5238 pgoff_t pte_off = pte_index(vmf->address); in finish_fault()
5241 * Fallback to per-page fault in case the folio size in page in finish_fault()
5245 vma_off + (nr_pages - idx) > vma_pages(vma) || in finish_fault()
5247 pte_off + (nr_pages - idx) > PTRS_PER_PTE)) { in finish_fault()
5251 addr = vmf->address - idx * PAGE_SIZE; in finish_fault()
5252 page = &folio->page; in finish_fault()
5256 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in finish_fault()
5257 addr, &vmf->ptl); in finish_fault()
5258 if (!vmf->pte) in finish_fault()
5261 /* Re-check under ptl */ in finish_fault()
5263 update_mmu_tlb(vma, addr, vmf->pte); in finish_fault()
5266 } else if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) { in finish_fault()
5268 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_fault()
5272 folio_ref_add(folio, nr_pages - 1); in finish_fault()
5275 add_mm_counter(vma->vm_mm, type, nr_pages); in finish_fault()
5279 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_fault()
5300 return -EINVAL; in fault_around_bytes_set()
5303 * The minimum value is 1 page; however, this results in no fault-around in fault_around_bytes_set()
5328 * It uses vm_ops->map_pages() to map the pages, which skips the page if it's
5329 * not ready to be mapped: not up-to-date, locked, etc.
5346 pgoff_t pte_off = pte_index(vmf->address); in do_fault_around()
5347 /* The page offset of vmf->address within the VMA. */ in do_fault_around()
5348 pgoff_t vma_off = vmf->pgoff - vmf->vma->vm_pgoff; in do_fault_around()
5354 pte_off - min(pte_off, vma_off)); in do_fault_around()
5358 pte_off + vma_pages(vmf->vma) - vma_off) - 1; in do_fault_around()
5360 if (pmd_none(*vmf->pmd)) { in do_fault_around()
5361 vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm); in do_fault_around()
5362 if (!vmf->prealloc_pte) in do_fault_around()
5367 ret = vmf->vma->vm_ops->map_pages(vmf, in do_fault_around()
5368 vmf->pgoff + from_pte - pte_off, in do_fault_around()
5369 vmf->pgoff + to_pte - pte_off); in do_fault_around()
5375 /* Return true if we should do read fault-around, false otherwise */
5378 /* No ->map_pages? No way to fault around... */ in should_fault_around()
5379 if (!vmf->vma->vm_ops->map_pages) in should_fault_around()
5382 if (uffd_disable_fault_around(vmf->vma)) in should_fault_around()
5395 * Let's call ->map_pages() first and use ->fault() as fallback in do_read_fault()
5414 folio = page_folio(vmf->page); in do_read_fault()
5423 struct vm_area_struct *vma = vmf->vma; in do_cow_fault()
5433 folio = folio_prealloc(vma->vm_mm, vma, vmf->address, false); in do_cow_fault()
5437 vmf->cow_page = &folio->page; in do_cow_fault()
5445 if (copy_mc_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma)) { in do_cow_fault()
5453 unlock_page(vmf->page); in do_cow_fault()
5454 put_page(vmf->page); in do_cow_fault()
5465 struct vm_area_struct *vma = vmf->vma; in do_shared_fault()
5477 folio = page_folio(vmf->page); in do_shared_fault()
5483 if (vma->vm_ops->page_mkwrite) { in do_shared_fault()
5506 * We enter with non-exclusive mmap_lock (to exclude vma changes,
5515 struct vm_area_struct *vma = vmf->vma; in do_fault()
5516 struct mm_struct *vm_mm = vma->vm_mm; in do_fault()
5522 if (!vma->vm_ops->fault) { in do_fault()
5523 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, in do_fault()
5524 vmf->address, &vmf->ptl); in do_fault()
5525 if (unlikely(!vmf->pte)) in do_fault()
5535 if (unlikely(pte_none(ptep_get(vmf->pte)))) in do_fault()
5540 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_fault()
5542 } else if (!(vmf->flags & FAULT_FLAG_WRITE)) in do_fault()
5544 else if (!(vma->vm_flags & VM_SHARED)) in do_fault()
5550 if (vmf->prealloc_pte) { in do_fault()
5551 pte_free(vm_mm, vmf->prealloc_pte); in do_fault()
5552 vmf->prealloc_pte = NULL; in do_fault()
5561 struct vm_area_struct *vma = vmf->vma; in numa_migrate_check()
5578 if (folio_likely_mapped_shared(folio) && (vma->vm_flags & VM_SHARED)) in numa_migrate_check()
5585 *last_cpupid = (-1 & LAST_CPUPID_MASK); in numa_migrate_check()
5611 pte = pte_modify(old_pte, vma->vm_page_prot); in numa_rebuild_single_mapping()
5623 int nr = pte_pfn(fault_pte) - folio_pfn(folio); in numa_rebuild_large_mapping()
5624 unsigned long start, end, addr = vmf->address; in numa_rebuild_large_mapping()
5625 unsigned long addr_start = addr - (nr << PAGE_SHIFT); in numa_rebuild_large_mapping()
5630 start = max3(addr_start, pt_start, vma->vm_start); in numa_rebuild_large_mapping()
5632 vma->vm_end); in numa_rebuild_large_mapping()
5633 start_ptep = vmf->pte - ((addr - start) >> PAGE_SHIFT); in numa_rebuild_large_mapping()
5647 ptent = pte_modify(ptent, vma->vm_page_prot); in numa_rebuild_large_mapping()
5660 struct vm_area_struct *vma = vmf->vma; in do_numa_page()
5674 spin_lock(vmf->ptl); in do_numa_page()
5676 old_pte = ptep_get(vmf->pte); in do_numa_page()
5678 if (unlikely(!pte_same(old_pte, vmf->orig_pte))) { in do_numa_page()
5679 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
5683 pte = pte_modify(old_pte, vma->vm_page_prot); in do_numa_page()
5691 can_change_pte_writable(vma, vmf->address, pte)) in do_numa_page()
5694 folio = vm_normal_folio(vma, vmf->address, pte); in do_numa_page()
5701 target_nid = numa_migrate_check(folio, vmf, vmf->address, &flags, in do_numa_page()
5710 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
5723 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_numa_page()
5724 vmf->address, &vmf->ptl); in do_numa_page()
5725 if (unlikely(!vmf->pte)) in do_numa_page()
5727 if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in do_numa_page()
5728 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
5734 * non-accessible ptes, some can allow access by kernel mode. in do_numa_page()
5740 numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte, in do_numa_page()
5742 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
5751 struct vm_area_struct *vma = vmf->vma; in create_huge_pmd()
5754 if (vma->vm_ops->huge_fault) in create_huge_pmd()
5755 return vma->vm_ops->huge_fault(vmf, PMD_ORDER); in create_huge_pmd()
5762 struct vm_area_struct *vma = vmf->vma; in wp_huge_pmd()
5763 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in wp_huge_pmd()
5768 userfaultfd_huge_pmd_wp(vma, vmf->orig_pmd)) { in wp_huge_pmd()
5769 if (userfaultfd_wp_async(vmf->vma)) in wp_huge_pmd()
5776 if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { in wp_huge_pmd()
5777 if (vma->vm_ops->huge_fault) { in wp_huge_pmd()
5778 ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER); in wp_huge_pmd()
5785 /* COW or write-notify handled on pte level: split pmd. */ in wp_huge_pmd()
5786 __split_huge_pmd(vma, vmf->pmd, vmf->address, false, NULL); in wp_huge_pmd()
5795 struct vm_area_struct *vma = vmf->vma; in create_huge_pud()
5799 if (vma->vm_ops->huge_fault) in create_huge_pud()
5800 return vma->vm_ops->huge_fault(vmf, PUD_ORDER); in create_huge_pud()
5809 struct vm_area_struct *vma = vmf->vma; in wp_huge_pud()
5815 if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { in wp_huge_pud()
5816 if (vma->vm_ops->huge_fault) { in wp_huge_pud()
5817 ret = vma->vm_ops->huge_fault(vmf, PUD_ORDER); in wp_huge_pud()
5823 /* COW or write-notify not handled on PUD level: split pud. */ in wp_huge_pud()
5824 __split_huge_pud(vma, vmf->pud, vmf->address); in wp_huge_pud()
5838 * We enter with non-exclusive mmap_lock (to exclude vma changes, but allow
5848 if (unlikely(pmd_none(*vmf->pmd))) { in handle_pte_fault()
5850 * Leave __pte_alloc() until later: because vm_ops->fault may in handle_pte_fault()
5855 vmf->pte = NULL; in handle_pte_fault()
5856 vmf->flags &= ~FAULT_FLAG_ORIG_PTE_VALID; in handle_pte_fault()
5866 * Use the maywrite version to indicate that vmf->pte may be in handle_pte_fault()
5873 vmf->pte = pte_offset_map_rw_nolock(vmf->vma->vm_mm, vmf->pmd, in handle_pte_fault()
5874 vmf->address, &dummy_pmdval, in handle_pte_fault()
5875 &vmf->ptl); in handle_pte_fault()
5876 if (unlikely(!vmf->pte)) in handle_pte_fault()
5878 vmf->orig_pte = ptep_get_lockless(vmf->pte); in handle_pte_fault()
5879 vmf->flags |= FAULT_FLAG_ORIG_PTE_VALID; in handle_pte_fault()
5881 if (pte_none(vmf->orig_pte)) { in handle_pte_fault()
5882 pte_unmap(vmf->pte); in handle_pte_fault()
5883 vmf->pte = NULL; in handle_pte_fault()
5887 if (!vmf->pte) in handle_pte_fault()
5890 if (!pte_present(vmf->orig_pte)) in handle_pte_fault()
5893 if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) in handle_pte_fault()
5896 spin_lock(vmf->ptl); in handle_pte_fault()
5897 entry = vmf->orig_pte; in handle_pte_fault()
5898 if (unlikely(!pte_same(ptep_get(vmf->pte), entry))) { in handle_pte_fault()
5899 update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); in handle_pte_fault()
5902 if (vmf->flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) { in handle_pte_fault()
5905 else if (likely(vmf->flags & FAULT_FLAG_WRITE)) in handle_pte_fault()
5909 if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry, in handle_pte_fault()
5910 vmf->flags & FAULT_FLAG_WRITE)) { in handle_pte_fault()
5911 update_mmu_cache_range(vmf, vmf->vma, vmf->address, in handle_pte_fault()
5912 vmf->pte, 1); in handle_pte_fault()
5915 if (vmf->flags & FAULT_FLAG_TRIED) in handle_pte_fault()
5923 if (vmf->flags & FAULT_FLAG_WRITE) in handle_pte_fault()
5924 flush_tlb_fix_spurious_fault(vmf->vma, vmf->address, in handle_pte_fault()
5925 vmf->pte); in handle_pte_fault()
5928 pte_unmap_unlock(vmf->pte, vmf->ptl); in handle_pte_fault()
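
Only scattered lines of handle_pte_fault() match above, so the overall dispatch order is easy to lose. As a condensed, hedged outline (the helper names are the upstream ones, but locking, retry and spurious-fault handling are trimmed):

	if (!vmf->pte)
		return do_pte_missing(vmf);	/* no pte: anonymous or file-backed fault */
	if (!pte_present(vmf->orig_pte))
		return do_swap_page(vmf);	/* swap, migration or marker entry */
	if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma))
		return do_numa_page(vmf);	/* NUMA hinting fault (the PROT_NONE trick) */

	/* pte is present: write/unshare faults may need COW via do_wp_page() */
	if (vmf->flags & (FAULT_FLAG_WRITE | FAULT_FLAG_UNSHARE)) {
		if (!pte_write(entry))
			return do_wp_page(vmf);
		if (vmf->flags & FAULT_FLAG_WRITE)
			entry = pte_mkdirty(entry);
	}

	/* otherwise just set the accessed/dirty bits and fix up the TLB */
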
5949 struct mm_struct *mm = vma->vm_mm; in __handle_mm_fault()
5950 unsigned long vm_flags = vma->vm_flags; in __handle_mm_fault()
6035 * mm_account_fault - Do page fault accounting
6038 * of perf event counters, but we'll still do the per-task accounting to
6047 * still be in per-arch page fault handlers at the entry of page fault.
6084 current->maj_flt++; in mm_account_fault()
6086 current->min_flt++; in mm_account_fault()
6106 current->in_lru_fault = vma_has_recency(vma); in lru_gen_enter_fault()
6111 current->in_lru_fault = false; in lru_gen_exit_fault()
6131 * just treat it like an ordinary read-fault otherwise. in sanitize_fault_flags()
6133 if (!is_cow_mapping(vma->vm_flags)) in sanitize_fault_flags()
6136 /* Write faults on read-only mappings are impossible ... */ in sanitize_fault_flags()
6137 if (WARN_ON_ONCE(!(vma->vm_flags & VM_MAYWRITE))) in sanitize_fault_flags()
6140 if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE) && in sanitize_fault_flags()
6141 !is_cow_mapping(vma->vm_flags))) in sanitize_fault_flags()
6146 * Per-VMA locks can't be used with FAULT_FLAG_RETRY_NOWAIT because of in sanitize_fault_flags()
6169 struct mm_struct *mm = vma->vm_mm; in handle_mm_fault()
6186 is_droppable = !!(vma->vm_flags & VM_DROPPABLE); in handle_mm_fault()
6198 ret = hugetlb_fault(vma->vm_mm, vma, address, flags); in handle_mm_fault()
6203 * Warning: It is no longer safe to dereference vma-> after this point, in handle_mm_fault()
6256 * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but in mmap_upgrade_trylock()
6301 if (likely(vma && (vma->vm_start <= addr))) in lock_mm_and_find_vma()
6308 if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) { in lock_mm_and_find_vma()
6319 * re-take it, and also look up the vma again, in lock_mm_and_find_vma()
6320 * re-checking it. in lock_mm_and_find_vma()
6329 if (vma->vm_start <= addr) in lock_mm_and_find_vma()
6331 if (!(vma->vm_flags & VM_GROWSDOWN)) in lock_mm_and_find_vma()
6357 MA_STATE(mas, &mm->mm_mt, address, address); in lock_vma_under_rcu()
6370 if (vma->detached) { in lock_vma_under_rcu()
6384 if (unlikely(address < vma->vm_start || address >= vma->vm_end)) in lock_vma_under_rcu()
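
The lock_vma_under_rcu() lines above are the per-VMA-lock fast path: look the vma up in the maple tree under RCU, try to take its read lock, then re-check that it was not detached or resized while we raced, all without touching mmap_lock. A condensed, hedged outline (the bail-out labels and event counters are omitted):

	rcu_read_lock();
retry:
	vma = mas_walk(&mas);			/* maple tree lookup, no locks held */
	if (!vma || !vma_start_read(vma))	/* try-lock the per-VMA read lock */
		goto inval;
	if (vma->detached) {			/* vma was isolated while we raced */
		vma_end_read(vma);
		goto retry;
	}
	if (unlikely(address < vma->vm_start || address >= vma->vm_end))
		goto inval_end_read;		/* vm_start/vm_end changed before the lock */
	rcu_read_unlock();
	return vma;				/* caller faults with only the VMA lock held */
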
6402 * We've already handled the fast-path in-line.
6408 return -ENOMEM; in __p4d_alloc()
6410 spin_lock(&mm->page_table_lock); in __p4d_alloc()
6417 spin_unlock(&mm->page_table_lock); in __p4d_alloc()
6425 * We've already handled the fast-path in-line.
6431 return -ENOMEM; in __pud_alloc()
6433 spin_lock(&mm->page_table_lock); in __pud_alloc()
6440 spin_unlock(&mm->page_table_lock); in __pud_alloc()
6448 * We've already handled the fast-path in-line.
6455 return -ENOMEM; in __pmd_alloc()
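
__p4d_alloc(), __pud_alloc() and __pmd_alloc() above all use the same idiom: allocate the new page-table page with no locks held (the allocation may sleep), then take mm->page_table_lock and re-check whether another thread already installed one. A hedged sketch of that shape using the pud level as the example (simplified; the upstream body also carries an smp_wmb() matching the comment in pmd_install()):

	pud_t *new = pud_alloc_one(mm, address);	/* may sleep, no lock held */

	if (!new)
		return -ENOMEM;

	spin_lock(&mm->page_table_lock);
	if (!p4d_present(*p4d)) {
		mm_inc_nr_puds(mm);
		p4d_populate(mm, p4d, new);		/* we won the race: install it */
	} else {
		pud_free(mm, new);			/* someone else populated it first */
	}
	spin_unlock(&mm->page_table_lock);
	return 0;
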
6476 args->lock = lock; in pfnmap_args_setup()
6477 args->ptep = ptep; in pfnmap_args_setup()
6478 args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT); in pfnmap_args_setup()
6479 args->pgprot = pgprot; in pfnmap_args_setup()
6480 args->writable = writable; in pfnmap_args_setup()
6481 args->special = special; in pfnmap_args_setup()
6487 struct file *file = vma->vm_file; in pfnmap_lockdep_assert()
6488 struct address_space *mapping = file ? file->f_mapping : NULL; in pfnmap_lockdep_assert()
6491 lockdep_assert(lockdep_is_held(&mapping->i_mmap_rwsem) || in pfnmap_lockdep_assert()
6492 lockdep_is_held(&vma->vm_mm->mmap_lock)); in pfnmap_lockdep_assert()
6494 lockdep_assert(lockdep_is_held(&vma->vm_mm->mmap_lock)); in pfnmap_lockdep_assert()
6499 * follow_pfnmap_start() - Look up a pfn mapping at a user virtual address
6502 * The caller needs to setup args->vma and args->address to point to the
6519 * a later point in time can trigger use-after-free.
6521 * Only IO mappings and raw PFN mappings are allowed. The mmap semaphore
6531 struct vm_area_struct *vma = args->vma; in follow_pfnmap_start()
6532 unsigned long address = args->address; in follow_pfnmap_start()
6533 struct mm_struct *mm = vma->vm_mm; in follow_pfnmap_start()
6543 if (unlikely(address < vma->vm_start || address >= vma->vm_end)) in follow_pfnmap_start()
6546 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) in follow_pfnmap_start()
6601 return -EINVAL; in follow_pfnmap_start()
6614 if (args->lock) in follow_pfnmap_end()
6615 spin_unlock(args->lock); in follow_pfnmap_end()
6616 if (args->ptep) in follow_pfnmap_end()
6617 pte_unmap(args->ptep); in follow_pfnmap_end()
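
Putting the follow_pfnmap_start()/follow_pfnmap_end() fragments together, a minimal driver-side usage sketch looks roughly like the following (hedged: error handling is trimmed, the caller is assumed to already hold the mmap lock or i_mmap_rwsem as the lockdep assert above requires, and the pr_info() is purely illustrative):

	struct follow_pfnmap_args args = {
		.vma = vma,		/* a VM_IO | VM_PFNMAP vma found by the caller */
		.address = address,	/* user virtual address inside that vma */
	};

	if (follow_pfnmap_start(&args))
		return -EINVAL;		/* not a pfnmap, or nothing mapped there */

	/*
	 * args.pfn, args.pgprot, args.writable and args.special are only
	 * valid until follow_pfnmap_end() drops the page table lock.
	 */
	pr_info("pfn %#lx writable=%d\n", args.pfn, args.writable);

	follow_pfnmap_end(&args);
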
6623 * generic_access_phys - generic implementation for iomem mmap access
6641 int ret = -EINVAL; in generic_access_phys()
6647 return -EINVAL; in generic_access_phys()
6654 return -EINVAL; in generic_access_phys()
6658 return -ENOMEM; in generic_access_phys()
6720 return buf - old_buf; in __access_remote_vm()
6732 if (vma->vm_ops && vma->vm_ops->access) in __access_remote_vm()
6733 bytes = vma->vm_ops->access(vma, addr, buf, in __access_remote_vm()
6740 offset = addr & (PAGE_SIZE-1); in __access_remote_vm()
6741 if (bytes > PAGE_SIZE-offset) in __access_remote_vm()
6742 bytes = PAGE_SIZE-offset; in __access_remote_vm()
6755 len -= bytes; in __access_remote_vm()
6761 return buf - old_buf; in __access_remote_vm()
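
The copy loop in __access_remote_vm() above never lets a single copy cross a page boundary; the offset/bytes arithmetic can be checked on its own with a small standalone model (plain userspace C, made-up start address and length, PAGE_SIZE assumed to be 4096):

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Model of the per-iteration chunking in __access_remote_vm(). */
int main(void)
{
	unsigned long addr = 0x7f0000001f00UL;	/* arbitrary unaligned start */
	unsigned long len  = 10000;		/* bytes left to copy */

	while (len) {
		unsigned long offset = addr & (PAGE_SIZE - 1);
		unsigned long bytes = len;

		if (bytes > PAGE_SIZE - offset)
			bytes = PAGE_SIZE - offset;	/* never cross a page */

		printf("copy %5lu bytes at offset %4lu in page %#lx\n",
		       bytes, offset, addr & ~(PAGE_SIZE - 1));

		len -= bytes;
		addr += bytes;
	}
	return 0;
}
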
6765 * access_remote_vm - access another process' address space
6810 struct mm_struct *mm = current->mm; in print_vma_addr()
6820 if (vma && vma->vm_file) { in print_vma_addr()
6821 struct file *f = vma->vm_file; in print_vma_addr()
6822 ip -= vma->vm_start; in print_vma_addr()
6823 ip += vma->vm_pgoff << PAGE_SHIFT; in print_vma_addr()
6825 vma->vm_start, in print_vma_addr()
6826 vma->vm_end - vma->vm_start); in print_vma_addr()
6838 if (current->mm) in __might_fault()
6839 might_lock_read(&current->mm->mmap_lock); in __might_fault()
6858 ~(((unsigned long)nr_pages << PAGE_SHIFT) - 1); in process_huge_page()
6862 n = (addr_hint - addr) / PAGE_SIZE; in process_huge_page()
6868 for (i = nr_pages - 1; i >= 2 * n; i--) { in process_huge_page()
6876 base = nr_pages - 2 * (nr_pages - n); in process_huge_page()
6877 l = nr_pages - n; in process_huge_page()
6887 * Process remaining subpages in left-right-left-right pattern in process_huge_page()
6892 int right_idx = base + 2 * l - 1 - i; in process_huge_page()
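
The process_huge_page() fragments above encode a deliberate visiting order: the subpages far from the faulting address are processed first, and the final loop then walks left-right towards the target so its cache lines are touched last. A standalone model of that index arithmetic (plain C; nr_pages and the target index n are made-up example values):

#include <stdio.h>

/* Model of the subpage visiting order used by process_huge_page(). */
static void visit_order(int nr_pages, int n)	/* n = target subpage index */
{
	int i, base, l;

	if (2 * n <= nr_pages) {		/* target in first half */
		base = 0;
		l = n;
		for (i = nr_pages - 1; i >= 2 * n; i--)	/* tail of the huge page first */
			printf("%d ", i);
	} else {				/* target in second half */
		base = nr_pages - 2 * (nr_pages - n);
		l = nr_pages - n;
		for (i = 0; i < base; i++)		/* head of the huge page first */
			printf("%d ", i);
	}
	/* left-right-left-right towards the target, which comes out last */
	for (i = 0; i < l; i++)
		printf("%d %d ", base + i, base + 2 * l - 1 - i);
	printf("\n");
}

int main(void)
{
	visit_order(8, 2);	/* prints: 7 6 5 4 0 3 1 2 */
	visit_order(8, 6);	/* prints: 0 1 2 3 4 7 5 6 */
	return 0;
}
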
6928 * folio_zero_user - Zero a folio which will be mapped to userspace.
6959 return -EHWPOISON; in copy_user_gigantic_page()
6973 struct page *dst = folio_page(copy_arg->dst, idx); in copy_subpage()
6974 struct page *src = folio_page(copy_arg->src, idx); in copy_subpage()
6976 if (copy_mc_user_highpage(dst, src, addr, copy_arg->vma)) in copy_subpage()
6977 return -EHWPOISON; in copy_subpage()
7017 ret_val -= (PAGE_SIZE - rc); in copy_folio_from_user()
7035 page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0, in ptlock_cache_init()
7046 ptdesc->ptl = ptl; in ptlock_alloc()
7052 if (ptdesc->ptl) in ptlock_free()
7053 kmem_cache_free(page_ptl_cachep, ptdesc->ptl); in ptlock_free()