Lines Matching +full:entry +full:-address in mm/memory.c
1 // SPDX-License-Identifier: GPL-2.0-only
9 * demand-loading started 01.12.91 - seems it is high on the list of
10 * things wanted, and it should be easy to implement. - Linus
14 * Ok, demand-loading was easy, shared pages a little bit tricker. Shared
15 * pages started 02.12.91, seems to work. - Linus.
21 * Also corrected some "invalidate()"s - I wasn't doing enough of them.
27 * 19.12.91 - works, somewhat. Sometimes I get faults, don't know why.
29 * 20.12.91 - Ok, making the swap-device changeable like the root.
33 * 05.04.94 - Multi-page memory management added for v1.1.
36 * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG
69 #include <linux/memory-tiers.h>
89 #include "pgalloc-track.h"
94 #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
102 * Return true if the original pte was a uffd-wp pte marker (so the pte was
103 * wr-protected).
107 if (!userfaultfd_wp(vmf->vma)) in vmf_orig_pte_uffd_wp()
109 if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID)) in vmf_orig_pte_uffd_wp()
112 return pte_is_uffd_wp_marker(vmf->orig_pte); in vmf_orig_pte_uffd_wp()
116 * Randomize the address space (stacks, mmaps, brk, etc.).
195 mm_dec_nr_ptes(tlb->mm); in free_pte_range()
223 if (end - 1 > ceiling - 1) in free_pmd_range()
229 mm_dec_nr_pmds(tlb->mm); in free_pmd_range()
257 if (end - 1 > ceiling - 1) in free_pud_range()
263 mm_dec_nr_puds(tlb->mm); in free_pud_range()
291 if (end - 1 > ceiling - 1) in free_p4d_range()
300 * free_pgd_range - Unmap and free page tables in the range
302 * @addr: virtual address start
303 * @end: virtual address end
304 * @floor: lowest address boundary
305 * @ceiling: highest address boundary
307 * This function tears down all user-level page tables in the
308 * specified virtual address range [@addr..@end). It is part of
325 * Why all these "- 1"s? Because 0 represents both the bottom in free_pgd_range()
326 * of the address space and the top of it (using -1 for the in free_pgd_range()
329 * the address space, but end 0 and ceiling 0 refer to the top in free_pgd_range()
330 * Comparisons need to use "end - 1" and "ceiling - 1" (though in free_pgd_range()
341 * bother to round floor or end up - the tests don't need that. in free_pgd_range()
355 if (end - 1 > ceiling - 1) in free_pgd_range()
356 end -= PMD_SIZE; in free_pgd_range()
357 if (addr > end - 1) in free_pgd_range()
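The "- 1"s above rely on unsigned wraparound: a ceiling of 0 stands for the very top of the address space, and subtracting 1 turns it into ULONG_MAX, so no real end can compare above it. A minimal stand-alone illustration, not kernel code (PMD_SIZE stands in for the kernel macro):

/* Illustration only: why "end - 1 > ceiling - 1" handles ceiling == 0. */
static void ceiling_example(void)
{
        unsigned long end = 0x00007fffffffe000UL; /* end of range being freed */
        unsigned long ceiling = 0;                /* 0 == top of address space */

        if (end - 1 > ceiling - 1)  /* ceiling - 1 wraps to ULONG_MAX: false */
                end -= PMD_SIZE;    /* end only trimmed for a real, lower ceiling */
}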
364 pgd = pgd_offset(tlb->mm, addr); in free_pgd_range()
382 unsigned long addr = vma->vm_start; in free_pgtables()
389 next = mas_find(mas, ceiling - 1); in free_pgtables()
407 while (next && next->vm_start <= vma->vm_end + PMD_SIZE) { in free_pgtables()
409 next = mas_find(mas, ceiling - 1); in free_pgtables()
419 free_pgd_range(tlb, addr, vma->vm_end, in free_pgtables()
420 floor, next ? next->vm_start : ceiling); in free_pgtables()
439 * of a chain of data-dependent loads, meaning most CPUs (alpha in pmd_install()
441 * seen in-order. See the alpha page table accessors for the in pmd_install()
455 return -ENOMEM; in __pte_alloc()
467 return -ENOMEM; in __pte_alloc_kernel()
575 * This function is called to print an error when a bad page table entry (e.g.,
576 * corrupted page table entry) is found. For example, we might have a
577 * PFN-mapped pte in a region that doesn't allow it.
582 * re-walk the page table to dump information: the caller MUST prevent page
587 unsigned long addr, unsigned long long entry, struct page *page, in print_bad_page_map() argument
596 mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL; in print_bad_page_map()
599 pr_alert("BUG: Bad page map in process %s %s:%08llx", current->comm, in print_bad_page_map()
600 pgtable_level_to_str(level), entry); in print_bad_page_map()
601 __print_bad_page_map_pgtable(vma->vm_mm, addr); in print_bad_page_map()
605 (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); in print_bad_page_map()
607 vma->vm_file, in print_bad_page_map()
608 vma->vm_ops ? vma->vm_ops->fault : NULL, in print_bad_page_map()
609 vma->vm_file ? vma->vm_file->f_op->mmap : NULL, in print_bad_page_map()
610 vma->vm_file ? vma->vm_file->f_op->mmap_prepare : NULL, in print_bad_page_map()
611 mapping ? mapping->a_ops->read_folio : NULL); in print_bad_page_map()
619 * __vm_normal_page() - Get the "struct page" associated with a page table entry.
620 * @vma: The VMA mapping the page table entry.
621 * @addr: The address where the page table entry is mapped.
622 * @pfn: The PFN stored in the page table entry.
623 * @special: Whether the page table entry is marked "special".
625 * @entry: The page table entry value for error reporting purposes only.
639 * page table entry bit, such as pte_special(), in which case this function is
641 * entry bit, which requires a more complicated scheme, described below.
645 * cannot be looked up through the PFN stored in the page table entry, but
646 * instead will be looked up through vm_ops->find_normal_page(). So far, this
658 * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
662 * This restricts such mappings to be a linear translation from virtual address
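For VM_PFNMAP the linear rule above holds because remap_pfn_range() records the first PFN in vma->vm_pgoff for copy-on-write mappings (see get_remap_pgoff() around 2903-2910 and remap_pfn_range_prepare_vma() around 3064-3066 below). A sketch of that driver-side convention, with made-up names (my_dev_mmap, my_phys_base), not taken from this file:

#include <linux/fs.h>
#include <linux/mm.h>

static phys_addr_t my_phys_base;   /* hypothetical: set at probe time */

static int my_dev_mmap(struct file *file, struct vm_area_struct *vma)
{
        /* For a COW-able private mapping covering the whole VMA,
         * remap_pfn_range() stores the starting PFN in vma->vm_pgoff,
         * which is what lets vm_normal_page() apply
         *   pfn == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
         * to tell raw PFN-map entries from COW'ed anonymous pages. */
        return remap_pfn_range(vma, vma->vm_start,
                               my_phys_base >> PAGE_SHIFT,
                               vma->vm_end - vma->vm_start,
                               vma->vm_page_prot);
}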
685 unsigned long long entry, enum pgtable_level level) in __vm_normal_page() argument
690 if (vma->vm_ops && vma->vm_ops->find_normal_page) in __vm_normal_page()
691 return vma->vm_ops->find_normal_page(vma, addr); in __vm_normal_page()
693 if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) in __vm_normal_page()
698 print_bad_page_map(vma, addr, entry, NULL, level); in __vm_normal_page()
706 if (unlikely(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))) { in __vm_normal_page()
707 if (vma->vm_flags & VM_MIXEDMAP) { in __vm_normal_page()
712 unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT; in __vm_normal_page()
715 if (pfn == vma->vm_pgoff + off) in __vm_normal_page()
717 if (!is_cow_mapping(vma->vm_flags)) in __vm_normal_page()
727 /* Corrupted page table entry. */ in __vm_normal_page()
728 print_bad_page_map(vma, addr, entry, NULL, level); in __vm_normal_page()
740 * vm_normal_page() - Get the "struct page" associated with a PTE
742 * @addr: The address where the @pte is mapped.
759 * vm_normal_folio() - Get the "struct folio" associated with a PTE
761 * @addr: The address where the @pte is mapped.
782 * vm_normal_page_pmd() - Get the "struct page" associated with a PMD
784 * @addr: The address where the @pmd is mapped.
801 * vm_normal_folio_pmd() - Get the "struct folio" associated with a PMD
803 * @addr: The address where the @pmd is mapped.
823 * vm_normal_page_pud() - Get the "struct page" associated with a PUD
825 * @addr: The address where the @pud is mapped.
843 * restore_exclusive_pte - Restore a device-exclusive entry
844 * @vma: VMA covering @address
847 * @address: the virtual address
851 * Restore a device-exclusive non-swap entry to an ordinary present pte.
857 * a device-exclusive entry can map it into the device to make forward
869 struct folio *folio, struct page *page, unsigned long address, in restore_exclusive_pte() argument
876 pte = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot))); in restore_exclusive_pte()
883 if ((vma->vm_flags & VM_WRITE) && in restore_exclusive_pte()
884 can_change_pte_writable(vma, address, pte)) { in restore_exclusive_pte()
889 set_pte_at(vma->vm_mm, address, ptep, pte); in restore_exclusive_pte()
892 * No need to invalidate - it was non-present before. However in restore_exclusive_pte()
895 update_mmu_cache(vma, address, ptep); in restore_exclusive_pte()
905 const softleaf_t entry = softleaf_from_pte(orig_pte); in try_restore_exclusive_pte() local
906 struct page *page = softleaf_to_page(entry); in try_restore_exclusive_pte()
915 return -EBUSY; in try_restore_exclusive_pte()
929 vm_flags_t vm_flags = dst_vma->vm_flags; in copy_nonpresent_pte()
931 softleaf_t entry = softleaf_from_pte(orig_pte); in copy_nonpresent_pte() local
936 if (likely(softleaf_is_swap(entry))) { in copy_nonpresent_pte()
937 if (swap_duplicate(entry) < 0) in copy_nonpresent_pte()
938 return -EIO; in copy_nonpresent_pte()
941 if (unlikely(list_empty(&dst_mm->mmlist))) { in copy_nonpresent_pte()
943 if (list_empty(&dst_mm->mmlist)) in copy_nonpresent_pte()
944 list_add(&dst_mm->mmlist, in copy_nonpresent_pte()
945 &src_mm->mmlist); in copy_nonpresent_pte()
948 /* Mark the swap entry as shared. */ in copy_nonpresent_pte()
954 } else if (softleaf_is_migration(entry)) { in copy_nonpresent_pte()
955 folio = softleaf_to_folio(entry); in copy_nonpresent_pte()
959 if (!softleaf_is_migration_read(entry) && in copy_nonpresent_pte()
963 * to be set to read. A previously exclusive entry is in copy_nonpresent_pte()
966 entry = make_readable_migration_entry( in copy_nonpresent_pte()
967 swp_offset(entry)); in copy_nonpresent_pte()
968 pte = softleaf_to_pte(entry); in copy_nonpresent_pte()
975 } else if (softleaf_is_device_private(entry)) { in copy_nonpresent_pte()
976 page = softleaf_to_page(entry); in copy_nonpresent_pte()
994 * We do not preserve soft-dirty information, because so in copy_nonpresent_pte()
1000 if (softleaf_is_device_private_write(entry) && in copy_nonpresent_pte()
1002 entry = make_readable_device_private_entry( in copy_nonpresent_pte()
1003 swp_offset(entry)); in copy_nonpresent_pte()
1004 pte = swp_entry_to_pte(entry); in copy_nonpresent_pte()
1009 } else if (softleaf_is_device_exclusive(entry)) { in copy_nonpresent_pte()
1012 * original entry then copying as for a present pte. Device in copy_nonpresent_pte()
1016 VM_BUG_ON(!is_cow_mapping(src_vma->vm_flags)); in copy_nonpresent_pte()
1018 return -EBUSY; in copy_nonpresent_pte()
1019 return -ENOENT; in copy_nonpresent_pte()
1020 } else if (softleaf_is_marker(entry)) { in copy_nonpresent_pte()
1021 pte_marker marker = copy_pte_marker(entry, dst_vma); in copy_nonpresent_pte()
1039 * and re-use the pte the traditional way.
1041 * And if we need a pre-allocated page but don't yet have
1056 return -EAGAIN; in copy_present_page()
1063 if (copy_mc_user_highpage(&new_folio->page, page, addr, src_vma)) in copy_present_page()
1064 return -EHWPOISON; in copy_present_page()
1073 pte = folio_mk_pte(new_folio, dst_vma->vm_page_prot); in copy_present_page()
1076 /* Uffd-wp needs to be delivered to dest pte as well */ in copy_present_page()
1078 set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); in copy_present_page()
1086 struct mm_struct *src_mm = src_vma->vm_mm; in __copy_present_ptes()
1089 if (is_cow_mapping(src_vma->vm_flags) && pte_write(pte)) { in __copy_present_ptes()
1095 if (src_vma->vm_flags & VM_SHARED) in __copy_present_ptes()
1102 set_ptes(dst_vma->vm_mm, addr, dst_pte, pte, nr); in __copy_present_ptes()
1106 * Copy one present PTE, trying to batch-process subsequent PTEs that map
1109 * Returns -EAGAIN if one preallocated page is required to copy the next PTE.
1134 if (!(src_vma->vm_flags & VM_SHARED)) in copy_present_ptes()
1145 return -EAGAIN; in copy_present_ptes()
1212 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pte_range()
1213 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pte_range()
1221 softleaf_t entry = softleaf_mk_none(); in copy_pte_range() local
1233 * protected by mmap_lock-less collapse skipping areas with anon_vma in copy_pte_range()
1239 ret = -ENOMEM; in copy_pte_range()
1245 * retract_page_tables() are using vma->anon_vma to be exclusive, so in copy_pte_range()
1265 * We are holding two locks at this point - either of them in copy_pte_range()
1284 if (ret == -EIO) { in copy_pte_range()
1285 entry = softleaf_from_pte(ptep_get(src_pte)); in copy_pte_range()
1287 } else if (ret == -EBUSY) { in copy_pte_range()
1297 * Device exclusive entry restored, continue by copying in copy_pte_range()
1300 WARN_ON_ONCE(ret != -ENOENT); in copy_pte_range()
1303 max_nr = (end - addr) / PAGE_SIZE; in copy_pte_range()
1307 * If we need a pre-allocated page for this pte, drop the in copy_pte_range()
1311 if (unlikely(ret == -EAGAIN || ret == -EHWPOISON)) in copy_pte_range()
1315 * pre-alloc page cannot be reused by next time so as in copy_pte_range()
1317 * will allocate page according to address). This in copy_pte_range()
1334 if (ret == -EIO) { in copy_pte_range()
1335 VM_WARN_ON_ONCE(!entry.val); in copy_pte_range()
1336 if (add_swap_count_continuation(entry, GFP_KERNEL) < 0) { in copy_pte_range()
1337 ret = -ENOMEM; in copy_pte_range()
1340 entry.val = 0; in copy_pte_range()
1341 } else if (ret == -EBUSY || unlikely(ret == -EHWPOISON)) { in copy_pte_range()
1343 } else if (ret == -EAGAIN) { in copy_pte_range()
1346 return -ENOMEM; in copy_pte_range()
1367 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pmd_range()
1368 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pmd_range()
1374 return -ENOMEM; in copy_pmd_range()
1381 VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma); in copy_pmd_range()
1384 if (err == -ENOMEM) in copy_pmd_range()
1385 return -ENOMEM; in copy_pmd_range()
1394 return -ENOMEM; in copy_pmd_range()
1404 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pud_range()
1405 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pud_range()
1411 return -ENOMEM; in copy_pud_range()
1418 VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, src_vma); in copy_pud_range()
1421 if (err == -ENOMEM) in copy_pud_range()
1422 return -ENOMEM; in copy_pud_range()
1431 return -ENOMEM; in copy_pud_range()
1441 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_p4d_range()
1447 return -ENOMEM; in copy_p4d_range()
1455 return -ENOMEM; in copy_p4d_range()
1468 if (src_vma->vm_flags & VM_COPY_ON_FORK) in vma_needs_copy()
1474 if (src_vma->anon_vma) in vma_needs_copy()
1490 unsigned long addr = src_vma->vm_start; in copy_page_range()
1491 unsigned long end = src_vma->vm_end; in copy_page_range()
1492 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_page_range()
1493 struct mm_struct *src_mm = src_vma->vm_mm; in copy_page_range()
1511 is_cow = is_cow_mapping(src_vma->vm_flags); in copy_page_range()
1525 raw_write_seqcount_begin(&src_mm->write_protect_seq); in copy_page_range()
1537 ret = -ENOMEM; in copy_page_range()
1543 raw_write_seqcount_end(&src_mm->write_protect_seq); in copy_page_range()
1553 if (!details || details->reclaim_pt) in should_zap_cows()
1557 return details->even_cows; in should_zap_cows()
1568 /* Otherwise we should only zap non-anon folios */ in should_zap_folio()
1577 return details->zap_flags & ZAP_FLAG_DROP_MARKER; in zap_drop_markers()
1581 * This function makes sure that we'll replace the none pte with an uffd-wp
1584 * Returns true if uffd-wp ptes was installed, false otherwise.
1607 if (--nr == 0) in zap_install_uffd_wp_if_needed()
1622 struct mm_struct *mm = tlb->mm; in zap_present_folio_ptes()
1626 ptent = get_and_clear_full_ptes(mm, addr, pte, nr, tlb->fullmm); in zap_present_folio_ptes()
1636 rss[mm_counter(folio)] -= nr; in zap_present_folio_ptes()
1638 /* We don't need up-to-date accessed/dirty bits. */ in zap_present_folio_ptes()
1639 clear_full_ptes(mm, addr, pte, nr, tlb->fullmm); in zap_present_folio_ptes()
1640 rss[MM_ANONPAGES] -= nr; in zap_present_folio_ptes()
1662 * Zap or skip at least one present PTE, trying to batch-process subsequent
1673 struct mm_struct *mm = tlb->mm; in zap_present_ptes()
1680 /* We don't need up-to-date accessed/dirty bits. */ in zap_present_ptes()
1681 ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); in zap_present_ptes()
1718 softleaf_t entry; in zap_nonpresent_ptes() local
1722 entry = softleaf_from_pte(ptent); in zap_nonpresent_ptes()
1723 if (softleaf_is_device_private(entry) || in zap_nonpresent_ptes()
1724 softleaf_is_device_exclusive(entry)) { in zap_nonpresent_ptes()
1725 struct page *page = softleaf_to_page(entry); in zap_nonpresent_ptes()
1733 * consider uffd-wp bit when zap. For more information, in zap_nonpresent_ptes()
1737 rss[mm_counter(folio)]--; in zap_nonpresent_ptes()
1740 } else if (softleaf_is_swap(entry)) { in zap_nonpresent_ptes()
1746 rss[MM_SWAPENTS] -= nr; in zap_nonpresent_ptes()
1747 free_swap_and_cache_nr(entry, nr); in zap_nonpresent_ptes()
1748 } else if (softleaf_is_migration(entry)) { in zap_nonpresent_ptes()
1749 struct folio *folio = softleaf_to_folio(entry); in zap_nonpresent_ptes()
1753 rss[mm_counter(folio)]--; in zap_nonpresent_ptes()
1754 } else if (softleaf_is_uffd_wp_marker(entry)) { in zap_nonpresent_ptes()
1761 } else if (softleaf_is_guard_marker(entry)) { in zap_nonpresent_ptes()
1769 } else if (softleaf_is_hwpoison(entry) || in zap_nonpresent_ptes()
1770 softleaf_is_poison_marker(entry)) { in zap_nonpresent_ptes()
1774 /* We should have covered all the swap entry types */ in zap_nonpresent_ptes()
1775 pr_alert("unrecognized swap entry 0x%lx\n", entry.val); in zap_nonpresent_ptes()
1778 clear_not_present_full_ptes(vma->vm_mm, addr, pte, nr, tlb->fullmm); in zap_nonpresent_ptes()
1792 int max_nr = (end - addr) / PAGE_SIZE; in do_zap_pte_range()
1802 max_nr -= nr; in do_zap_pte_range()
1826 struct mm_struct *mm = tlb->mm; in zap_pte_range()
1924 if (next - addr != HPAGE_PMD_SIZE) in zap_pmd_range()
1931 } else if (details && details->single_folio && in zap_pmd_range()
1932 folio_test_pmd_mappable(details->single_folio) && in zap_pmd_range()
1933 next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { in zap_pmd_range()
1934 spinlock_t *ptl = pmd_lock(tlb->mm, pmd); in zap_pmd_range()
1948 pmd--; in zap_pmd_range()
1966 if (next - addr != HPAGE_PUD_SIZE) { in zap_pud_range()
1967 mmap_assert_locked(tlb->mm); in zap_pud_range()
2012 pgd = pgd_offset(vma->vm_mm, addr); in unmap_page_range()
2027 unsigned long start = max(vma->vm_start, start_addr); in unmap_single_vma()
2030 if (start >= vma->vm_end) in unmap_single_vma()
2032 end = min(vma->vm_end, end_addr); in unmap_single_vma()
2033 if (end <= vma->vm_start) in unmap_single_vma()
2036 if (vma->vm_file) in unmap_single_vma()
2042 * It is undesirable to test vma->vm_file as it in unmap_single_vma()
2043 * should be non-null for valid hugetlb area. in unmap_single_vma()
2046 * hugetlbfs ->mmap method fails, in unmap_single_vma()
2047 * mmap_region() nullifies vma->vm_file in unmap_single_vma()
2052 if (vma->vm_file) { in unmap_single_vma()
2054 details->zap_flags : 0; in unmap_single_vma()
2064 * unmap_vmas - unmap a range of memory covered by a list of vma's
2065 * @tlb: address of the caller's struct mmu_gather
2068 * @start_addr: virtual address at which to start unmapping
2069 * @end_addr: virtual address at which to end unmapping
2076 * The VMA list must be sorted in ascending virtual address order.
2078 * unmap_vmas() assumes that the caller will flush the whole unmapped address
2080 * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
2090 /* Careful - we need to zap private pages too! */ in unmap_vmas()
2094 mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm, in unmap_vmas()
2103 vma = mas_find(mas, tree_end - 1); in unmap_vmas()
2109 * zap_page_range_single_batched - remove user pages in a given range
2112 * @address: starting address of pages to remove
2117 * hugetlb, @tlb is flushed and re-initialized by this function.
2120 struct vm_area_struct *vma, unsigned long address, in zap_page_range_single_batched() argument
2123 const unsigned long end = address + size; in zap_page_range_single_batched()
2126 VM_WARN_ON_ONCE(!tlb || tlb->mm != vma->vm_mm); in zap_page_range_single_batched()
2128 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, in zap_page_range_single_batched()
2129 address, end); in zap_page_range_single_batched()
2131 update_hiwater_rss(vma->vm_mm); in zap_page_range_single_batched()
2134 * unmap 'address-end' not 'range.start-range.end' as range in zap_page_range_single_batched()
2137 unmap_single_vma(tlb, vma, address, end, details); in zap_page_range_single_batched()
2146 tlb_gather_mmu(tlb, vma->vm_mm); in zap_page_range_single_batched()
2151 * zap_page_range_single - remove user pages in a given range
2153 * @address: starting address of pages to zap
2159 void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, in zap_page_range_single() argument
2164 tlb_gather_mmu(&tlb, vma->vm_mm); in zap_page_range_single()
2165 zap_page_range_single_batched(&tlb, vma, address, size, details); in zap_page_range_single()
2170 * zap_vma_ptes - remove ptes mapping the vma
2172 * @address: starting address of pages to zap
2177 * The entire address range must be fully contained within the vma.
2180 void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, in zap_vma_ptes() argument
2183 if (!range_in_vma(vma, address, address + size) || in zap_vma_ptes()
2184 !(vma->vm_flags & VM_PFNMAP)) in zap_vma_ptes()
2187 zap_page_range_single(vma, address, size, NULL); in zap_vma_ptes()
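zap_vma_ptes() above (2180-2187) is the exported teardown helper for driver-installed VM_PFNMAP mappings. A minimal hedged sketch of a caller; my_dev_revoke_mapping() is a made-up name:

#include <linux/mm.h>

/* Drop every PTE previously installed in this VMA, e.g. before the
 * backing device memory goes away. The range must belong to a
 * VM_PFNMAP mapping and lie entirely inside the VMA (see 2183). */
static void my_dev_revoke_mapping(struct vm_area_struct *vma)
{
        zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
}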
2225 VM_WARN_ON_ONCE(vma->vm_flags & VM_PFNMAP); in vm_mixed_zeropage_allowed()
2232 if (mm_forbids_zeropage(vma->vm_mm)) in vm_mixed_zeropage_allowed()
2235 if (is_cow_mapping(vma->vm_flags)) in vm_mixed_zeropage_allowed()
2238 if (!(vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) in vm_mixed_zeropage_allowed()
2241 * Why not allow any VMA that has vm_ops->pfn_mkwrite? GUP could in vm_mixed_zeropage_allowed()
2242 * find the shared zeropage and longterm-pin it, which would in vm_mixed_zeropage_allowed()
2244 * page due to vma->vm_ops->pfn_mkwrite, because what's mapped would in vm_mixed_zeropage_allowed()
2249 return vma->vm_ops && vma->vm_ops->pfn_mkwrite && in vm_mixed_zeropage_allowed()
2250 (vma_is_fsdax(vma) || vma->vm_flags & VM_IO); in vm_mixed_zeropage_allowed()
2259 return -EINVAL; in validate_page_before_insert()
2262 return -EINVAL; in validate_page_before_insert()
2266 return -EINVAL; in validate_page_before_insert()
2280 return -EBUSY; in insert_page_into_pte_locked()
2285 return -EFAULT; in insert_page_into_pte_locked()
2305 inc_mm_counter(vma->vm_mm, mm_counter_file(folio)); in insert_page_into_pte_locked()
2308 set_pte_at(vma->vm_mm, addr, pte, pteval); in insert_page_into_pte_locked()
2322 retval = -ENOMEM; in insert_page()
2323 pte = get_locked_pte(vma->vm_mm, addr, &ptl); in insert_page()
2353 struct mm_struct *const mm = vma->vm_mm; in insert_pages()
2359 ret = -EFAULT; in insert_pages()
2365 remaining_pages_total, PTRS_PER_PTE - pte_index(addr)); in insert_pages()
2368 ret = -ENOMEM; in insert_pages()
2378 ret = -EFAULT; in insert_pages()
2387 remaining_pages_total -= pte_idx; in insert_pages()
2394 pages_to_write_in_pmd -= batch_size; in insert_pages()
2395 remaining_pages_total -= batch_size; in insert_pages()
2406 * vm_insert_pages - insert multiple pages into user vma, batching the pmd lock.
2408 * @addr: target start user address of these pages
2423 const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1; in vm_insert_pages()
2425 if (addr < vma->vm_start || end_addr >= vma->vm_end) in vm_insert_pages()
2426 return -EFAULT; in vm_insert_pages()
2427 if (!(vma->vm_flags & VM_MIXEDMAP)) { in vm_insert_pages()
2428 BUG_ON(mmap_read_trylock(vma->vm_mm)); in vm_insert_pages()
2429 BUG_ON(vma->vm_flags & VM_PFNMAP); in vm_insert_pages()
2433 return insert_pages(vma, addr, pages, num, vma->vm_page_prot); in vm_insert_pages()
2438 * vm_insert_page - insert single page into user vma
2440 * @addr: target user address of this page
2460 * Usually this function is called from f_op->mmap() handler
2461 * under mm->mmap_lock write-lock, so it can change vma->vm_flags.
2463 * function from other places, for example from page-fault handler.
2470 if (addr < vma->vm_start || addr >= vma->vm_end) in vm_insert_page()
2471 return -EFAULT; in vm_insert_page()
2472 if (!(vma->vm_flags & VM_MIXEDMAP)) { in vm_insert_page()
2473 BUG_ON(mmap_read_trylock(vma->vm_mm)); in vm_insert_page()
2474 BUG_ON(vma->vm_flags & VM_PFNMAP); in vm_insert_page()
2477 return insert_page(vma, addr, page, vma->vm_page_prot, false); in vm_insert_page()
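As the kerneldoc around 2460-2463 notes, vm_insert_page() is normally called from an f_op->mmap() handler while mmap_lock is held for write, since it may mark the VMA VM_MIXEDMAP on first use. A hedged sketch of such a handler; my_mmap() and my_page are made-up names (my_page would come from alloc_page() at probe time):

#include <linux/fs.h>
#include <linux/mm.h>

static struct page *my_page;    /* hypothetical: allocated at probe time */

static int my_mmap(struct file *file, struct vm_area_struct *vma)
{
        if (vma->vm_end - vma->vm_start != PAGE_SIZE)
                return -EINVAL;

        /* Runs under the mmap_lock write-lock, so flipping the VMA to
         * VM_MIXEDMAP inside vm_insert_page() is safe. */
        return vm_insert_page(vma, vma->vm_start, my_page);
}

For more than one page, vm_insert_pages() (2406) batches the PMD lock, and vm_map_pages() (2520) additionally honours vma->vm_pgoff as an offset into the page array.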
2482 * __vm_map_pages - maps range of kernel pages into user vma
2498 unsigned long uaddr = vma->vm_start; in __vm_map_pages()
2503 return -ENXIO; in __vm_map_pages()
2506 if (count > num - offset) in __vm_map_pages()
2507 return -ENXIO; in __vm_map_pages()
2520 * vm_map_pages - maps range of kernel pages starts with non zero offset
2540 return __vm_map_pages(vma, pages, num, vma->vm_pgoff); in vm_map_pages()
2545 * vm_map_pages_zero - map range of kernel pages starts with zero offset
2567 struct mm_struct *mm = vma->vm_mm; in insert_pfn()
2568 pte_t *pte, entry; in insert_pfn() local
2574 entry = ptep_get(pte); in insert_pfn()
2575 if (!pte_none(entry)) { in insert_pfn()
2587 if (pte_pfn(entry) != pfn) { in insert_pfn()
2588 WARN_ON_ONCE(!is_zero_pfn(pte_pfn(entry))); in insert_pfn()
2591 entry = pte_mkyoung(entry); in insert_pfn()
2592 entry = maybe_mkwrite(pte_mkdirty(entry), vma); in insert_pfn()
2593 if (ptep_set_access_flags(vma, addr, pte, entry, 1)) in insert_pfn()
2600 entry = pte_mkspecial(pfn_pte(pfn, prot)); in insert_pfn()
2603 entry = pte_mkyoung(entry); in insert_pfn()
2604 entry = maybe_mkwrite(pte_mkdirty(entry), vma); in insert_pfn()
2607 set_pte_at(mm, addr, pte, entry); in insert_pfn()
2616 * vmf_insert_pfn_prot - insert single pfn into user vma with specified pgprot
2618 * @addr: target user address of this page
2623 * to override pgprot on a per-page basis.
2630 * pgprot typically only differs from @vma->vm_page_prot when drivers set
2631 * caching- and encryption bits different than those of @vma->vm_page_prot,
2632 * because the caching- or encryption mode may not be known at mmap() time.
2634 * This is ok as long as @vma->vm_page_prot is not used by the core vm
2637 * functions that don't touch caching- or encryption bits, using pte_modify()
2640 * Also when new page-table entries are created, this is only done using the
2641 * fault() callback, and never using the value of vma->vm_page_prot,
2642 * except for page-table entries that point to anonymous pages as the result
2657 BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); in vmf_insert_pfn_prot()
2658 BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == in vmf_insert_pfn_prot()
2660 BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); in vmf_insert_pfn_prot()
2661 BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); in vmf_insert_pfn_prot()
2663 if (addr < vma->vm_start || addr >= vma->vm_end) in vmf_insert_pfn_prot()
2676 * vmf_insert_pfn - insert single pfn into user vma
2678 * @addr: target user address of this page
2684 * This function should only be called from a vm_ops->fault handler, and
2698 return vmf_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); in vmf_insert_pfn()
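The kerneldoc at 2684 restricts vmf_insert_pfn() to vm_ops->fault handlers. A hedged sketch of such a handler for a VM_PFNMAP VMA; my_vm_fault() and my_base_pfn are made-up names:

#include <linux/mm.h>

static unsigned long my_base_pfn; /* hypothetical: first PFN of the region */

static vm_fault_t my_vm_fault(struct vm_fault *vmf)
{
        /* On success this returns VM_FAULT_NOPAGE, telling the core
         * fault path that the PTE has already been installed. */
        return vmf_insert_pfn(vmf->vma, vmf->address,
                              my_base_pfn + vmf->pgoff);
}

static const struct vm_operations_struct my_vm_ops = {
        .fault = my_vm_fault,
};

The mmap handler installing my_vm_ops would be expected to set VM_IO | VM_PFNMAP, matching the BUG_ON checks at 2657-2661; drivers that need per-page caching attributes use vmf_insert_pfn_prot() (2616) instead.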
2709 if (vma->vm_flags & VM_MIXEDMAP) in vm_mixed_ok()
2719 pgprot_t pgprot = vma->vm_page_prot; in __vm_insert_mixed()
2725 if (addr < vma->vm_start || addr >= vma->vm_end) in __vm_insert_mixed()
2754 if (err == -ENOMEM) in __vm_insert_mixed()
2756 if (err < 0 && err != -EBUSY) in __vm_insert_mixed()
2765 pgprot_t pgprot = vmf->vma->vm_page_prot; in vmf_insert_page_mkwrite()
2766 unsigned long addr = vmf->address; in vmf_insert_page_mkwrite()
2769 if (addr < vmf->vma->vm_start || addr >= vmf->vma->vm_end) in vmf_insert_page_mkwrite()
2772 err = insert_page(vmf->vma, addr, page, pgprot, write); in vmf_insert_page_mkwrite()
2773 if (err == -ENOMEM) in vmf_insert_page_mkwrite()
2775 if (err < 0 && err != -EBUSY) in vmf_insert_page_mkwrite()
2791 * different entry in the mean time, we treat that as success as we assume
2792 * the same entry was actually inserted.
2803 * in null mappings (currently treated as "copy-on-access")
2815 return -ENOMEM; in remap_pte_range()
2820 err = -EACCES; in remap_pte_range()
2839 pfn -= addr >> PAGE_SHIFT; in remap_pmd_range()
2842 return -ENOMEM; in remap_pmd_range()
2862 pfn -= addr >> PAGE_SHIFT; in remap_pud_range()
2865 return -ENOMEM; in remap_pud_range()
2884 pfn -= addr >> PAGE_SHIFT; in remap_p4d_range()
2887 return -ENOMEM; in remap_p4d_range()
2903 * There's a horrible special case to handle copy-on-write in get_remap_pgoff()
2905 * un-COW'ed pages by matching them up with "vma->vm_pgoff". in get_remap_pgoff()
2910 return -EINVAL; in get_remap_pgoff()
2923 struct mm_struct *mm = vma->vm_mm; in remap_pfn_range_internal()
2927 return -EINVAL; in remap_pfn_range_internal()
2929 VM_WARN_ON_ONCE((vma->vm_flags & VM_REMAP_FLAGS) != VM_REMAP_FLAGS); in remap_pfn_range_internal()
2932 pfn -= addr >> PAGE_SHIFT; in remap_pfn_range_internal()
2948 * must have pre-validated the caching bits of the pgprot_t.
2974 return ERR_PTR(-EINVAL); in pfnmap_track_ctx_alloc()
2979 return ERR_PTR(-ENOMEM); in pfnmap_track_ctx_alloc()
2982 ctx->pfn = pfn; in pfnmap_track_ctx_alloc()
2983 ctx->size = size; in pfnmap_track_ctx_alloc()
2984 kref_init(&ctx->kref); in pfnmap_track_ctx_alloc()
2992 pfnmap_untrack(ctx->pfn, ctx->size); in pfnmap_track_ctx_release()
3013 if (addr == vma->vm_start && addr + size == vma->vm_end) { in remap_pfn_range_track()
3014 if (vma->pfnmap_track_ctx) in remap_pfn_range_track()
3015 return -EINVAL; in remap_pfn_range_track()
3020 return -EINVAL; in remap_pfn_range_track()
3026 kref_put(&ctx->kref, pfnmap_track_ctx_release); in remap_pfn_range_track()
3028 vma->pfnmap_track_ctx = ctx; in remap_pfn_range_track()
3053 get_remap_pgoff(desc->vm_flags, desc->start, desc->end, in remap_pfn_range_prepare()
3054 desc->start, desc->end, pfn, &desc->pgoff); in remap_pfn_range_prepare()
3055 desc->vm_flags |= VM_REMAP_FLAGS; in remap_pfn_range_prepare()
3064 err = get_remap_pgoff(vma->vm_flags, addr, end, in remap_pfn_range_prepare_vma()
3065 vma->vm_start, vma->vm_end, in remap_pfn_range_prepare_vma()
3066 pfn, &vma->vm_pgoff); in remap_pfn_range_prepare_vma()
3075 * remap_pfn_range - remap kernel memory to userspace
3077 * @addr: target page aligned user address to start at
3078 * @pfn: page frame number of kernel physical memory address
3106 * vm_iomap_memory - remap memory to userspace
3115 * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get
3116 * whatever write-combining details or similar.
3126 return -EINVAL; in vm_iomap_memory()
3128 * You *really* shouldn't map things that aren't page-aligned, in vm_iomap_memory()
3136 return -EINVAL; in vm_iomap_memory()
3139 if (vma->vm_pgoff > pages) in vm_iomap_memory()
3140 return -EINVAL; in vm_iomap_memory()
3141 pfn += vma->vm_pgoff; in vm_iomap_memory()
3142 pages -= vma->vm_pgoff; in vm_iomap_memory()
3145 vm_len = vma->vm_end - vma->vm_start; in vm_iomap_memory()
3147 return -EINVAL; in vm_iomap_memory()
3150 return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); in vm_iomap_memory()
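vm_iomap_memory() above (3106-3150) does the pgoff/length bookkeeping so a driver's mmap handler can hand over a raw physical range. A hedged sketch; my_iomap_mmap(), my_bar_base and my_bar_len are made-up names (the latter two would typically come from pci_resource_start()/pci_resource_len()):

#include <linux/fs.h>
#include <linux/mm.h>

static phys_addr_t my_bar_base;     /* hypothetical: physical base of region */
static unsigned long my_bar_len;    /* hypothetical: region length in bytes */

static int my_iomap_mmap(struct file *file, struct vm_area_struct *vma)
{
        /* Tweak caching first if needed, per the NOTE at 3115-3116. */
        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

        /* Validates vma->vm_pgoff and the requested length against the
         * region, then calls io_remap_pfn_range() (3150). */
        return vm_iomap_memory(vma, my_bar_base, my_bar_len);
}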
3168 return -ENOMEM; in apply_to_pte_range()
3174 return -EINVAL; in apply_to_pte_range()
3211 return -ENOMEM; in apply_to_pmd_range()
3220 return -EINVAL; in apply_to_pmd_range()
3247 return -ENOMEM; in apply_to_pud_range()
3256 return -EINVAL; in apply_to_pud_range()
3283 return -ENOMEM; in apply_to_p4d_range()
3292 return -EINVAL; in apply_to_p4d_range()
3318 return -EINVAL; in __apply_to_page_range()
3326 err = -EINVAL; in __apply_to_page_range()
3371 * handle_pte_fault chooses page fault handler according to an entry which was
3372 * read non-atomically. Before making any commitment, on those architectures
3383 spin_lock(vmf->ptl); in pte_unmap_same()
3384 same = pte_same(ptep_get(vmf->pte), vmf->orig_pte); in pte_unmap_same()
3385 spin_unlock(vmf->ptl); in pte_unmap_same()
3388 pte_unmap(vmf->pte); in pte_unmap_same()
3389 vmf->pte = NULL; in pte_unmap_same()
3396 * -EHWPOISON: copy failed due to hwpoison in source page
3397 * -EAGAIN: copy failed (some other reason)
3405 struct vm_area_struct *vma = vmf->vma; in __wp_page_copy_user()
3406 struct mm_struct *mm = vma->vm_mm; in __wp_page_copy_user()
3407 unsigned long addr = vmf->address; in __wp_page_copy_user()
3411 return -EHWPOISON; in __wp_page_copy_user()
3417 * a "struct page" for it. We do a best-effort copy by in __wp_page_copy_user()
3418 * just copying from the original user address. If that in __wp_page_copy_user()
3419 * fails, we just zero-fill it. Live with it. in __wp_page_copy_user()
3429 vmf->pte = NULL; in __wp_page_copy_user()
3430 if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) { in __wp_page_copy_user()
3431 pte_t entry; in __wp_page_copy_user() local
3433 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); in __wp_page_copy_user()
3434 if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in __wp_page_copy_user()
3439 if (vmf->pte) in __wp_page_copy_user()
3440 update_mmu_tlb(vma, addr, vmf->pte); in __wp_page_copy_user()
3441 ret = -EAGAIN; in __wp_page_copy_user()
3445 entry = pte_mkyoung(vmf->orig_pte); in __wp_page_copy_user()
3446 if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0)) in __wp_page_copy_user()
3447 update_mmu_cache_range(vmf, vma, addr, vmf->pte, 1); in __wp_page_copy_user()
3457 if (vmf->pte) in __wp_page_copy_user()
3460 /* Re-validate under PTL if the page is still mapped */ in __wp_page_copy_user()
3461 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); in __wp_page_copy_user()
3462 if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in __wp_page_copy_user()
3464 if (vmf->pte) in __wp_page_copy_user()
3465 update_mmu_tlb(vma, addr, vmf->pte); in __wp_page_copy_user()
3466 ret = -EAGAIN; in __wp_page_copy_user()
3477 * use-case in __wp_page_copy_user()
3488 if (vmf->pte) in __wp_page_copy_user()
3489 pte_unmap_unlock(vmf->pte, vmf->ptl); in __wp_page_copy_user()
3499 struct file *vm_file = vma->vm_file; in __get_fault_gfp_mask()
3502 return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO; in __get_fault_gfp_mask()
3512 * Notify the address space that the page is about to become writable so that
3520 unsigned int old_flags = vmf->flags; in do_page_mkwrite()
3522 vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; in do_page_mkwrite()
3524 if (vmf->vma->vm_file && in do_page_mkwrite()
3525 IS_SWAPFILE(vmf->vma->vm_file->f_mapping->host)) in do_page_mkwrite()
3528 ret = vmf->vma->vm_ops->page_mkwrite(vmf); in do_page_mkwrite()
3530 vmf->flags = old_flags; in do_page_mkwrite()
3535 if (!folio->mapping) { in do_page_mkwrite()
3552 struct vm_area_struct *vma = vmf->vma; in fault_dirty_shared_page()
3554 struct folio *folio = page_folio(vmf->page); in fault_dirty_shared_page()
3556 bool page_mkwrite = vma->vm_ops && vma->vm_ops->page_mkwrite; in fault_dirty_shared_page()
3561 * Take a local copy of the address_space - folio.mapping may be zeroed in fault_dirty_shared_page()
3563 * pinned by vma->vm_file's reference. We rely on folio_unlock()'s in fault_dirty_shared_page()
3570 file_update_time(vma->vm_file); in fault_dirty_shared_page()
3601 * any related book-keeping.
3604 __releases(vmf->ptl) in wp_page_reuse()
3606 struct vm_area_struct *vma = vmf->vma; in wp_page_reuse()
3607 pte_t entry; in wp_page_reuse() local
3609 VM_BUG_ON(!(vmf->flags & FAULT_FLAG_WRITE)); in wp_page_reuse()
3610 VM_WARN_ON(is_zero_pfn(pte_pfn(vmf->orig_pte))); in wp_page_reuse()
3614 !PageAnonExclusive(vmf->page)); in wp_page_reuse()
3620 folio_xchg_last_cpupid(folio, (1 << LAST_CPUPID_SHIFT) - 1); in wp_page_reuse()
3623 flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); in wp_page_reuse()
3624 entry = pte_mkyoung(vmf->orig_pte); in wp_page_reuse()
3625 entry = maybe_mkwrite(pte_mkdirty(entry), vma); in wp_page_reuse()
3626 if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) in wp_page_reuse()
3627 update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); in wp_page_reuse()
3628 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_reuse()
3634 * vm_ops that have a ->map_pages have been audited and don't need
3639 struct vm_area_struct *vma = vmf->vma; in vmf_can_call_fault()
3641 if (vma->vm_ops->map_pages || !(vmf->flags & FAULT_FLAG_VMA_LOCK)) in vmf_can_call_fault()
3648 * __vmf_anon_prepare - Prepare to handle an anonymous fault.
3654 * only protected by the per-VMA lock, the caller must retry with the
3657 * do with only the per-VMA lock held for this VMA.
3664 struct vm_area_struct *vma = vmf->vma; in __vmf_anon_prepare()
3667 if (likely(vma->anon_vma)) in __vmf_anon_prepare()
3669 if (vmf->flags & FAULT_FLAG_VMA_LOCK) { in __vmf_anon_prepare()
3670 if (!mmap_read_trylock(vma->vm_mm)) in __vmf_anon_prepare()
3675 if (vmf->flags & FAULT_FLAG_VMA_LOCK) in __vmf_anon_prepare()
3676 mmap_read_unlock(vma->vm_mm); in __vmf_anon_prepare()
3689 * - Allocate a page, copy the content of the old page to the new one.
3690 * - Handle book keeping and accounting - cgroups, mmu-notifiers, etc.
3691 * - Take the PTL. If the pte changed, bail out and release the allocated page
3692 * - If the pte is still the way we remember it, update the page table and all
3693 * relevant references. This includes dropping the reference the page-table
3695 * - In any case, unlock the PTL and drop the reference we took to the old page.
3699 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in wp_page_copy()
3700 struct vm_area_struct *vma = vmf->vma; in wp_page_copy()
3701 struct mm_struct *mm = vma->vm_mm; in wp_page_copy()
3704 pte_t entry; in wp_page_copy() local
3712 if (vmf->page) in wp_page_copy()
3713 old_folio = page_folio(vmf->page); in wp_page_copy()
3718 pfn_is_zero = is_zero_pfn(pte_pfn(vmf->orig_pte)); in wp_page_copy()
3719 new_folio = folio_prealloc(mm, vma, vmf->address, pfn_is_zero); in wp_page_copy()
3726 err = __wp_page_copy_user(&new_folio->page, vmf->page, vmf); in wp_page_copy()
3730 * it's fine. If not, userspace would re-fault on in wp_page_copy()
3731 * the same address and we will handle the fault in wp_page_copy()
3733 * The -EHWPOISON case will not be retried. in wp_page_copy()
3740 return err == -EHWPOISON ? VM_FAULT_HWPOISON : 0; in wp_page_copy()
3742 kmsan_copy_page_meta(&new_folio->page, vmf->page); in wp_page_copy()
3748 vmf->address & PAGE_MASK, in wp_page_copy()
3749 (vmf->address & PAGE_MASK) + PAGE_SIZE); in wp_page_copy()
3753 * Re-check the pte - we dropped the lock in wp_page_copy()
3755 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl); in wp_page_copy()
3756 if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in wp_page_copy()
3763 ksm_might_unmap_zero_page(mm, vmf->orig_pte); in wp_page_copy()
3766 flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); in wp_page_copy()
3767 entry = folio_mk_pte(new_folio, vma->vm_page_prot); in wp_page_copy()
3768 entry = pte_sw_mkyoung(entry); in wp_page_copy()
3770 if (pte_soft_dirty(vmf->orig_pte)) in wp_page_copy()
3771 entry = pte_mksoft_dirty(entry); in wp_page_copy()
3772 if (pte_uffd_wp(vmf->orig_pte)) in wp_page_copy()
3773 entry = pte_mkuffd_wp(entry); in wp_page_copy()
3775 entry = maybe_mkwrite(pte_mkdirty(entry), vma); in wp_page_copy()
3779 * Clear the pte entry and flush it first, before updating the in wp_page_copy()
3780 * pte with the new entry, to keep TLBs on different CPUs in in wp_page_copy()
3785 ptep_clear_flush(vma, vmf->address, vmf->pte); in wp_page_copy()
3786 folio_add_new_anon_rmap(new_folio, vma, vmf->address, RMAP_EXCLUSIVE); in wp_page_copy()
3788 BUG_ON(unshare && pte_write(entry)); in wp_page_copy()
3789 set_pte_at(mm, vmf->address, vmf->pte, entry); in wp_page_copy()
3790 update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); in wp_page_copy()
3814 folio_remove_rmap_pte(old_folio, vmf->page, vma); in wp_page_copy()
3820 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_copy()
3821 } else if (vmf->pte) { in wp_page_copy()
3822 update_mmu_tlb(vma, vmf->address, vmf->pte); in wp_page_copy()
3823 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_copy()
3849 * finish_mkwrite_fault - finish page fault for a shared mapping, making PTE
3853 * @folio: the folio of vmf->page
3856 * shared mapping due to PTE being read-only once the mapped page is prepared.
3867 WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED)); in finish_mkwrite_fault()
3868 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, in finish_mkwrite_fault()
3869 &vmf->ptl); in finish_mkwrite_fault()
3870 if (!vmf->pte) in finish_mkwrite_fault()
3876 if (!pte_same(ptep_get(vmf->pte), vmf->orig_pte)) { in finish_mkwrite_fault()
3877 update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); in finish_mkwrite_fault()
3878 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_mkwrite_fault()
3891 struct vm_area_struct *vma = vmf->vma; in wp_pfn_shared()
3893 if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { in wp_pfn_shared()
3896 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_pfn_shared()
3901 vmf->flags |= FAULT_FLAG_MKWRITE; in wp_pfn_shared()
3902 ret = vma->vm_ops->pfn_mkwrite(vmf); in wp_pfn_shared()
3912 __releases(vmf->ptl) in wp_page_shared()
3914 struct vm_area_struct *vma = vmf->vma; in wp_page_shared()
3919 if (vma->vm_ops && vma->vm_ops->page_mkwrite) { in wp_page_shared()
3922 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_shared()
3974 if (test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids)) in __wp_can_reuse_large_anon_folio()
3997 if (test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids)) in __wp_can_reuse_large_anon_folio()
4004 VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != vma->vm_mm->mm_id && in __wp_can_reuse_large_anon_folio()
4005 folio_mm_id(folio, 1) != vma->vm_mm->mm_id); in __wp_can_reuse_large_anon_folio()
4072 * It is done by copying the page to a new address and decrementing the
4073 * shared-page counter for the old page.
4076 * done by the caller (the low-level page fault routine in most cases).
4084 * We enter with non-exclusive mmap_lock (to exclude vma changes,
4089 __releases(vmf->ptl) in do_wp_page()
4091 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in do_wp_page()
4092 struct vm_area_struct *vma = vmf->vma; in do_wp_page()
4097 if (userfaultfd_pte_wp(vma, ptep_get(vmf->pte))) { in do_wp_page()
4099 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
4105 * etc.) because we're only removing the uffd-wp bit, in do_wp_page()
4108 pte = pte_clear_uffd_wp(ptep_get(vmf->pte)); in do_wp_page()
4110 set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); in do_wp_page()
4115 vmf->orig_pte = pte; in do_wp_page()
4119 * Userfaultfd write-protect can defer flushes. Ensure the TLB in do_wp_page()
4122 if (unlikely(userfaultfd_wp(vmf->vma) && in do_wp_page()
4123 mm_tlb_flush_pending(vmf->vma->vm_mm))) in do_wp_page()
4124 flush_tlb_page(vmf->vma, vmf->address); in do_wp_page()
4127 vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); in do_wp_page()
4129 if (vmf->page) in do_wp_page()
4130 folio = page_folio(vmf->page); in do_wp_page()
4136 if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { in do_wp_page()
4139 * VM_PFNMAP VMA. FS DAX also wants ops->pfn_mkwrite called. in do_wp_page()
4142 * Just mark the pages writable and/or call ops->pfn_mkwrite. in do_wp_page()
4144 if (!vmf->page || is_fsdax_page(vmf->page)) { in do_wp_page()
4145 vmf->page = NULL; in do_wp_page()
4159 (PageAnonExclusive(vmf->page) || wp_can_reuse_anon_folio(folio, vma))) { in do_wp_page()
4160 if (!PageAnonExclusive(vmf->page)) in do_wp_page()
4161 SetPageAnonExclusive(vmf->page); in do_wp_page()
4163 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
4175 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
4187 zap_page_range_single(vma, start_addr, end_addr - start_addr, details); in unmap_mapping_range_vma()
4199 vba = vma->vm_pgoff; in unmap_mapping_range_tree()
4200 vea = vba + vma_pages(vma) - 1; in unmap_mapping_range_tree()
4205 ((zba - vba) << PAGE_SHIFT) + vma->vm_start, in unmap_mapping_range_tree()
4206 ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, in unmap_mapping_range_tree()
4212 * unmap_mapping_folio() - Unmap single folio from processes.
4224 struct address_space *mapping = folio->mapping; in unmap_mapping_folio()
4231 first_index = folio->index; in unmap_mapping_folio()
4232 last_index = folio_next_index(folio) - 1; in unmap_mapping_folio()
4239 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) in unmap_mapping_folio()
4240 unmap_mapping_range_tree(&mapping->i_mmap, first_index, in unmap_mapping_folio()
4246 * unmap_mapping_pages() - Unmap pages from processes.
4247 * @mapping: The address space containing pages to be unmapped.
4252 * Unmap the pages in this address space from any userspace process which
4262 pgoff_t last_index = start + nr - 1; in unmap_mapping_pages()
4269 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) in unmap_mapping_pages()
4270 unmap_mapping_range_tree(&mapping->i_mmap, first_index, in unmap_mapping_pages()
4277 * unmap_mapping_range - unmap the portion of all mmaps in the specified
4281 * @mapping: the address space containing mmaps to be unmapped.
4297 pgoff_t hlen = ((pgoff_t)(holelen) + PAGE_SIZE - 1) >> PAGE_SHIFT; in unmap_mapping_range()
4302 (holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; in unmap_mapping_range()
4304 hlen = ULONG_MAX - hba + 1; in unmap_mapping_range()
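unmap_mapping_range() above (4277-4304) is the entry point filesystems use to shoot down all user mappings of a file range, e.g. on truncation. A hedged sketch roughly mirroring what the generic truncate_pagecache() helper does; my_truncate_unmap() is a made-up name:

#include <linux/fs.h>
#include <linux/mm.h>

static void my_truncate_unmap(struct inode *inode, loff_t newsize)
{
        /* holebegin must be page aligned; holelen == 0 means "to the end
         * of the file"; even_cows == 1 also unmaps private COW'ed copies,
         * as truncation requires (pass 0 when merely invalidating
         * pagecache, so private data is preserved). */
        unmap_mapping_range(inode->i_mapping,
                            round_up(newsize, PAGE_SIZE), 0, 1);
}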
4312 * Restore a potential device exclusive pte to a working pte entry
4316 struct folio *folio = page_folio(vmf->page); in remove_device_exclusive_entry()
4317 struct vm_area_struct *vma = vmf->vma; in remove_device_exclusive_entry()
4323 * the PTL so a racing thread can remove the device-exclusive in remove_device_exclusive_entry()
4324 * entry and unmap it. If the folio is free the entry must in remove_device_exclusive_entry()
4326 * been re-allocated after being freed all we do is lock and in remove_device_exclusive_entry()
4338 vma->vm_mm, vmf->address & PAGE_MASK, in remove_device_exclusive_entry()
4339 (vmf->address & PAGE_MASK) + PAGE_SIZE, NULL); in remove_device_exclusive_entry()
4342 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in remove_device_exclusive_entry()
4343 &vmf->ptl); in remove_device_exclusive_entry()
4344 if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte))) in remove_device_exclusive_entry()
4345 restore_exclusive_pte(vma, folio, vmf->page, vmf->address, in remove_device_exclusive_entry()
4346 vmf->pte, vmf->orig_pte); in remove_device_exclusive_entry()
4348 if (vmf->pte) in remove_device_exclusive_entry()
4349 pte_unmap_unlock(vmf->pte, vmf->ptl); in remove_device_exclusive_entry()
4363 if (mem_cgroup_swap_full(folio) || (vma->vm_flags & VM_LOCKED) || in should_try_to_free_swap()
4378 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, in pte_marker_clear()
4379 vmf->address, &vmf->ptl); in pte_marker_clear()
4380 if (!vmf->pte) in pte_marker_clear()
4383 * Be careful so that we will only recover a special uffd-wp pte into a in pte_marker_clear()
4390 if (pte_same(vmf->orig_pte, ptep_get(vmf->pte))) in pte_marker_clear()
4391 pte_clear(vmf->vma->vm_mm, vmf->address, vmf->pte); in pte_marker_clear()
4392 pte_unmap_unlock(vmf->pte, vmf->ptl); in pte_marker_clear()
4398 if (vma_is_anonymous(vmf->vma)) in do_pte_missing()
4405 * This is actually a page-missing access, but with uffd-wp special pte
4406 * installed. It means this pte was wr-protected before being unmapped.
4412 * got unregistered - we can simply clear them. in pte_marker_handle_uffd_wp()
4414 if (unlikely(!userfaultfd_wp(vmf->vma))) in pte_marker_handle_uffd_wp()
4422 const softleaf_t entry = softleaf_from_pte(vmf->orig_pte); in handle_pte_marker() local
4423 const pte_marker marker = softleaf_to_marker(entry); in handle_pte_marker()
4432 /* Higher priority than uffd-wp when data corrupted */ in handle_pte_marker()
4440 if (softleaf_is_uffd_wp_marker(entry)) in handle_pte_marker()
4449 struct vm_area_struct *vma = vmf->vma; in __alloc_swap_folio()
4451 softleaf_t entry; in __alloc_swap_folio() local
4453 folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vmf->address); in __alloc_swap_folio()
4457 entry = softleaf_from_pte(vmf->orig_pte); in __alloc_swap_folio()
4458 if (mem_cgroup_swapin_charge_folio(folio, vma->vm_mm, in __alloc_swap_folio()
4459 GFP_KERNEL, entry)) { in __alloc_swap_folio()
4475 softleaf_t entry; in can_swapin_thp() local
4479 addr = ALIGN_DOWN(vmf->address, nr_pages * PAGE_SIZE); in can_swapin_thp()
4480 idx = (vmf->address - addr) / PAGE_SIZE; in can_swapin_thp()
4483 if (!pte_same(pte, pte_move_swp_offset(vmf->orig_pte, -idx))) in can_swapin_thp()
4485 entry = softleaf_from_pte(pte); in can_swapin_thp()
4494 if (unlikely(swap_zeromap_batch(entry, nr_pages, NULL) != nr_pages)) in can_swapin_thp()
4496 if (unlikely(non_swapcache_batch(entry, nr_pages) != nr_pages)) in can_swapin_thp()
4527 struct vm_area_struct *vma = vmf->vma; in alloc_swap_folio()
4531 softleaf_t entry; in alloc_swap_folio() local
4538 * If uffd is active for the vma we need per-page fault fidelity to in alloc_swap_folio()
4546 * lack handling for such cases, so fallback to swapping in order-0 in alloc_swap_folio()
4552 entry = softleaf_from_pte(vmf->orig_pte); in alloc_swap_folio()
4557 orders = thp_vma_allowable_orders(vma, vma->vm_flags, TVA_PAGEFAULT, in alloc_swap_folio()
4558 BIT(PMD_ORDER) - 1); in alloc_swap_folio()
4559 orders = thp_vma_suitable_orders(vma, vmf->address, orders); in alloc_swap_folio()
4560 orders = thp_swap_suitable_orders(swp_offset(entry), in alloc_swap_folio()
4561 vmf->address, orders); in alloc_swap_folio()
4566 pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, in alloc_swap_folio()
4567 vmf->address & PMD_MASK, &ptl); in alloc_swap_folio()
4577 addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); in alloc_swap_folio()
4588 addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); in alloc_swap_folio()
4591 if (!mem_cgroup_swapin_charge_folio(folio, vma->vm_mm, in alloc_swap_folio()
4592 gfp, entry)) in alloc_swap_folio()
4614 * We enter with non-exclusive mmap_lock (to exclude vma changes,
4623 struct vm_area_struct *vma = vmf->vma; in do_swap_page()
4631 softleaf_t entry; in do_swap_page() local
4637 unsigned long address; in do_swap_page() local
4643 entry = softleaf_from_pte(vmf->orig_pte); in do_swap_page()
4644 if (unlikely(!softleaf_is_swap(entry))) { in do_swap_page()
4645 if (softleaf_is_migration(entry)) { in do_swap_page()
4646 migration_entry_wait(vma->vm_mm, vmf->pmd, in do_swap_page()
4647 vmf->address); in do_swap_page()
4648 } else if (softleaf_is_device_exclusive(entry)) { in do_swap_page()
4649 vmf->page = softleaf_to_page(entry); in do_swap_page()
4651 } else if (softleaf_is_device_private(entry)) { in do_swap_page()
4652 if (vmf->flags & FAULT_FLAG_VMA_LOCK) { in do_swap_page()
4662 vmf->page = softleaf_to_page(entry); in do_swap_page()
4663 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_swap_page()
4664 vmf->address, &vmf->ptl); in do_swap_page()
4665 if (unlikely(!vmf->pte || in do_swap_page()
4666 !pte_same(ptep_get(vmf->pte), in do_swap_page()
4667 vmf->orig_pte))) in do_swap_page()
4674 if (trylock_page(vmf->page)) { in do_swap_page()
4677 get_page(vmf->page); in do_swap_page()
4678 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
4679 pgmap = page_pgmap(vmf->page); in do_swap_page()
4680 ret = pgmap->ops->migrate_to_ram(vmf); in do_swap_page()
4681 unlock_page(vmf->page); in do_swap_page()
4682 put_page(vmf->page); in do_swap_page()
4684 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
4686 } else if (softleaf_is_hwpoison(entry)) { in do_swap_page()
4688 } else if (softleaf_is_marker(entry)) { in do_swap_page()
4691 print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL); in do_swap_page()
4698 si = get_swap_device(entry); in do_swap_page()
4702 folio = swap_cache_get_folio(entry); in do_swap_page()
4704 swap_update_readahead(folio, vma, vmf->address); in do_swap_page()
4708 if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && in do_swap_page()
4709 __swap_count(entry) == 1) { in do_swap_page()
4718 entry.val = ALIGN_DOWN(entry.val, nr_pages); in do_swap_page()
4722 * may finish swapin first, free the entry, and in do_swap_page()
4723 * swapout reusing the same entry. It's in do_swap_page()
4725 * to entry reuse. in do_swap_page()
4727 if (swapcache_prepare(entry, nr_pages)) { in do_swap_page()
4739 memcg1_swapin(entry, nr_pages); in do_swap_page()
4741 shadow = swap_cache_get_shadow(entry); in do_swap_page()
4747 /* To provide entry to swap_read_folio() */ in do_swap_page()
4748 folio->swap = entry; in do_swap_page()
4750 folio->private = NULL; in do_swap_page()
4753 folio = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, in do_swap_page()
4763 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_swap_page()
4764 vmf->address, &vmf->ptl); in do_swap_page()
4765 if (likely(vmf->pte && in do_swap_page()
4766 pte_same(ptep_get(vmf->pte), vmf->orig_pte))) in do_swap_page()
4774 count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); in do_swap_page()
4781 page = folio_file_page(folio, swp_offset(entry)); in do_swap_page()
4790 if (unlikely(!folio_matches_swap_entry(folio, entry))) in do_swap_page()
4804 * folio->index of non-ksm folios would be nonlinear inside the in do_swap_page()
4805 * anon VMA -- the ksm flag is lost on actual swapout. in do_swap_page()
4807 folio = ksm_might_need_to_copy(folio, vma, vmf->address); in do_swap_page()
4812 } else if (unlikely(folio == ERR_PTR(-EHWPOISON))) { in do_swap_page()
4826 if ((vmf->flags & FAULT_FLAG_WRITE) && folio == swapcache && in do_swap_page()
4836 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in do_swap_page()
4837 &vmf->ptl); in do_swap_page()
4838 if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) in do_swap_page()
4849 unsigned long folio_start = ALIGN_DOWN(vmf->address, nr * PAGE_SIZE); in do_swap_page()
4850 unsigned long idx = (vmf->address - folio_start) / PAGE_SIZE; in do_swap_page()
4851 pte_t *folio_ptep = vmf->pte - idx; in do_swap_page()
4854 if (!pte_same(folio_pte, pte_move_swp_offset(vmf->orig_pte, -idx)) || in do_swap_page()
4859 address = folio_start; in do_swap_page()
4866 address = vmf->address; in do_swap_page()
4867 ptep = vmf->pte; in do_swap_page()
4871 unsigned long folio_start = address - idx * PAGE_SIZE; in do_swap_page()
4876 if (unlikely(folio_start < max(address & PMD_MASK, vma->vm_start))) in do_swap_page()
4878 if (unlikely(folio_end > pmd_addr_end(address, vma->vm_end))) in do_swap_page()
4881 folio_ptep = vmf->pte - idx; in do_swap_page()
4883 if (!pte_same(folio_pte, pte_move_swp_offset(vmf->orig_pte, -idx)) || in do_swap_page()
4888 address = folio_start; in do_swap_page()
4891 entry = folio->swap; in do_swap_page()
4892 page = &folio->page; in do_swap_page()
4909 * the swap entry concurrently) for certainly exclusive pages. in do_swap_page()
4912 exclusive = pte_swp_exclusive(vmf->orig_pte); in do_swap_page()
4916 * swapcache -> certainly exclusive. in do_swap_page()
4920 data_race(si->flags & SWP_STABLE_WRITES)) { in do_swap_page()
4945 * when reading from swap. This metadata may be indexed by swap entry in do_swap_page()
4948 arch_swap_restore(folio_swap(entry, folio), folio); in do_swap_page()
4951 * Remove the swap entry and conditionally try to free up the swapcache. in do_swap_page()
4955 swap_free_nr(entry, nr_pages); in do_swap_page()
4956 if (should_try_to_free_swap(folio, vma, vmf->flags)) in do_swap_page()
4959 add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr_pages); in do_swap_page()
4960 add_mm_counter(vma->vm_mm, MM_SWAPENTS, -nr_pages); in do_swap_page()
4961 pte = mk_pte(page, vma->vm_page_prot); in do_swap_page()
4962 if (pte_swp_soft_dirty(vmf->orig_pte)) in do_swap_page()
4964 if (pte_swp_uffd_wp(vmf->orig_pte)) in do_swap_page()
4970 * exposing them to the swapcache or because the swap entry indicates in do_swap_page()
4975 if ((vma->vm_flags & VM_WRITE) && !userfaultfd_pte_wp(vma, pte) && in do_swap_page()
4978 if (vmf->flags & FAULT_FLAG_WRITE) { in do_swap_page()
4980 vmf->flags &= ~FAULT_FLAG_WRITE; in do_swap_page()
4985 folio_ref_add(folio, nr_pages - 1); in do_swap_page()
4987 vmf->orig_pte = pte_advance_pfn(pte, page_idx); in do_swap_page()
4991 folio_add_new_anon_rmap(folio, vma, address, RMAP_EXCLUSIVE); in do_swap_page()
5002 folio_add_new_anon_rmap(folio, vma, address, rmap_flags); in do_swap_page()
5004 folio_add_anon_rmap_ptes(folio, page, nr_pages, vma, address, in do_swap_page()
5010 set_ptes(vma->vm_mm, address, ptep, pte, nr_pages); in do_swap_page()
5011 arch_do_swap_page_nr(vma->vm_mm, vma, address, in do_swap_page()
5017 * Hold the lock to avoid the swap entry to be reused in do_swap_page()
5028 if (vmf->flags & FAULT_FLAG_WRITE) { in do_swap_page()
5035 /* No need to invalidate - it was non-present before */ in do_swap_page()
5036 update_mmu_cache_range(vmf, vma, address, ptep, nr_pages); in do_swap_page()
5038 if (vmf->pte) in do_swap_page()
5039 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
5043 swapcache_clear(si, entry, nr_pages); in do_swap_page()
5051 if (vmf->pte) in do_swap_page()
5052 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
5062 swapcache_clear(si, entry, nr_pages); in do_swap_page()
5085 struct vm_area_struct *vma = vmf->vma; in alloc_anon_folio()
5095 * If uffd is active for the vma we need per-page fault fidelity to in alloc_anon_folio()
5104 * the faulting address and still be fully contained in the vma. in alloc_anon_folio()
5106 orders = thp_vma_allowable_orders(vma, vma->vm_flags, TVA_PAGEFAULT, in alloc_anon_folio()
5107 BIT(PMD_ORDER) - 1); in alloc_anon_folio()
5108 orders = thp_vma_suitable_orders(vma, vmf->address, orders); in alloc_anon_folio()
5113 pte = pte_offset_map(vmf->pmd, vmf->address & PMD_MASK); in alloc_anon_folio()
5115 return ERR_PTR(-EAGAIN); in alloc_anon_folio()
5124 addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); in alloc_anon_folio()
5138 addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); in alloc_anon_folio()
5141 if (mem_cgroup_charge(folio, vma->vm_mm, gfp)) { in alloc_anon_folio()
5151 * that the page corresponding to the faulting address in alloc_anon_folio()
5155 folio_zero_user(folio, vmf->address); in alloc_anon_folio()
5165 return folio_prealloc(vma->vm_mm, vma, vmf->address, true); in alloc_anon_folio()
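/*
 * Minimal sketch (not part of this file) of the per-order placement rule
 * that alloc_anon_folio() relies on via thp_vma_suitable_orders(): a
 * candidate folio of the given order is mapped at the fault address rounded
 * down to the folio size, and that aligned range must still fit inside the
 * VMA. The helper name is illustrative.
 */
static inline bool anon_order_fits(struct vm_area_struct *vma,
				   unsigned long fault_addr, unsigned int order)
{
	unsigned long addr = ALIGN_DOWN(fault_addr, PAGE_SIZE << order);

	return addr >= vma->vm_start &&
	       addr + (PAGE_SIZE << order) <= vma->vm_end;
}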
5169 * We enter with non-exclusive mmap_lock (to exclude vma changes,
5175 struct vm_area_struct *vma = vmf->vma; in do_anonymous_page()
5176 unsigned long addr = vmf->address; in do_anonymous_page()
5180 pte_t entry; in do_anonymous_page() local
5182 /* File mapping without ->vm_ops ? */ in do_anonymous_page()
5183 if (vma->vm_flags & VM_SHARED) in do_anonymous_page()
5190 if (pte_alloc(vma->vm_mm, vmf->pmd)) in do_anonymous_page()
5193 /* Use the zero-page for reads */ in do_anonymous_page()
5194 if (!(vmf->flags & FAULT_FLAG_WRITE) && in do_anonymous_page()
5195 !mm_forbids_zeropage(vma->vm_mm)) { in do_anonymous_page()
5196 entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), in do_anonymous_page()
5197 vma->vm_page_prot)); in do_anonymous_page()
5198 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_anonymous_page()
5199 vmf->address, &vmf->ptl); in do_anonymous_page()
5200 if (!vmf->pte) in do_anonymous_page()
5203 update_mmu_tlb(vma, vmf->address, vmf->pte); in do_anonymous_page()
5206 ret = check_stable_address_space(vma->vm_mm); in do_anonymous_page()
5211 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
5221 /* Returns NULL on OOM or ERR_PTR(-EAGAIN) if we must retry the fault */ in do_anonymous_page()
5229 addr = ALIGN_DOWN(vmf->address, nr_pages * PAGE_SIZE); in do_anonymous_page()
5238 entry = folio_mk_pte(folio, vma->vm_page_prot); in do_anonymous_page()
5239 entry = pte_sw_mkyoung(entry); in do_anonymous_page()
5240 if (vma->vm_flags & VM_WRITE) in do_anonymous_page()
5241 entry = pte_mkwrite(pte_mkdirty(entry), vma); in do_anonymous_page()
5243 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl); in do_anonymous_page()
5244 if (!vmf->pte) in do_anonymous_page()
5247 update_mmu_tlb(vma, addr, vmf->pte); in do_anonymous_page()
5249 } else if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) { in do_anonymous_page()
5250 update_mmu_tlb_range(vma, addr, vmf->pte, nr_pages); in do_anonymous_page()
5254 ret = check_stable_address_space(vma->vm_mm); in do_anonymous_page()
5260 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
5265 folio_ref_add(folio, nr_pages - 1); in do_anonymous_page()
5266 add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr_pages); in do_anonymous_page()
5272 entry = pte_mkuffd_wp(entry); in do_anonymous_page()
5273 set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr_pages); in do_anonymous_page()
5275 /* No need to invalidate - it was non-present before */ in do_anonymous_page()
5276 update_mmu_cache_range(vmf, vma, addr, vmf->pte, nr_pages); in do_anonymous_page()
5278 if (vmf->pte) in do_anonymous_page()
5279 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
5289 * The mmap_lock must have been held on entry, and may have been
5290 * released depending on flags and vma->vm_ops->fault() return value.
5295 struct vm_area_struct *vma = vmf->vma; in __do_fault()
5314 if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) { in __do_fault()
5315 vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); in __do_fault()
5316 if (!vmf->prealloc_pte) in __do_fault()
5320 ret = vma->vm_ops->fault(vmf); in __do_fault()
5325 folio = page_folio(vmf->page); in __do_fault()
5326 if (unlikely(PageHWPoison(vmf->page))) { in __do_fault()
5329 if (page_mapped(vmf->page)) in __do_fault()
5332 if (mapping_evict_folio(folio->mapping, folio)) in __do_fault()
5337 vmf->page = NULL; in __do_fault()
5344 VM_BUG_ON_PAGE(!folio_test_locked(folio), vmf->page); in __do_fault()
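/*
 * Contract sketch for the ->fault() call above: the handler either returns
 * an error/retry bit (VM_FAULT_OOM, VM_FAULT_SIGBUS, VM_FAULT_RETRY, ...) or
 * fills vmf->page with a referenced page. If it also returns VM_FAULT_LOCKED
 * the page must already be locked, which is what the VM_BUG_ON_PAGE() above
 * verifies; otherwise __do_fault() locks the page itself.
 */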
5352 struct vm_area_struct *vma = vmf->vma; in deposit_prealloc_pte()
5354 pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); in deposit_prealloc_pte()
5359 mm_inc_nr_ptes(vma->vm_mm); in deposit_prealloc_pte()
5360 vmf->prealloc_pte = NULL; in deposit_prealloc_pte()
5365 struct vm_area_struct *vma = vmf->vma; in do_set_pmd()
5366 bool write = vmf->flags & FAULT_FLAG_WRITE; in do_set_pmd()
5367 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in do_set_pmd()
5368 pmd_t entry; in do_set_pmd() local
5374 * PMD mappings, but PTE-mapped THP are fine. So let's simply refuse any in do_set_pmd()
5378 if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags, in do_set_pmd()
5387 page = &folio->page; in do_set_pmd()
5400 * related to the pte entry. Use the preallocated table for that. in do_set_pmd()
5402 if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) { in do_set_pmd()
5403 vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); in do_set_pmd()
5404 if (!vmf->prealloc_pte) in do_set_pmd()
5408 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in do_set_pmd()
5409 if (unlikely(!pmd_none(*vmf->pmd))) in do_set_pmd()
5414 entry = folio_mk_pmd(folio, vma->vm_page_prot); in do_set_pmd()
5416 entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); in do_set_pmd()
5418 add_mm_counter(vma->vm_mm, mm_counter_file(folio), HPAGE_PMD_NR); in do_set_pmd()
5427 set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); in do_set_pmd()
5429 update_mmu_cache_pmd(vma, haddr, vmf->pmd); in do_set_pmd()
5435 spin_unlock(vmf->ptl); in do_set_pmd()
5446 * set_pte_range - Set a range of PTEs to point to pages in a folio.
5451 * @addr: The first address to create a PTE for.
5456 struct vm_area_struct *vma = vmf->vma; in set_pte_range()
5457 bool write = vmf->flags & FAULT_FLAG_WRITE; in set_pte_range()
5458 bool prefault = !in_range(vmf->address, addr, nr * PAGE_SIZE); in set_pte_range()
5459 pte_t entry; in set_pte_range() local
5462 entry = mk_pte(page, vma->vm_page_prot); in set_pte_range()
5465 entry = pte_mkold(entry); in set_pte_range()
5467 entry = pte_sw_mkyoung(entry); in set_pte_range()
5470 entry = maybe_mkwrite(pte_mkdirty(entry), vma); in set_pte_range()
5471 else if (pte_write(entry) && folio_test_dirty(folio)) in set_pte_range()
5472 entry = pte_mkdirty(entry); in set_pte_range()
5474 entry = pte_mkuffd_wp(entry); in set_pte_range()
5475 /* copy-on-write page */ in set_pte_range()
5476 if (write && !(vma->vm_flags & VM_SHARED)) { in set_pte_range()
5483 set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr); in set_pte_range()
5485 /* no need to invalidate: a not-present page won't be cached */ in set_pte_range()
5486 update_mmu_cache_range(vmf, vma, addr, vmf->pte, nr); in set_pte_range()
5491 if (vmf->flags & FAULT_FLAG_ORIG_PTE_VALID) in vmf_pte_changed()
5492 return !pte_same(ptep_get(vmf->pte), vmf->orig_pte); in vmf_pte_changed()
5494 return !pte_none(ptep_get(vmf->pte)); in vmf_pte_changed()
5498 * finish_fault - finish page fault once we have prepared the page to fault
5514 struct vm_area_struct *vma = vmf->vma; in finish_fault()
5518 bool is_cow = (vmf->flags & FAULT_FLAG_WRITE) && in finish_fault()
5519 !(vma->vm_flags & VM_SHARED); in finish_fault()
5525 addr = vmf->address; in finish_fault()
5529 page = vmf->cow_page; in finish_fault()
5531 page = vmf->page; in finish_fault()
5538 if (!(vma->vm_flags & VM_SHARED)) { in finish_fault()
5539 ret = check_stable_address_space(vma->vm_mm); in finish_fault()
5544 if (!needs_fallback && vma->vm_file) { in finish_fault()
5545 struct address_space *mapping = vma->vm_file->f_mapping; in finish_fault()
5548 file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); in finish_fault()
5561 if (pmd_none(*vmf->pmd)) { in finish_fault()
5568 if (vmf->prealloc_pte) in finish_fault()
5569 pmd_install(vma->vm_mm, vmf->pmd, &vmf->prealloc_pte); in finish_fault()
5570 else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) in finish_fault()
5576 /* Using per-page fault to maintain the uffd semantics */ in finish_fault()
5581 /* The page offset of vmf->address within the VMA. */ in finish_fault()
5582 pgoff_t vma_off = vmf->pgoff - vmf->vma->vm_pgoff; in finish_fault()
5583 /* The index of the entry in the pagetable for fault page. */ in finish_fault()
5584 pgoff_t pte_off = pte_index(vmf->address); in finish_fault()
5587 * Fall back to per-page fault in case the folio size in page in finish_fault()
5591 vma_off + (nr_pages - idx) > vma_pages(vma) || in finish_fault()
5593 pte_off + (nr_pages - idx) > PTRS_PER_PTE)) { in finish_fault()
5597 addr = vmf->address - idx * PAGE_SIZE; in finish_fault()
5598 page = &folio->page; in finish_fault()
5602 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in finish_fault()
5603 addr, &vmf->ptl); in finish_fault()
5604 if (!vmf->pte) in finish_fault()
5607 /* Re-check under ptl */ in finish_fault()
5609 update_mmu_tlb(vma, addr, vmf->pte); in finish_fault()
5612 } else if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) { in finish_fault()
5614 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_fault()
5618 folio_ref_add(folio, nr_pages - 1); in finish_fault()
5621 add_mm_counter(vma->vm_mm, type, nr_pages); in finish_fault()
5625 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_fault()
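/*
 * The idx/vma_off/pte_off checks above decide whether finish_fault() can map
 * the whole folio in one set_pte_range() call: the folio-sized range must
 * fit within the VMA and within a single PTE page, and uffd-wp must not
 * require per-page fidelity; otherwise the fault falls back to mapping only
 * the page at vmf->address.
 */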
5646 return -EINVAL; in fault_around_bytes_set()
5649 * The minimum value is 1 page; however, this results in no fault-around in fault_around_bytes_set()
5670 * do_fault_around() tries to map a few pages around the fault address. The hope
5674 * It uses vm_ops->map_pages() to map the pages, which skips the page if it's
5675 * not ready to be mapped: not up-to-date, locked, etc.
5684 * The virtual address of the area that we map is naturally aligned to
5692 pgoff_t pte_off = pte_index(vmf->address); in do_fault_around()
5693 /* The page offset of vmf->address within the VMA. */ in do_fault_around()
5694 pgoff_t vma_off = vmf->pgoff - vmf->vma->vm_pgoff; in do_fault_around()
5698 /* The PTE offset of the start address, clamped to the VMA. */ in do_fault_around()
5700 pte_off - min(pte_off, vma_off)); in do_fault_around()
5702 /* The PTE offset of the end address, clamped to the VMA and PTE. */ in do_fault_around()
5704 pte_off + vma_pages(vmf->vma) - vma_off) - 1; in do_fault_around()
5706 if (pmd_none(*vmf->pmd)) { in do_fault_around()
5707 vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm); in do_fault_around()
5708 if (!vmf->prealloc_pte) in do_fault_around()
5713 ret = vmf->vma->vm_ops->map_pages(vmf, in do_fault_around()
5714 vmf->pgoff + from_pte - pte_off, in do_fault_around()
5715 vmf->pgoff + to_pte - pte_off); in do_fault_around()
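/*
 * Worked example for the clamping above (illustrative numbers): with a
 * 16-page fault-around window, pte_off = 35 and vma_off = 3 give
 * from_pte = max(ALIGN_DOWN(35, 16), 35 - 3) = 32, and for a VMA of 100
 * pages to_pte = min3(48, PTRS_PER_PTE, 35 + 100 - 3) - 1 = 47 (assuming
 * PTRS_PER_PTE = 512), so ->map_pages() is asked to map the page-cache
 * range [vmf->pgoff - 3 .. vmf->pgoff + 12].
 */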
5721 /* Return true if we should do read fault-around, false otherwise */
5724 /* No ->map_pages? No way to fault around... */ in should_fault_around()
5725 if (!vmf->vma->vm_ops->map_pages) in should_fault_around()
5728 if (uffd_disable_fault_around(vmf->vma)) in should_fault_around()
5741 * Let's call ->map_pages() first and use ->fault() as fallback in do_read_fault()
5760 folio = page_folio(vmf->page); in do_read_fault()
5769 struct vm_area_struct *vma = vmf->vma; in do_cow_fault()
5779 folio = folio_prealloc(vma->vm_mm, vma, vmf->address, false); in do_cow_fault()
5783 vmf->cow_page = &folio->page; in do_cow_fault()
5791 if (copy_mc_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma)) { in do_cow_fault()
5799 unlock_page(vmf->page); in do_cow_fault()
5800 put_page(vmf->page); in do_cow_fault()
5811 struct vm_area_struct *vma = vmf->vma; in do_shared_fault()
5823 folio = page_folio(vmf->page); in do_shared_fault()
5826 * Check if the backing address space wants to know that the page is in do_shared_fault()
5829 if (vma->vm_ops->page_mkwrite) { in do_shared_fault()
5852 * We enter with non-exclusive mmap_lock (to exclude vma changes,
5861 struct vm_area_struct *vma = vmf->vma; in do_fault()
5862 struct mm_struct *vm_mm = vma->vm_mm; in do_fault()
5868 if (!vma->vm_ops->fault) { in do_fault()
5869 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, in do_fault()
5870 vmf->address, &vmf->ptl); in do_fault()
5871 if (unlikely(!vmf->pte)) in do_fault()
5881 if (unlikely(pte_none(ptep_get(vmf->pte)))) in do_fault()
5886 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_fault()
5888 } else if (!(vmf->flags & FAULT_FLAG_WRITE)) in do_fault()
5890 else if (!(vma->vm_flags & VM_SHARED)) in do_fault()
5896 if (vmf->prealloc_pte) { in do_fault()
5897 pte_free(vm_mm, vmf->prealloc_pte); in do_fault()
5898 vmf->prealloc_pte = NULL; in do_fault()
5907 struct vm_area_struct *vma = vmf->vma; in numa_migrate_check()
5921 * Flag if the folio is shared between multiple address spaces. This in numa_migrate_check()
5924 if (folio_maybe_mapped_shared(folio) && (vma->vm_flags & VM_SHARED)) in numa_migrate_check()
5931 *last_cpupid = (-1 & LAST_CPUPID_MASK); in numa_migrate_check()
5957 pte = pte_modify(old_pte, vma->vm_page_prot); in numa_rebuild_single_mapping()
5969 int nr = pte_pfn(fault_pte) - folio_pfn(folio); in numa_rebuild_large_mapping()
5970 unsigned long start, end, addr = vmf->address; in numa_rebuild_large_mapping()
5971 unsigned long addr_start = addr - (nr << PAGE_SHIFT); in numa_rebuild_large_mapping()
5976 start = max3(addr_start, pt_start, vma->vm_start); in numa_rebuild_large_mapping()
5978 vma->vm_end); in numa_rebuild_large_mapping()
5979 start_ptep = vmf->pte - ((addr - start) >> PAGE_SHIFT); in numa_rebuild_large_mapping()
5993 ptent = pte_modify(ptent, vma->vm_page_prot); in numa_rebuild_large_mapping()
6006 struct vm_area_struct *vma = vmf->vma; in do_numa_page()
6020 spin_lock(vmf->ptl); in do_numa_page()
6022 old_pte = ptep_get(vmf->pte); in do_numa_page()
6024 if (unlikely(!pte_same(old_pte, vmf->orig_pte))) { in do_numa_page()
6025 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
6029 pte = pte_modify(old_pte, vma->vm_page_prot); in do_numa_page()
6037 can_change_pte_writable(vma, vmf->address, pte)) in do_numa_page()
6040 folio = vm_normal_folio(vma, vmf->address, pte); in do_numa_page()
6047 target_nid = numa_migrate_check(folio, vmf, vmf->address, &flags, in do_numa_page()
6056 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
6069 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_numa_page()
6070 vmf->address, &vmf->ptl); in do_numa_page()
6071 if (unlikely(!vmf->pte)) in do_numa_page()
6073 if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in do_numa_page()
6074 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
6080 * non-accessible ptes, some can allow access by kernel mode. in do_numa_page()
6086 numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte, in do_numa_page()
6088 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
6097 struct vm_area_struct *vma = vmf->vma; in create_huge_pmd()
6100 if (vma->vm_ops->huge_fault) in create_huge_pmd()
6101 return vma->vm_ops->huge_fault(vmf, PMD_ORDER); in create_huge_pmd()
6108 struct vm_area_struct *vma = vmf->vma; in wp_huge_pmd()
6109 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in wp_huge_pmd()
6114 userfaultfd_huge_pmd_wp(vma, vmf->orig_pmd)) { in wp_huge_pmd()
6115 if (userfaultfd_wp_async(vmf->vma)) in wp_huge_pmd()
6122 if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { in wp_huge_pmd()
6123 if (vma->vm_ops->huge_fault) { in wp_huge_pmd()
6124 ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER); in wp_huge_pmd()
6131 /* COW or write-notify handled on pte level: split pmd. */ in wp_huge_pmd()
6132 __split_huge_pmd(vma, vmf->pmd, vmf->address, false); in wp_huge_pmd()
6141 struct vm_area_struct *vma = vmf->vma; in create_huge_pud()
6145 if (vma->vm_ops->huge_fault) in create_huge_pud()
6146 return vma->vm_ops->huge_fault(vmf, PUD_ORDER); in create_huge_pud()
6155 struct vm_area_struct *vma = vmf->vma; in wp_huge_pud()
6161 if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { in wp_huge_pud()
6162 if (vma->vm_ops->huge_fault) { in wp_huge_pud()
6163 ret = vma->vm_ops->huge_fault(vmf, PUD_ORDER); in wp_huge_pud()
6169 /* COW or write-notify not handled on PUD level: split pud. */ in wp_huge_pud()
6170 __split_huge_pud(vma, vmf->pud, vmf->address); in wp_huge_pud()
6177 * page table. For example, a non-populated virtual page is accessed
6184 * stale read-only TLB entry exists in the local CPU and needs to be
6197 if (vmf->flags & FAULT_FLAG_TRIED) in fix_spurious_fault()
6205 if (vmf->flags & FAULT_FLAG_WRITE) { in fix_spurious_fault()
6207 flush_tlb_fix_spurious_fault(vmf->vma, vmf->address, in fix_spurious_fault()
6208 vmf->pte); in fix_spurious_fault()
6210 flush_tlb_fix_spurious_fault_pmd(vmf->vma, vmf->address, in fix_spurious_fault()
6211 vmf->pmd); in fix_spurious_fault()
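/*
 * Put differently: a spurious fault needs no page-table change at all; the
 * only possibly-stale state is a TLB entry on the local CPU. The fix is
 * therefore a targeted flush of the faulting entry (pte or pmd level, as
 * above), and the whole thing is skipped when FAULT_FLAG_TRIED is set, as
 * checked earlier in this helper.
 */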
6223 * We enter with non-exclusive mmap_lock (to exclude vma changes, but allow
6231 pte_t entry; in handle_pte_fault() local
6233 if (unlikely(pmd_none(*vmf->pmd))) { in handle_pte_fault()
6235 * Leave __pte_alloc() until later: because vm_ops->fault may in handle_pte_fault()
6240 vmf->pte = NULL; in handle_pte_fault()
6241 vmf->flags &= ~FAULT_FLAG_ORIG_PTE_VALID; in handle_pte_fault()
6251 * Use the maywrite version to indicate that vmf->pte may be in handle_pte_fault()
6253 * change of the !pte_none() entry, there is no need to recheck in handle_pte_fault()
6258 vmf->pte = pte_offset_map_rw_nolock(vmf->vma->vm_mm, vmf->pmd, in handle_pte_fault()
6259 vmf->address, &dummy_pmdval, in handle_pte_fault()
6260 &vmf->ptl); in handle_pte_fault()
6261 if (unlikely(!vmf->pte)) in handle_pte_fault()
6263 vmf->orig_pte = ptep_get_lockless(vmf->pte); in handle_pte_fault()
6264 vmf->flags |= FAULT_FLAG_ORIG_PTE_VALID; in handle_pte_fault()
6266 if (pte_none(vmf->orig_pte)) { in handle_pte_fault()
6267 pte_unmap(vmf->pte); in handle_pte_fault()
6268 vmf->pte = NULL; in handle_pte_fault()
6272 if (!vmf->pte) in handle_pte_fault()
6275 if (!pte_present(vmf->orig_pte)) in handle_pte_fault()
6278 if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) in handle_pte_fault()
6281 spin_lock(vmf->ptl); in handle_pte_fault()
6282 entry = vmf->orig_pte; in handle_pte_fault()
6283 if (unlikely(!pte_same(ptep_get(vmf->pte), entry))) { in handle_pte_fault()
6284 update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); in handle_pte_fault()
6287 if (vmf->flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) { in handle_pte_fault()
6288 if (!pte_write(entry)) in handle_pte_fault()
6290 else if (likely(vmf->flags & FAULT_FLAG_WRITE)) in handle_pte_fault()
6291 entry = pte_mkdirty(entry); in handle_pte_fault()
6293 entry = pte_mkyoung(entry); in handle_pte_fault()
6294 if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry, in handle_pte_fault()
6295 vmf->flags & FAULT_FLAG_WRITE)) in handle_pte_fault()
6296 update_mmu_cache_range(vmf, vmf->vma, vmf->address, in handle_pte_fault()
6297 vmf->pte, 1); in handle_pte_fault()
6301 pte_unmap_unlock(vmf->pte, vmf->ptl); in handle_pte_fault()
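/*
 * Dispatch summary for handle_pte_fault() (sketch, not verbatim): a none PTE
 * goes to do_anonymous_page() or do_fault() depending on whether the VMA is
 * anonymous, a non-present PTE goes to do_swap_page(), a prot-none PTE on an
 * accessible VMA goes to do_numa_page(); anything else is a write/unshare or
 * access-bit update handled right here under the PTE lock.
 */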
6306 * On entry, we hold either the VMA lock or the mmap_lock
6312 unsigned long address, unsigned int flags) in __handle_mm_fault() argument
6316 .address = address & PAGE_MASK, in __handle_mm_fault()
6317 .real_address = address, in __handle_mm_fault()
6319 .pgoff = linear_page_index(vma, address), in __handle_mm_fault()
6322 struct mm_struct *mm = vma->vm_mm; in __handle_mm_fault()
6323 vm_flags_t vm_flags = vma->vm_flags; in __handle_mm_fault()
6328 pgd = pgd_offset(mm, address); in __handle_mm_fault()
6329 p4d = p4d_alloc(mm, pgd, address); in __handle_mm_fault()
6333 vmf.pud = pud_alloc(mm, p4d, address); in __handle_mm_fault()
6363 vmf.pmd = pmd_alloc(mm, vmf.pud, address); in __handle_mm_fault()
6415 * mm_account_fault - Do page fault accounting
6418 * of perf event counters, but we'll still do the per-task accounting to
6420 * @address: the faulted address.
6427 * still be in the per-arch page fault handler on entry to the page fault.
6430 unsigned long address, unsigned int flags, in mm_account_fault() argument
6448 * Do not account for unsuccessful faults (e.g. when the address wasn't in mm_account_fault()
6464 current->maj_flt++; in mm_account_fault()
6466 current->min_flt++; in mm_account_fault()
6477 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); in mm_account_fault()
6479 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); in mm_account_fault()
6486 current->in_lru_fault = vma_has_recency(vma); in lru_gen_enter_fault()
6491 current->in_lru_fault = false; in lru_gen_exit_fault()
6511 * just treat it like an ordinary read-fault otherwise. in sanitize_fault_flags()
6513 if (!is_cow_mapping(vma->vm_flags)) in sanitize_fault_flags()
6516 /* Write faults on read-only mappings are impossible ... */ in sanitize_fault_flags()
6517 if (WARN_ON_ONCE(!(vma->vm_flags & VM_MAYWRITE))) in sanitize_fault_flags()
6520 if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE) && in sanitize_fault_flags()
6521 !is_cow_mapping(vma->vm_flags))) in sanitize_fault_flags()
6526 * Per-VMA locks can't be used with FAULT_FLAG_RETRY_NOWAIT because of in sanitize_fault_flags()
6545 vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, in handle_mm_fault() argument
6549 struct mm_struct *mm = vma->vm_mm; in handle_mm_fault()
6566 is_droppable = !!(vma->vm_flags & VM_DROPPABLE); in handle_mm_fault()
6578 ret = hugetlb_fault(vma->vm_mm, vma, address, flags); in handle_mm_fault()
6580 ret = __handle_mm_fault(vma, address, flags); in handle_mm_fault()
6583 * Warning: It is no longer safe to dereference vma-> after this point, in handle_mm_fault()
6606 mm_account_fault(mm, regs, address, flags, ret); in handle_mm_fault()
6615 * We've already handled the fast-path in-line.
6617 int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) in __p4d_alloc() argument
6619 p4d_t *new = p4d_alloc_one(mm, address); in __p4d_alloc()
6621 return -ENOMEM; in __p4d_alloc()
6623 spin_lock(&mm->page_table_lock); in __p4d_alloc()
6630 spin_unlock(&mm->page_table_lock); in __p4d_alloc()
6638 * We've already handled the fast-path in-line.
6640 int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) in __pud_alloc() argument
6642 pud_t *new = pud_alloc_one(mm, address); in __pud_alloc()
6644 return -ENOMEM; in __pud_alloc()
6646 spin_lock(&mm->page_table_lock); in __pud_alloc()
6653 spin_unlock(&mm->page_table_lock); in __pud_alloc()
6661 * We've already handled the fast-path in-line.
6663 int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) in __pmd_alloc() argument
6666 pmd_t *new = pmd_alloc_one(mm, address); in __pmd_alloc()
6668 return -ENOMEM; in __pmd_alloc()
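/*
 * __p4d_alloc(), __pud_alloc() and __pmd_alloc() all follow the same
 * pattern: allocate the new table with no lock held, then take the
 * appropriate page-table lock and install the table only if no other thread
 * has populated the entry in the meantime (otherwise the fresh allocation is
 * freed again).
 */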
6689 args->lock = lock; in pfnmap_args_setup()
6690 args->ptep = ptep; in pfnmap_args_setup()
6691 args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT); in pfnmap_args_setup()
6692 args->addr_mask = addr_mask; in pfnmap_args_setup()
6693 args->pgprot = pgprot; in pfnmap_args_setup()
6694 args->writable = writable; in pfnmap_args_setup()
6695 args->special = special; in pfnmap_args_setup()
6701 struct file *file = vma->vm_file; in pfnmap_lockdep_assert()
6702 struct address_space *mapping = file ? file->f_mapping : NULL; in pfnmap_lockdep_assert()
6705 lockdep_assert(lockdep_is_held(&mapping->i_mmap_rwsem) || in pfnmap_lockdep_assert()
6706 lockdep_is_held(&vma->vm_mm->mmap_lock)); in pfnmap_lockdep_assert()
6708 lockdep_assert(lockdep_is_held(&vma->vm_mm->mmap_lock)); in pfnmap_lockdep_assert()
6713 * follow_pfnmap_start() - Look up a pfn mapping at a user virtual address
6716 * The caller needs to setup args->vma and args->address to point to the
6717 * virtual address as the target of such lookup. On a successful return,
6733 * a later point in time can trigger use-after-free.
6745 struct vm_area_struct *vma = args->vma; in follow_pfnmap_start()
6746 unsigned long address = args->address; in follow_pfnmap_start() local
6747 struct mm_struct *mm = vma->vm_mm; in follow_pfnmap_start()
6757 if (unlikely(address < vma->vm_start || address >= vma->vm_end)) in follow_pfnmap_start()
6760 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) in follow_pfnmap_start()
6763 pgdp = pgd_offset(mm, address); in follow_pfnmap_start()
6767 p4dp = p4d_offset(pgdp, address); in follow_pfnmap_start()
6772 pudp = pud_offset(p4dp, address); in follow_pfnmap_start()
6788 pmdp = pmd_offset(pudp, address); in follow_pfnmap_start()
6802 ptep = pte_offset_map_lock(mm, pmdp, address, &lock); in follow_pfnmap_start()
6815 return -EINVAL; in follow_pfnmap_start()
6828 if (args->lock) in follow_pfnmap_end()
6829 spin_unlock(args->lock); in follow_pfnmap_end()
6830 if (args->ptep) in follow_pfnmap_end()
6831 pte_unmap(args->ptep); in follow_pfnmap_end()
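/*
 * Minimal usage sketch for the API above (hypothetical caller; error
 * handling trimmed): the looked-up PFN is only valid between
 * follow_pfnmap_start() and follow_pfnmap_end(), so consume it before
 * calling the latter.
 */
static bool example_addr_maps_pfn(struct vm_area_struct *vma,
				  unsigned long addr, unsigned long pfn)
{
	struct follow_pfnmap_args args = { .vma = vma, .address = addr };
	bool match;

	if (follow_pfnmap_start(&args))
		return false;
	match = (args.pfn == pfn);	/* use the result while still mapped */
	follow_pfnmap_end(&args);
	return match;
}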
6837 * generic_access_phys - generic implementation for iomem mmap access
6839 * @addr: userspace address, not relative offset within @vma
6855 int ret = -EINVAL; in generic_access_phys()
6857 struct follow_pfnmap_args args = { .vma = vma, .address = addr }; in generic_access_phys()
6861 return -EINVAL; in generic_access_phys()
6868 return -EINVAL; in generic_access_phys()
6872 return -ENOMEM; in generic_access_phys()
6900 * Access another process' address space as given in mm.
6911 /* Untag the address before looking up the VMA */ in __access_remote_vm()
6935 return buf - old_buf; in __access_remote_vm()
6947 if (vma->vm_ops && vma->vm_ops->access) in __access_remote_vm()
6948 bytes = vma->vm_ops->access(vma, addr, buf, in __access_remote_vm()
6956 offset = addr & (PAGE_SIZE-1); in __access_remote_vm()
6957 if (bytes > PAGE_SIZE-offset) in __access_remote_vm()
6958 bytes = PAGE_SIZE-offset; in __access_remote_vm()
6971 len -= bytes; in __access_remote_vm()
6977 return buf - old_buf; in __access_remote_vm()
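/*
 * The copy loop above works one page of the target at a time: each iteration
 * copies at most PAGE_SIZE - (addr & (PAGE_SIZE - 1)) bytes so a chunk never
 * crosses a page boundary, falling back to vma->vm_ops->access() when a
 * normal page cannot be pinned, and then advances addr, buf and len by the
 * amount actually copied.
 */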
6981 * access_remote_vm - access another process' address space
6982 * @mm: the mm_struct of the target address space
6983 * @addr: start address to access
6999 * Access another process' address space.
7023 * Copy a string from another process's address space as given in mm.
7024 * If there is any error, return -EFAULT.
7035 return -EFAULT; in __copy_remote_vm_str()
7041 err = -EFAULT; in __copy_remote_vm_str()
7060 err = -EFAULT; in __copy_remote_vm_str()
7066 offset = addr & (PAGE_SIZE - 1); in __copy_remote_vm_str()
7067 if (bytes > PAGE_SIZE - offset) in __copy_remote_vm_str()
7068 bytes = PAGE_SIZE - offset; in __copy_remote_vm_str()
7079 buf += bytes - 1; in __copy_remote_vm_str()
7086 addr += bytes - 1; in __copy_remote_vm_str()
7087 copy_from_user_page(vma, page, addr, buf, maddr + (PAGE_SIZE - 1), 1); in __copy_remote_vm_str()
7091 len -= bytes; in __copy_remote_vm_str()
7100 return buf - old_buf; in __copy_remote_vm_str()
7104 * copy_remote_vm_str - copy a string from another process's address space.
7105 * @tsk: the task of the target address space
7106 * @addr: start address to read from
7114 * not including the trailing NUL. Always guaranteed to leave a NUL-terminated
7115 * buffer. On any error, return -EFAULT.
7129 return -EFAULT; in copy_remote_vm_str()
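/*
 * Hypothetical caller sketch for copy_remote_vm_str() (assuming the
 * (tsk, addr, buf, len, gup_flags) parameter order suggested by the
 * kernel-doc above; not taken from this file): on success the buffer is
 * always NUL-terminated, on failure -EFAULT comes back.
 */
static int example_read_remote_str(struct task_struct *tsk,
				   unsigned long uaddr, char *buf, int buflen)
{
	int copied = copy_remote_vm_str(tsk, uaddr, buf, buflen, 0);

	return copied < 0 ? copied : 0;
}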
7146 struct mm_struct *mm = current->mm; in print_vma_addr()
7156 if (vma && vma->vm_file) { in print_vma_addr()
7157 struct file *f = vma->vm_file; in print_vma_addr()
7158 ip -= vma->vm_start; in print_vma_addr()
7159 ip += vma->vm_pgoff << PAGE_SHIFT; in print_vma_addr()
7161 vma->vm_start, in print_vma_addr()
7162 vma->vm_end - vma->vm_start); in print_vma_addr()
7173 if (current->mm) in __might_fault()
7174 might_lock_read(&current->mm->mmap_lock); in __might_fault()
7192 ~(((unsigned long)nr_pages << PAGE_SHIFT) - 1); in process_huge_page()
7196 n = (addr_hint - addr) / PAGE_SIZE; in process_huge_page()
7202 for (i = nr_pages - 1; i >= 2 * n; i--) { in process_huge_page()
7210 base = nr_pages - 2 * (nr_pages - n); in process_huge_page()
7211 l = nr_pages - n; in process_huge_page()
7221 * Process remaining subpages in a left-right-left-right pattern in process_huge_page()
7226 int right_idx = base + 2 * l - 1 - i; in process_huge_page()
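/*
 * The ordering above is a cache-locality trick: subpages far away from the
 * faulting address (addr_hint) are processed first, then the remaining ones
 * alternately from the left and the right, converging on the target subpage
 * so that its cache lines are written last and stay hot for the access that
 * triggered the fault.
 */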
7262 * folio_zero_user - Zero a folio which will be mapped to userspace.
7264 * @addr_hint: The address that will be accessed, or the base address if unclear.
7293 return -EHWPOISON; in copy_user_gigantic_page()
7307 struct page *dst = folio_page(copy_arg->dst, idx); in copy_subpage()
7308 struct page *src = folio_page(copy_arg->src, idx); in copy_subpage()
7310 if (copy_mc_user_highpage(dst, src, addr, copy_arg->vma)) in copy_subpage()
7311 return -EHWPOISON; in copy_subpage()
7351 ret_val -= (PAGE_SIZE - rc); in copy_folio_from_user()
7369 page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0, in ptlock_cache_init()
7380 ptdesc->ptl = ptl; in ptlock_alloc()
7386 if (ptdesc->ptl) in ptlock_free()
7387 kmem_cache_free(page_ptl_cachep, ptdesc->ptl); in ptlock_free()
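/*
 * These helpers back split page-table locks: when spinlock_t is too big to
 * sit directly inside struct ptdesc (e.g. with lockdep enabled), the lock is
 * allocated from the "page->ptl" kmem_cache created above and hung off
 * ptdesc->ptl instead.
 */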