Lines Matching +full:entry +full:-address in mm/memory.c
1 // SPDX-License-Identifier: GPL-2.0-only
9 * demand-loading started 01.12.91 - seems it is high on the list of
10 * things wanted, and it should be easy to implement. - Linus
14 * Ok, demand-loading was easy, shared pages a little bit tricker. Shared
15 * pages started 02.12.91, seems to work. - Linus.
21 * Also corrected some "invalidate()"s - I wasn't doing enough of them.
27 * 19.12.91 - works, somewhat. Sometimes I get faults, don't know why.
29 * 20.12.91 - Ok, making the swap-device changeable like the root.
33 * 05.04.94 - Multi-page memory management added for v1.1.
36 * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG
69 #include <linux/memory-tiers.h>
89 #include "pgalloc-track.h"
94 #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
102 * Return true if the original pte was a uffd-wp pte marker (so the pte was
103 * wr-protected).
107 if (!userfaultfd_wp(vmf->vma)) in vmf_orig_pte_uffd_wp()
109 if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID)) in vmf_orig_pte_uffd_wp()
112 return pte_is_uffd_wp_marker(vmf->orig_pte); in vmf_orig_pte_uffd_wp()
116 * Randomize the address space (stacks, mmaps, brk, etc.).
195 mm_dec_nr_ptes(tlb->mm); in free_pte_range()
223 if (end - 1 > ceiling - 1) in free_pmd_range()
229 mm_dec_nr_pmds(tlb->mm); in free_pmd_range()
257 if (end - 1 > ceiling - 1) in free_pud_range()
263 mm_dec_nr_puds(tlb->mm); in free_pud_range()
291 if (end - 1 > ceiling - 1) in free_p4d_range()
300 * free_pgd_range - Unmap and free page tables in the range
302 * @addr: virtual address start
303 * @end: virtual address end
304 * @floor: lowest address boundary
305 * @ceiling: highest address boundary
307 * This function tears down all user-level page tables in the
308 * specified virtual address range [@addr..@end). It is part of
325 * Why all these "- 1"s? Because 0 represents both the bottom in free_pgd_range()
326 * of the address space and the top of it (using -1 for the in free_pgd_range()
329 * the address space, but end 0 and ceiling 0 refer to the top in free_pgd_range()
330 * Comparisons need to use "end - 1" and "ceiling - 1" (though in free_pgd_range()
341 * bother to round floor or end up - the tests don't need that. in free_pgd_range()
355 if (end - 1 > ceiling - 1) in free_pgd_range()
356 end -= PMD_SIZE; in free_pgd_range()
357 if (addr > end - 1) in free_pgd_range()
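The "- 1"s above rely on unsigned wraparound: a ceiling of 0 stands for the very top of the address space, and subtracting 1 turns it into ULONG_MAX, so no real end can compare above it. A minimal stand-alone illustration, not kernel code (PMD_SIZE stands in for the kernel macro):

/* Illustration only: why "end - 1 > ceiling - 1" handles ceiling == 0. */
static void ceiling_example(void)
{
        unsigned long end = 0x00007fffffffe000UL; /* end of range being freed */
        unsigned long ceiling = 0;                /* 0 == top of address space */

        if (end - 1 > ceiling - 1)  /* ceiling - 1 wraps to ULONG_MAX: false */
                end -= PMD_SIZE;    /* end only trimmed for a real, lower ceiling */
}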
364 pgd = pgd_offset(tlb->mm, addr); in free_pgd_range()
382 unsigned long addr = vma->vm_start; in free_pgtables()
389 next = mas_find(mas, ceiling - 1); in free_pgtables()
407 while (next && next->vm_start <= vma->vm_end + PMD_SIZE) { in free_pgtables()
409 next = mas_find(mas, ceiling - 1); in free_pgtables()
419 free_pgd_range(tlb, addr, vma->vm_end, in free_pgtables()
420 floor, next ? next->vm_start : ceiling); in free_pgtables()
439 * of a chain of data-dependent loads, meaning most CPUs (alpha in pmd_install()
441 * seen in-order. See the alpha page table accessors for the in pmd_install()
455 return -ENOMEM; in __pte_alloc()
467 return -ENOMEM; in __pte_alloc_kernel()
575 * This function is called to print an error when a bad page table entry (e.g.,
576 * corrupted page table entry) is found. For example, we might have a
577 * PFN-mapped pte in a region that doesn't allow it.
582 * re-walk the page table to dump information: the caller MUST prevent page
587 unsigned long addr, unsigned long long entry, struct page *page, in print_bad_page_map() argument
596 mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL; in print_bad_page_map()
599 pr_alert("BUG: Bad page map in process %s %s:%08llx", current->comm, in print_bad_page_map()
600 pgtable_level_to_str(level), entry); in print_bad_page_map()
601 __print_bad_page_map_pgtable(vma->vm_mm, addr); in print_bad_page_map()
605 (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); in print_bad_page_map()
607 vma->vm_file, in print_bad_page_map()
608 vma->vm_ops ? vma->vm_ops->fault : NULL, in print_bad_page_map()
609 vma->vm_file ? vma->vm_file->f_op->mmap : NULL, in print_bad_page_map()
610 vma->vm_file ? vma->vm_file->f_op->mmap_prepare : NULL, in print_bad_page_map()
611 mapping ? mapping->a_ops->read_folio : NULL); in print_bad_page_map()
619 * __vm_normal_page() - Get the "struct page" associated with a page table entry.
620 * @vma: The VMA mapping the page table entry.
621 * @addr: The address where the page table entry is mapped.
622 * @pfn: The PFN stored in the page table entry.
623 * @special: Whether the page table entry is marked "special".
625 * @entry: The page table entry value for error reporting purposes only.
639 * page table entry bit, such as pte_special(), in which case this function is
641 * entry bit, which requires a more complicated scheme, described below.
645 * cannot be looked up through the PFN stored in the page table entry, but
646 * instead will be looked up through vm_ops->find_normal_page(). So far, this
658 * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
662 * This restricts such mappings to be a linear translation from virtual address
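For VM_PFNMAP the linear rule above holds because remap_pfn_range() records the first PFN in vma->vm_pgoff for copy-on-write mappings (see get_remap_pgoff() around 2903-2910 and remap_pfn_range_prepare_vma() around 3064-3066 below). A sketch of that driver-side convention, with made-up names (my_dev_mmap, my_phys_base), not taken from this file:

#include <linux/fs.h>
#include <linux/mm.h>

static phys_addr_t my_phys_base;   /* hypothetical: set at probe time */

static int my_dev_mmap(struct file *file, struct vm_area_struct *vma)
{
        /* For a COW-able private mapping covering the whole VMA,
         * remap_pfn_range() stores the starting PFN in vma->vm_pgoff,
         * which is what lets vm_normal_page() apply
         *   pfn == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
         * to tell raw PFN-map entries from COW'ed anonymous pages. */
        return remap_pfn_range(vma, vma->vm_start,
                               my_phys_base >> PAGE_SHIFT,
                               vma->vm_end - vma->vm_start,
                               vma->vm_page_prot);
}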
685 unsigned long long entry, enum pgtable_level level) in __vm_normal_page() argument
690 if (vma->vm_ops && vma->vm_ops->find_normal_page) in __vm_normal_page()
691 return vma->vm_ops->find_normal_page(vma, addr); in __vm_normal_page()
693 if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) in __vm_normal_page()
698 print_bad_page_map(vma, addr, entry, NULL, level); in __vm_normal_page()
706 if (unlikely(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))) { in __vm_normal_page()
707 if (vma->vm_flags & VM_MIXEDMAP) { in __vm_normal_page()
712 unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT; in __vm_normal_page()
715 if (pfn == vma->vm_pgoff + off) in __vm_normal_page()
717 if (!is_cow_mapping(vma->vm_flags)) in __vm_normal_page()
727 /* Corrupted page table entry. */ in __vm_normal_page()
728 print_bad_page_map(vma, addr, entry, NULL, level); in __vm_normal_page()
740 * vm_normal_page() - Get the "struct page" associated with a PTE
742 * @addr: The address where the @pte is mapped.
759 * vm_normal_folio() - Get the "struct folio" associated with a PTE
761 * @addr: The address where the @pte is mapped.
782 * vm_normal_page_pmd() - Get the "struct page" associated with a PMD
784 * @addr: The address where the @pmd is mapped.
801 * vm_normal_folio_pmd() - Get the "struct folio" associated with a PMD
803 * @addr: The address where the @pmd is mapped.
823 * vm_normal_page_pud() - Get the "struct page" associated with a PUD
825 * @addr: The address where the @pud is mapped.
843 * restore_exclusive_pte - Restore a device-exclusive entry
844 * @vma: VMA covering @address
847 * @address: the virtual address
851 * Restore a device-exclusive non-swap entry to an ordinary present pte.
857 * a device-exclusive entry can map it into the device to make forward
869 struct folio *folio, struct page *page, unsigned long address, in restore_exclusive_pte() argument
876 pte = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot))); in restore_exclusive_pte()
883 if ((vma->vm_flags & VM_WRITE) && in restore_exclusive_pte()
884 can_change_pte_writable(vma, address, pte)) { in restore_exclusive_pte()
889 set_pte_at(vma->vm_mm, address, ptep, pte); in restore_exclusive_pte()
892 * No need to invalidate - it was non-present before. However in restore_exclusive_pte()
895 update_mmu_cache(vma, address, ptep); in restore_exclusive_pte()
905 const softleaf_t entry = softleaf_from_pte(orig_pte); in try_restore_exclusive_pte() local
906 struct page *page = softleaf_to_page(entry); in try_restore_exclusive_pte()
915 return -EBUSY; in try_restore_exclusive_pte()
929 vm_flags_t vm_flags = dst_vma->vm_flags; in copy_nonpresent_pte()
931 softleaf_t entry = softleaf_from_pte(orig_pte); in copy_nonpresent_pte() local
936 if (likely(softleaf_is_swap(entry))) { in copy_nonpresent_pte()
937 if (swap_duplicate(entry) < 0) in copy_nonpresent_pte()
938 return -EIO; in copy_nonpresent_pte()
941 if (unlikely(list_empty(&dst_mm->mmlist))) { in copy_nonpresent_pte()
943 if (list_empty(&dst_mm->mmlist)) in copy_nonpresent_pte()
944 list_add(&dst_mm->mmlist, in copy_nonpresent_pte()
945 &src_mm->mmlist); in copy_nonpresent_pte()
948 /* Mark the swap entry as shared. */ in copy_nonpresent_pte()
954 } else if (softleaf_is_migration(entry)) { in copy_nonpresent_pte()
955 folio = softleaf_to_folio(entry); in copy_nonpresent_pte()
959 if (!softleaf_is_migration_read(entry) && in copy_nonpresent_pte()
963 * to be set to read. A previously exclusive entry is in copy_nonpresent_pte()
966 entry = make_readable_migration_entry( in copy_nonpresent_pte()
967 swp_offset(entry)); in copy_nonpresent_pte()
968 pte = softleaf_to_pte(entry); in copy_nonpresent_pte()
975 } else if (softleaf_is_device_private(entry)) { in copy_nonpresent_pte()
976 page = softleaf_to_page(entry); in copy_nonpresent_pte()
994 * We do not preserve soft-dirty information, because so in copy_nonpresent_pte()
1000 if (softleaf_is_device_private_write(entry) && in copy_nonpresent_pte()
1002 entry = make_readable_device_private_entry( in copy_nonpresent_pte()
1003 swp_offset(entry)); in copy_nonpresent_pte()
1004 pte = swp_entry_to_pte(entry); in copy_nonpresent_pte()
1009 } else if (softleaf_is_device_exclusive(entry)) { in copy_nonpresent_pte()
1012 * original entry then copying as for a present pte. Device in copy_nonpresent_pte()
1016 VM_BUG_ON(!is_cow_mapping(src_vma->vm_flags)); in copy_nonpresent_pte()
1018 return -EBUSY; in copy_nonpresent_pte()
1019 return -ENOENT; in copy_nonpresent_pte()
1020 } else if (softleaf_is_marker(entry)) { in copy_nonpresent_pte()
1021 pte_marker marker = copy_pte_marker(entry, dst_vma); in copy_nonpresent_pte()
1039 * and re-use the pte the traditional way.
1041 * And if we need a pre-allocated page but don't yet have
1056 return -EAGAIN; in copy_present_page()
1063 if (copy_mc_user_highpage(&new_folio->page, page, addr, src_vma)) in copy_present_page()
1064 return -EHWPOISON; in copy_present_page()
1073 pte = folio_mk_pte(new_folio, dst_vma->vm_page_prot); in copy_present_page()
1076 /* Uffd-wp needs to be delivered to dest pte as well */ in copy_present_page()
1078 set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); in copy_present_page()
1086 struct mm_struct *src_mm = src_vma->vm_mm; in __copy_present_ptes()
1089 if (is_cow_mapping(src_vma->vm_flags) && pte_write(pte)) { in __copy_present_ptes()
1095 if (src_vma->vm_flags & VM_SHARED) in __copy_present_ptes()
1102 set_ptes(dst_vma->vm_mm, addr, dst_pte, pte, nr); in __copy_present_ptes()
1106 * Copy one present PTE, trying to batch-process subsequent PTEs that map
1109 * Returns -EAGAIN if one preallocated page is required to copy the next PTE.
1134 if (!(src_vma->vm_flags & VM_SHARED)) in copy_present_ptes()
1145 return -EAGAIN; in copy_present_ptes()
1212 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pte_range()
1213 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pte_range()
1221 softleaf_t entry = softleaf_mk_none(); in copy_pte_range() local
1233 * protected by mmap_lock-less collapse skipping areas with anon_vma in copy_pte_range()
1239 ret = -ENOMEM; in copy_pte_range()
1245 * retract_page_tables() are using vma->anon_vma to be exclusive, so in copy_pte_range()
1265 * We are holding two locks at this point - either of them in copy_pte_range()
1284 if (ret == -EIO) { in copy_pte_range()
1285 entry = softleaf_from_pte(ptep_get(src_pte)); in copy_pte_range()
1287 } else if (ret == -EBUSY) { in copy_pte_range()
1297 * Device exclusive entry restored, continue by copying in copy_pte_range()
1300 WARN_ON_ONCE(ret != -ENOENT); in copy_pte_range()
1303 max_nr = (end - addr) / PAGE_SIZE; in copy_pte_range()
1307 * If we need a pre-allocated page for this pte, drop the in copy_pte_range()
1311 if (unlikely(ret == -EAGAIN || ret == -EHWPOISON)) in copy_pte_range()
1315 * pre-alloc page cannot be reused by next time so as in copy_pte_range()
1317 * will allocate page according to address). This in copy_pte_range()
1334 if (ret == -EIO) { in copy_pte_range()
1335 VM_WARN_ON_ONCE(!entry.val); in copy_pte_range()
1336 if (add_swap_count_continuation(entry, GFP_KERNEL) < 0) { in copy_pte_range()
1337 ret = -ENOMEM; in copy_pte_range()
1340 entry.val = 0; in copy_pte_range()
1341 } else if (ret == -EBUSY || unlikely(ret == -EHWPOISON)) { in copy_pte_range()
1343 } else if (ret == -EAGAIN) { in copy_pte_range()
1346 return -ENOMEM; in copy_pte_range()
1367 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pmd_range()
1368 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pmd_range()
1374 return -ENOMEM; in copy_pmd_range()
1381 VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma); in copy_pmd_range()
1384 if (err == -ENOMEM) in copy_pmd_range()
1385 return -ENOMEM; in copy_pmd_range()
1394 return -ENOMEM; in copy_pmd_range()
1404 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pud_range()
1405 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pud_range()
1411 return -ENOMEM; in copy_pud_range()
1418 VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, src_vma); in copy_pud_range()
1421 if (err == -ENOMEM) in copy_pud_range()
1422 return -ENOMEM; in copy_pud_range()
1431 return -ENOMEM; in copy_pud_range()
1441 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_p4d_range()
1447 return -ENOMEM; in copy_p4d_range()
1455 return -ENOMEM; in copy_p4d_range()
1468 if (src_vma->vm_flags & VM_COPY_ON_FORK) in vma_needs_copy()
1474 if (src_vma->anon_vma) in vma_needs_copy()
1490 unsigned long addr = src_vma->vm_start; in copy_page_range()
1491 unsigned long end = src_vma->vm_end; in copy_page_range()
1492 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_page_range()
1493 struct mm_struct *src_mm = src_vma->vm_mm; in copy_page_range()
1511 is_cow = is_cow_mapping(src_vma->vm_flags); in copy_page_range()
1525 raw_write_seqcount_begin(&src_mm->write_protect_seq); in copy_page_range()
1537 ret = -ENOMEM; in copy_page_range()
1543 raw_write_seqcount_end(&src_mm->write_protect_seq); in copy_page_range()
1553 if (!details || details->reclaim_pt) in should_zap_cows()
1557 return details->even_cows; in should_zap_cows()
1568 /* Otherwise we should only zap non-anon folios */ in should_zap_folio()
1577 return details->zap_flags & ZAP_FLAG_DROP_MARKER; in zap_drop_markers()
1581 * This function makes sure that we'll replace the none pte with an uffd-wp
1584 * Returns true if uffd-wp ptes was installed, false otherwise.
1607 if (--nr == 0) in zap_install_uffd_wp_if_needed()
1622 struct mm_struct *mm = tlb->mm; in zap_present_folio_ptes()
1626 ptent = get_and_clear_full_ptes(mm, addr, pte, nr, tlb->fullmm); in zap_present_folio_ptes()
1636 rss[mm_counter(folio)] -= nr; in zap_present_folio_ptes()
1638 /* We don't need up-to-date accessed/dirty bits. */ in zap_present_folio_ptes()
1639 clear_full_ptes(mm, addr, pte, nr, tlb->fullmm); in zap_present_folio_ptes()
1640 rss[MM_ANONPAGES] -= nr; in zap_present_folio_ptes()
1662 * Zap or skip at least one present PTE, trying to batch-process subsequent
1673 struct mm_struct *mm = tlb->mm; in zap_present_ptes()
1680 /* We don't need up-to-date accessed/dirty bits. */ in zap_present_ptes()
1681 ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); in zap_present_ptes()
1718 softleaf_t entry; in zap_nonpresent_ptes() local
1722 entry = softleaf_from_pte(ptent); in zap_nonpresent_ptes()
1723 if (softleaf_is_device_private(entry) || in zap_nonpresent_ptes()
1724 softleaf_is_device_exclusive(entry)) { in zap_nonpresent_ptes()
1725 struct page *page = softleaf_to_page(entry); in zap_nonpresent_ptes()
1733 * consider uffd-wp bit when zap. For more information, in zap_nonpresent_ptes()
1737 rss[mm_counter(folio)]--; in zap_nonpresent_ptes()
1740 } else if (softleaf_is_swap(entry)) { in zap_nonpresent_ptes()
1746 rss[MM_SWAPENTS] -= nr; in zap_nonpresent_ptes()
1747 free_swap_and_cache_nr(entry, nr); in zap_nonpresent_ptes()
1748 } else if (softleaf_is_migration(entry)) { in zap_nonpresent_ptes()
1749 struct folio *folio = softleaf_to_folio(entry); in zap_nonpresent_ptes()
1753 rss[mm_counter(folio)]--; in zap_nonpresent_ptes()
1754 } else if (softleaf_is_uffd_wp_marker(entry)) { in zap_nonpresent_ptes()
1761 } else if (softleaf_is_guard_marker(entry)) { in zap_nonpresent_ptes()
1769 } else if (softleaf_is_hwpoison(entry) || in zap_nonpresent_ptes()
1770 softleaf_is_poison_marker(entry)) { in zap_nonpresent_ptes()
1774 /* We should have covered all the swap entry types */ in zap_nonpresent_ptes()
1775 pr_alert("unrecognized swap entry 0x%lx\n", entry.val); in zap_nonpresent_ptes()
1778 clear_not_present_full_ptes(vma->vm_mm, addr, pte, nr, tlb->fullmm); in zap_nonpresent_ptes()
1792 int max_nr = (end - addr) / PAGE_SIZE; in do_zap_pte_range()
1802 max_nr -= nr; in do_zap_pte_range()
1826 struct mm_struct *mm = tlb->mm; in zap_pte_range()
1924 if (next - addr != HPAGE_PMD_SIZE) in zap_pmd_range()
1931 } else if (details && details->single_folio && in zap_pmd_range()
1932 folio_test_pmd_mappable(details->single_folio) && in zap_pmd_range()
1933 next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { in zap_pmd_range()
1934 spinlock_t *ptl = pmd_lock(tlb->mm, pmd); in zap_pmd_range()
1948 pmd--; in zap_pmd_range()
1966 if (next - addr != HPAGE_PUD_SIZE) { in zap_pud_range()
1967 mmap_assert_locked(tlb->mm); in zap_pud_range()
2012 pgd = pgd_offset(vma->vm_mm, addr); in unmap_page_range()
2027 unsigned long start = max(vma->vm_start, start_addr); in unmap_single_vma()
2030 if (start >= vma->vm_end) in unmap_single_vma()
2032 end = min(vma->vm_end, end_addr); in unmap_single_vma()
2033 if (end <= vma->vm_start) in unmap_single_vma()
2036 if (vma->vm_file) in unmap_single_vma()
2042 * It is undesirable to test vma->vm_file as it in unmap_single_vma()
2043 * should be non-null for valid hugetlb area. in unmap_single_vma()
2046 * hugetlbfs ->mmap method fails, in unmap_single_vma()
2047 * mmap_region() nullifies vma->vm_file in unmap_single_vma()
2052 if (vma->vm_file) { in unmap_single_vma()
2054 details->zap_flags : 0; in unmap_single_vma()
2064 * unmap_vmas - unmap a range of memory covered by a list of vma's
2065 * @tlb: address of the caller's struct mmu_gather
2068 * @start_addr: virtual address at which to start unmapping
2069 * @end_addr: virtual address at which to end unmapping
2076 * The VMA list must be sorted in ascending virtual address order.
2078 * unmap_vmas() assumes that the caller will flush the whole unmapped address
2080 * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
2090 /* Careful - we need to zap private pages too! */ in unmap_vmas()
2094 mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm, in unmap_vmas()
2103 vma = mas_find(mas, tree_end - 1); in unmap_vmas()
2109 * zap_page_range_single_batched - remove user pages in a given range
2112 * @address: starting address of pages to remove
2117 * hugetlb, @tlb is flushed and re-initialized by this function.
2120 struct vm_area_struct *vma, unsigned long address, in zap_page_range_single_batched() argument
2123 const unsigned long end = address + size; in zap_page_range_single_batched()
2126 VM_WARN_ON_ONCE(!tlb || tlb->mm != vma->vm_mm); in zap_page_range_single_batched()
2128 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, in zap_page_range_single_batched()
2129 address, end); in zap_page_range_single_batched()
2131 update_hiwater_rss(vma->vm_mm); in zap_page_range_single_batched()
2134 * unmap 'address-end' not 'range.start-range.end' as range in zap_page_range_single_batched()
2137 unmap_single_vma(tlb, vma, address, end, details); in zap_page_range_single_batched()
2146 tlb_gather_mmu(tlb, vma->vm_mm); in zap_page_range_single_batched()
2151 * zap_page_range_single - remove user pages in a given range
2153 * @address: starting address of pages to zap
2159 void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, in zap_page_range_single() argument
2164 tlb_gather_mmu(&tlb, vma->vm_mm); in zap_page_range_single()
2165 zap_page_range_single_batched(&tlb, vma, address, size, details); in zap_page_range_single()
2170 * zap_vma_ptes - remove ptes mapping the vma
2172 * @address: starting address of pages to zap
2177 * The entire address range must be fully contained within the vma.
2180 void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, in zap_vma_ptes() argument
2183 if (!range_in_vma(vma, address, address + size) || in zap_vma_ptes()
2184 !(vma->vm_flags & VM_PFNMAP)) in zap_vma_ptes()
2187 zap_page_range_single(vma, address, size, NULL); in zap_vma_ptes()
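zap_vma_ptes() above (2180-2187) is the exported teardown helper for driver-installed VM_PFNMAP mappings. A minimal hedged sketch of a caller; my_dev_revoke_mapping() is a made-up name:

#include <linux/mm.h>

/* Drop every PTE previously installed in this VMA, e.g. before the
 * backing device memory goes away. The range must belong to a
 * VM_PFNMAP mapping and lie entirely inside the VMA (see 2183). */
static void my_dev_revoke_mapping(struct vm_area_struct *vma)
{
        zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
}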
2225 VM_WARN_ON_ONCE(vma->vm_flags & VM_PFNMAP); in vm_mixed_zeropage_allowed()
2232 if (mm_forbids_zeropage(vma->vm_mm)) in vm_mixed_zeropage_allowed()
2235 if (is_cow_mapping(vma->vm_flags)) in vm_mixed_zeropage_allowed()
2238 if (!(vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) in vm_mixed_zeropage_allowed()
2241 * Why not allow any VMA that has vm_ops->pfn_mkwrite? GUP could in vm_mixed_zeropage_allowed()
2242 * find the shared zeropage and longterm-pin it, which would in vm_mixed_zeropage_allowed()
2244 * page due to vma->vm_ops->pfn_mkwrite, because what's mapped would in vm_mixed_zeropage_allowed()
2249 return vma->vm_ops && vma->vm_ops->pfn_mkwrite && in vm_mixed_zeropage_allowed()
2250 (vma_is_fsdax(vma) || vma->vm_flags & VM_IO); in vm_mixed_zeropage_allowed()
2259 return -EINVAL; in validate_page_before_insert()
2262 return -EINVAL; in validate_page_before_insert()
2266 return -EINVAL; in validate_page_before_insert()
2280 return -EBUSY; in insert_page_into_pte_locked()
2285 return -EFAULT; in insert_page_into_pte_locked()
2305 inc_mm_counter(vma->vm_mm, mm_counter_file(folio)); in insert_page_into_pte_locked()
2308 set_pte_at(vma->vm_mm, addr, pte, pteval); in insert_page_into_pte_locked()
2322 retval = -ENOMEM; in insert_page()
2323 pte = get_locked_pte(vma->vm_mm, addr, &ptl); in insert_page()
2353 struct mm_struct *const mm = vma->vm_mm; in insert_pages()
2359 ret = -EFAULT; in insert_pages()
2365 remaining_pages_total, PTRS_PER_PTE - pte_index(addr)); in insert_pages()
2368 ret = -ENOMEM; in insert_pages()
2378 ret = -EFAULT; in insert_pages()
2387 remaining_pages_total -= pte_idx; in insert_pages()
2394 pages_to_write_in_pmd -= batch_size; in insert_pages()
2395 remaining_pages_total -= batch_size; in insert_pages()
2406 * vm_insert_pages - insert multiple pages into user vma, batching the pmd lock.
2408 * @addr: target start user address of these pages
2423 const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1; in vm_insert_pages()
2425 if (addr < vma->vm_start || end_addr >= vma->vm_end) in vm_insert_pages()
2426 return -EFAULT; in vm_insert_pages()
2427 if (!(vma->vm_flags & VM_MIXEDMAP)) { in vm_insert_pages()
2428 BUG_ON(mmap_read_trylock(vma->vm_mm)); in vm_insert_pages()
2429 BUG_ON(vma->vm_flags & VM_PFNMAP); in vm_insert_pages()
2433 return insert_pages(vma, addr, pages, num, vma->vm_page_prot); in vm_insert_pages()
2438 * vm_insert_page - insert single page into user vma
2440 * @addr: target user address of this page
2460 * Usually this function is called from f_op->mmap() handler
2461 * under mm->mmap_lock write-lock, so it can change vma->vm_flags.
2463 * function from other places, for example from page-fault handler.
2470 if (addr < vma->vm_start || addr >= vma->vm_end) in vm_insert_page()
2471 return -EFAULT; in vm_insert_page()
2472 if (!(vma->vm_flags & VM_MIXEDMAP)) { in vm_insert_page()
2473 BUG_ON(mmap_read_trylock(vma->vm_mm)); in vm_insert_page()
2474 BUG_ON(vma->vm_flags & VM_PFNMAP); in vm_insert_page()
2477 return insert_page(vma, addr, page, vma->vm_page_prot, false); in vm_insert_page()
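As the kerneldoc around 2460-2463 notes, vm_insert_page() is normally called from an f_op->mmap() handler while mmap_lock is held for write, since it may mark the VMA VM_MIXEDMAP on first use. A hedged sketch of such a handler; my_mmap() and my_page are made-up names (my_page would come from alloc_page() at probe time):

#include <linux/fs.h>
#include <linux/mm.h>

static struct page *my_page;    /* hypothetical: allocated at probe time */

static int my_mmap(struct file *file, struct vm_area_struct *vma)
{
        if (vma->vm_end - vma->vm_start != PAGE_SIZE)
                return -EINVAL;

        /* Runs under the mmap_lock write-lock, so flipping the VMA to
         * VM_MIXEDMAP inside vm_insert_page() is safe. */
        return vm_insert_page(vma, vma->vm_start, my_page);
}

For more than one page, vm_insert_pages() (2406) batches the PMD lock, and vm_map_pages() (2520) additionally honours vma->vm_pgoff as an offset into the page array.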
2482 * __vm_map_pages - maps range of kernel pages into user vma
2498 unsigned long uaddr = vma->vm_start; in __vm_map_pages()
2503 return -ENXIO; in __vm_map_pages()
2506 if (count > num - offset) in __vm_map_pages()
2507 return -ENXIO; in __vm_map_pages()
2520 * vm_map_pages - maps range of kernel pages starts with non zero offset
2540 return __vm_map_pages(vma, pages, num, vma->vm_pgoff); in vm_map_pages()
2545 * vm_map_pages_zero - map range of kernel pages starts with zero offset
2567 struct mm_struct *mm = vma->vm_mm; in insert_pfn()
2568 pte_t *pte, entry; in insert_pfn() local
2574 entry = ptep_get(pte); in insert_pfn()
2575 if (!pte_none(entry)) { in insert_pfn()
2587 if (pte_pfn(entry) != pfn) { in insert_pfn()
2588 WARN_ON_ONCE(!is_zero_pfn(pte_pfn(entry))); in insert_pfn()
2591 entry = pte_mkyoung(entry); in insert_pfn()
2592 entry = maybe_mkwrite(pte_mkdirty(entry), vma); in insert_pfn()
2593 if (ptep_set_access_flags(vma, addr, pte, entry, 1)) in insert_pfn()
2600 entry = pte_mkspecial(pfn_pte(pfn, prot)); in insert_pfn()
2603 entry = pte_mkyoung(entry); in insert_pfn()
2604 entry = maybe_mkwrite(pte_mkdirty(entry), vma); in insert_pfn()
2607 set_pte_at(mm, addr, pte, entry); in insert_pfn()
2616 * vmf_insert_pfn_prot - insert single pfn into user vma with specified pgprot
2618 * @addr: target user address of this page
2623 * to override pgprot on a per-page basis.
2630 * pgprot typically only differs from @vma->vm_page_prot when drivers set
2631 * caching- and encryption bits different than those of @vma->vm_page_prot,
2632 * because the caching- or encryption mode may not be known at mmap() time.
2634 * This is ok as long as @vma->vm_page_prot is not used by the core vm
2637 * functions that don't touch caching- or encryption bits, using pte_modify()
2640 * Also when new page-table entries are created, this is only done using the
2641 * fault() callback, and never using the value of vma->vm_page_prot,
2642 * except for page-table entries that point to anonymous pages as the result
2657 BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); in vmf_insert_pfn_prot()
2658 BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == in vmf_insert_pfn_prot()
2660 BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); in vmf_insert_pfn_prot()
2661 BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); in vmf_insert_pfn_prot()
2663 if (addr < vma->vm_start || addr >= vma->vm_end) in vmf_insert_pfn_prot()
2676 * vmf_insert_pfn - insert single pfn into user vma
2678 * @addr: target user address of this page
2684 * This function should only be called from a vm_ops->fault handler, and
2698 return vmf_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); in vmf_insert_pfn()
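The kerneldoc at 2684 restricts vmf_insert_pfn() to vm_ops->fault handlers. A hedged sketch of such a handler for a VM_PFNMAP VMA; my_vm_fault() and my_base_pfn are made-up names:

#include <linux/mm.h>

static unsigned long my_base_pfn; /* hypothetical: first PFN of the region */

static vm_fault_t my_vm_fault(struct vm_fault *vmf)
{
        /* On success this returns VM_FAULT_NOPAGE, telling the core
         * fault path that the PTE has already been installed. */
        return vmf_insert_pfn(vmf->vma, vmf->address,
                              my_base_pfn + vmf->pgoff);
}

static const struct vm_operations_struct my_vm_ops = {
        .fault = my_vm_fault,
};

The mmap handler installing my_vm_ops would be expected to set VM_IO | VM_PFNMAP, matching the BUG_ON checks at 2657-2661; drivers that need per-page caching attributes use vmf_insert_pfn_prot() (2616) instead.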
2709 if (vma->vm_flags & VM_MIXEDMAP) in vm_mixed_ok()
2719 pgprot_t pgprot = vma->vm_page_prot; in __vm_insert_mixed()
2725 if (addr < vma->vm_start || addr >= vma->vm_end) in __vm_insert_mixed()
2754 if (err == -ENOMEM) in __vm_insert_mixed()
2756 if (err < 0 && err != -EBUSY) in __vm_insert_mixed()
2765 pgprot_t pgprot = vmf->vma->vm_page_prot; in vmf_insert_page_mkwrite()
2766 unsigned long addr = vmf->address; in vmf_insert_page_mkwrite()
2769 if (addr < vmf->vma->vm_start || addr >= vmf->vma->vm_end) in vmf_insert_page_mkwrite()
2772 err = insert_page(vmf->vma, addr, page, pgprot, write); in vmf_insert_page_mkwrite()
2773 if (err == -ENOMEM) in vmf_insert_page_mkwrite()
2775 if (err < 0 && err != -EBUSY) in vmf_insert_page_mkwrite()
2791 * different entry in the mean time, we treat that as success as we assume
2792 * the same entry was actually inserted.
2803 * in null mappings (currently treated as "copy-on-access")
2815 return -ENOMEM; in remap_pte_range()
2820 err = -EACCES; in remap_pte_range()
2839 pfn -= addr >> PAGE_SHIFT; in remap_pmd_range()
2842 return -ENOMEM; in remap_pmd_range()
2862 pfn -= addr >> PAGE_SHIFT; in remap_pud_range()
2865 return -ENOMEM; in remap_pud_range()
2884 pfn -= addr >> PAGE_SHIFT; in remap_p4d_range()
2887 return -ENOMEM; in remap_p4d_range()
2903 * There's a horrible special case to handle copy-on-write in get_remap_pgoff()
2905 * un-COW'ed pages by matching them up with "vma->vm_pgoff". in get_remap_pgoff()
2910 return -EINVAL; in get_remap_pgoff()
2923 struct mm_struct *mm = vma->vm_mm; in remap_pfn_range_internal()
2927 return -EINVAL; in remap_pfn_range_internal()
2929 VM_WARN_ON_ONCE((vma->vm_flags & VM_REMAP_FLAGS) != VM_REMAP_FLAGS); in remap_pfn_range_internal()
2932 pfn -= addr >> PAGE_SHIFT; in remap_pfn_range_internal()
2948 * must have pre-validated the caching bits of the pgprot_t.
2974 return ERR_PTR(-EINVAL); in pfnmap_track_ctx_alloc()
2979 return ERR_PTR(-ENOMEM); in pfnmap_track_ctx_alloc()
2982 ctx->pfn = pfn; in pfnmap_track_ctx_alloc()
2983 ctx->size = size; in pfnmap_track_ctx_alloc()
2984 kref_init(&ctx->kref); in pfnmap_track_ctx_alloc()
2992 pfnmap_untrack(ctx->pfn, ctx->size); in pfnmap_track_ctx_release()
3013 if (addr == vma->vm_start && addr + size == vma->vm_end) { in remap_pfn_range_track()
3014 if (vma->pfnmap_track_ctx) in remap_pfn_range_track()
3015 return -EINVAL; in remap_pfn_range_track()
3020 return -EINVAL; in remap_pfn_range_track()
3026 kref_put(&ctx->kref, pfnmap_track_ctx_release); in remap_pfn_range_track()
3028 vma->pfnmap_track_ctx = ctx; in remap_pfn_range_track()
3053 get_remap_pgoff(desc->vm_flags, desc->start, desc->end, in remap_pfn_range_prepare()
3054 desc->start, desc->end, pfn, &desc->pgoff); in remap_pfn_range_prepare()
3055 desc->vm_flags |= VM_REMAP_FLAGS; in remap_pfn_range_prepare()
3064 err = get_remap_pgoff(vma->vm_flags, addr, end, in remap_pfn_range_prepare_vma()
3065 vma->vm_start, vma->vm_end, in remap_pfn_range_prepare_vma()
3066 pfn, &vma->vm_pgoff); in remap_pfn_range_prepare_vma()
3075 * remap_pfn_range - remap kernel memory to userspace
3077 * @addr: target page aligned user address to start at
3078 * @pfn: page frame number of kernel physical memory address
3106 * vm_iomap_memory - remap memory to userspace
3115 * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get
3116 * whatever write-combining details or similar.
3126 return -EINVAL; in vm_iomap_memory()
3128 * You *really* shouldn't map things that aren't page-aligned, in vm_iomap_memory()
3136 return -EINVAL; in vm_iomap_memory()
3139 if (vma->vm_pgoff > pages) in vm_iomap_memory()
3140 return -EINVAL; in vm_iomap_memory()
3141 pfn += vma->vm_pgoff; in vm_iomap_memory()
3142 pages -= vma->vm_pgoff; in vm_iomap_memory()
3145 vm_len = vma->vm_end - vma->vm_start; in vm_iomap_memory()
3147 return -EINVAL; in vm_iomap_memory()
3150 return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); in vm_iomap_memory()
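vm_iomap_memory() above (3106-3150) does the pgoff/length bookkeeping so a driver's mmap handler can hand over a raw physical range. A hedged sketch; my_iomap_mmap(), my_bar_base and my_bar_len are made-up names (the latter two would typically come from pci_resource_start()/pci_resource_len()):

#include <linux/fs.h>
#include <linux/mm.h>

static phys_addr_t my_bar_base;     /* hypothetical: physical base of region */
static unsigned long my_bar_len;    /* hypothetical: region length in bytes */

static int my_iomap_mmap(struct file *file, struct vm_area_struct *vma)
{
        /* Tweak caching first if needed, per the NOTE at 3115-3116. */
        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

        /* Validates vma->vm_pgoff and the requested length against the
         * region, then calls io_remap_pfn_range() (3150). */
        return vm_iomap_memory(vma, my_bar_base, my_bar_len);
}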
3168 return -ENOMEM; in apply_to_pte_range()
3174 return -EINVAL; in apply_to_pte_range()
3211 return -ENOMEM; in apply_to_pmd_range()
3220 return -EINVAL; in apply_to_pmd_range()
3247 return -ENOMEM; in apply_to_pud_range()
3256 return -EINVAL; in apply_to_pud_range()
3283 return -ENOMEM; in apply_to_p4d_range()
3292 return -EINVAL; in apply_to_p4d_range()
3318 return -EINVAL; in __apply_to_page_range()
3326 err = -EINVAL; in __apply_to_page_range()
3371 * handle_pte_fault chooses page fault handler according to an entry which was
3372 * read non-atomically. Before making any commitment, on those architectures
3383 spin_lock(vmf->ptl); in pte_unmap_same()
3384 same = pte_same(ptep_get(vmf->pte), vmf->orig_pte); in pte_unmap_same()
3385 spin_unlock(vmf->ptl); in pte_unmap_same()
3388 pte_unmap(vmf->pte); in pte_unmap_same()
3389 vmf->pte = NULL; in pte_unmap_same()
3396 * -EHWPOISON: copy failed due to hwpoison in source page
3397 * -EAGAIN: copy failed (some other reason)
3405 struct vm_area_struct *vma = vmf->vma; in __wp_page_copy_user()
3406 struct mm_struct *mm = vma->vm_mm; in __wp_page_copy_user()
3407 unsigned long addr = vmf->address; in __wp_page_copy_user()
3411 return -EHWPOISON; in __wp_page_copy_user()
3417 * a "struct page" for it. We do a best-effort copy by in __wp_page_copy_user()
3418 * just copying from the original user address. If that in __wp_page_copy_user()
3419 * fails, we just zero-fill it. Live with it. in __wp_page_copy_user()
3429 vmf->pte = NULL; in __wp_page_copy_user()
3430 if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) { in __wp_page_copy_user()
3431 pte_t entry; in __wp_page_copy_user() local
3433 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); in __wp_page_copy_user()
3434 if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in __wp_page_copy_user()
3439 if (vmf->pte) in __wp_page_copy_user()
3440 update_mmu_tlb(vma, addr, vmf->pte); in __wp_page_copy_user()
3441 ret = -EAGAIN; in __wp_page_copy_user()
3445 entry = pte_mkyoung(vmf->orig_pte); in __wp_page_copy_user()
3446 if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0)) in __wp_page_copy_user()
3447 update_mmu_cache_range(vmf, vma, addr, vmf->pte, 1); in __wp_page_copy_user()
3457 if (vmf->pte) in __wp_page_copy_user()
3460 /* Re-validate under PTL if the page is still mapped */ in __wp_page_copy_user()
3461 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); in __wp_page_copy_user()
3462 if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in __wp_page_copy_user()
3464 if (vmf->pte) in __wp_page_copy_user()
3465 update_mmu_tlb(vma, addr, vmf->pte); in __wp_page_copy_user()
3466 ret = -EAGAIN; in __wp_page_copy_user()
3477 * use-case in __wp_page_copy_user()
3488 if (vmf->pte) in __wp_page_copy_user()
3489 pte_unmap_unlock(vmf->pte, vmf->ptl); in __wp_page_copy_user()
3499 struct file *vm_file = vma->vm_file; in __get_fault_gfp_mask()
3502 return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO; in __get_fault_gfp_mask()
3512 * Notify the address space that the page is about to become writable so that
3520 unsigned int old_flags = vmf->flags; in do_page_mkwrite()
3522 vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; in do_page_mkwrite()
3524 if (vmf->vma->vm_file && in do_page_mkwrite()
3525 IS_SWAPFILE(vmf->vma->vm_file->f_mapping->host)) in do_page_mkwrite()
3528 ret = vmf->vma->vm_ops->page_mkwrite(vmf); in do_page_mkwrite()
3530 vmf->flags = old_flags; in do_page_mkwrite()
3535 if (!folio->mapping) { in do_page_mkwrite()
3552 struct vm_area_struct *vma = vmf->vma; in fault_dirty_shared_page()
3554 struct folio *folio = page_folio(vmf->page); in fault_dirty_shared_page()
3556 bool page_mkwrite = vma->vm_ops && vma->vm_ops->page_mkwrite; in fault_dirty_shared_page()
3561 * Take a local copy of the address_space - folio.mapping may be zeroed in fault_dirty_shared_page()
3563 * pinned by vma->vm_file's reference. We rely on folio_unlock()'s in fault_dirty_shared_page()
3570 file_update_time(vma->vm_file); in fault_dirty_shared_page()
3601 * any related book-keeping.
3604 __releases(vmf->ptl) in wp_page_reuse()
3606 struct vm_area_struct *vma = vmf->vma; in wp_page_reuse()
3607 pte_t entry; in wp_page_reuse() local
3609 VM_BUG_ON(!(vmf->flags & FAULT_FLAG_WRITE)); in wp_page_reuse()
3610 VM_WARN_ON(is_zero_pfn(pte_pfn(vmf->orig_pte))); in wp_page_reuse()
3614 !PageAnonExclusive(vmf->page)); in wp_page_reuse()
3620 folio_xchg_last_cpupid(folio, (1 << LAST_CPUPID_SHIFT) - 1); in wp_page_reuse()
3623 flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); in wp_page_reuse()
3624 entry = pte_mkyoung(vmf->orig_pte); in wp_page_reuse()
3625 entry = maybe_mkwrite(pte_mkdirty(entry), vma); in wp_page_reuse()
3626 if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) in wp_page_reuse()
3627 update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); in wp_page_reuse()
3628 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_reuse()
3634 * vm_ops that have a ->map_pages have been audited and don't need
3639 struct vm_area_struct *vma = vmf->vma; in vmf_can_call_fault()
3641 if (vma->vm_ops->map_pages || !(vmf->flags & FAULT_FLAG_VMA_LOCK)) in vmf_can_call_fault()
3648 * __vmf_anon_prepare - Prepare to handle an anonymous fault.
3654 * only protected by the per-VMA lock, the caller must retry with the
3657 * do with only the per-VMA lock held for this VMA.
3664 struct vm_area_struct *vma = vmf->vma; in __vmf_anon_prepare()
3667 if (likely(vma->anon_vma)) in __vmf_anon_prepare()
3669 if (vmf->flags & FAULT_FLAG_VMA_LOCK) { in __vmf_anon_prepare()
3670 if (!mmap_read_trylock(vma->vm_mm)) in __vmf_anon_prepare()
3675 if (vmf->flags & FAULT_FLAG_VMA_LOCK) in __vmf_anon_prepare()
3676 mmap_read_unlock(vma->vm_mm); in __vmf_anon_prepare()
3689 * - Allocate a page, copy the content of the old page to the new one.
3690 * - Handle book keeping and accounting - cgroups, mmu-notifiers, etc.
3691 * - Take the PTL. If the pte changed, bail out and release the allocated page
3692 * - If the pte is still the way we remember it, update the page table and all
3693 * relevant references. This includes dropping the reference the page-table
3695 * - In any case, unlock the PTL and drop the reference we took to the old page.
3699 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in wp_page_copy()
3700 struct vm_area_struct *vma = vmf->vma; in wp_page_copy()
3701 struct mm_struct *mm = vma->vm_mm; in wp_page_copy()
3704 pte_t entry; in wp_page_copy() local
3712 if (vmf->page) in wp_page_copy()
3713 old_folio = page_folio(vmf->page); in wp_page_copy()
3718 pfn_is_zero = is_zero_pfn(pte_pfn(vmf->orig_pte)); in wp_page_copy()
3719 new_folio = folio_prealloc(mm, vma, vmf->address, pfn_is_zero); in wp_page_copy()
3726 err = __wp_page_copy_user(&new_folio->page, vmf->page, vmf); in wp_page_copy()
3730 * it's fine. If not, userspace would re-fault on in wp_page_copy()
3731 * the same address and we will handle the fault in wp_page_copy()
3733 * The -EHWPOISON case will not be retried. in wp_page_copy()
3740 return err == -EHWPOISON ? VM_FAULT_HWPOISON : 0; in wp_page_copy()
3742 kmsan_copy_page_meta(&new_folio->page, vmf->page); in wp_page_copy()
3748 vmf->address & PAGE_MASK, in wp_page_copy()
3749 (vmf->address & PAGE_MASK) + PAGE_SIZE); in wp_page_copy()
3753 * Re-check the pte - we dropped the lock in wp_page_copy()
3755 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl); in wp_page_copy()
3756 if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in wp_page_copy()
3763 ksm_might_unmap_zero_page(mm, vmf->orig_pte); in wp_page_copy()
3766 flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); in wp_page_copy()
3767 entry = folio_mk_pte(new_folio, vma->vm_page_prot); in wp_page_copy()
3768 entry = pte_sw_mkyoung(entry); in wp_page_copy()
3770 if (pte_soft_dirty(vmf->orig_pte)) in wp_page_copy()
3771 entry = pte_mksoft_dirty(entry); in wp_page_copy()
3772 if (pte_uffd_wp(vmf->orig_pte)) in wp_page_copy()
3773 entry = pte_mkuffd_wp(entry); in wp_page_copy()
3775 entry = maybe_mkwrite(pte_mkdirty(entry), vma); in wp_page_copy()
3779 * Clear the pte entry and flush it first, before updating the in wp_page_copy()
3780 * pte with the new entry, to keep TLBs on different CPUs in in wp_page_copy()
3785 ptep_clear_flush(vma, vmf->address, vmf->pte); in wp_page_copy()
3786 folio_add_new_anon_rmap(new_folio, vma, vmf->address, RMAP_EXCLUSIVE); in wp_page_copy()
3788 BUG_ON(unshare && pte_write(entry)); in wp_page_copy()
3789 set_pte_at(mm, vmf->address, vmf->pte, entry); in wp_page_copy()
3790 update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); in wp_page_copy()
3814 folio_remove_rmap_pte(old_folio, vmf->page, vma); in wp_page_copy()
3820 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_copy()
3821 } else if (vmf->pte) { in wp_page_copy()
3822 update_mmu_tlb(vma, vmf->address, vmf->pte); in wp_page_copy()
3823 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_copy()
3849 * finish_mkwrite_fault - finish page fault for a shared mapping, making PTE
3853 * @folio: the folio of vmf->page
3856 * shared mapping due to PTE being read-only once the mapped page is prepared.
3867 WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED)); in finish_mkwrite_fault()
3868 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, in finish_mkwrite_fault()
3869 &vmf->ptl); in finish_mkwrite_fault()
3870 if (!vmf->pte) in finish_mkwrite_fault()
3876 if (!pte_same(ptep_get(vmf->pte), vmf->orig_pte)) { in finish_mkwrite_fault()
3877 update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); in finish_mkwrite_fault()
3878 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_mkwrite_fault()
3891 struct vm_area_struct *vma = vmf->vma; in wp_pfn_shared()
3893 if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { in wp_pfn_shared()
3896 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_pfn_shared()
3901 vmf->flags |= FAULT_FLAG_MKWRITE; in wp_pfn_shared()
3902 ret = vma->vm_ops->pfn_mkwrite(vmf); in wp_pfn_shared()
3912 __releases(vmf->ptl) in wp_page_shared()
3914 struct vm_area_struct *vma = vmf->vma; in wp_page_shared()
3919 if (vma->vm_ops && vma->vm_ops->page_mkwrite) { in wp_page_shared()
3922 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_shared()
3974 if (test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids)) in __wp_can_reuse_large_anon_folio()
3997 if (test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids)) in __wp_can_reuse_large_anon_folio()
4004 VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != vma->vm_mm->mm_id && in __wp_can_reuse_large_anon_folio()
4005 folio_mm_id(folio, 1) != vma->vm_mm->mm_id); in __wp_can_reuse_large_anon_folio()
4072 * It is done by copying the page to a new address and decrementing the
4073 * shared-page counter for the old page.
4076 * done by the caller (the low-level page fault routine in most cases).
4084 * We enter with non-exclusive mmap_lock (to exclude vma changes,
4089 __releases(vmf->ptl) in do_wp_page()
4091 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in do_wp_page()
4092 struct vm_area_struct *vma = vmf->vma; in do_wp_page()
4097 if (userfaultfd_pte_wp(vma, ptep_get(vmf->pte))) { in do_wp_page()
4099 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
4105 * etc.) because we're only removing the uffd-wp bit, in do_wp_page()
4108 pte = pte_clear_uffd_wp(ptep_get(vmf->pte)); in do_wp_page()
4110 set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); in do_wp_page()
4115 vmf->orig_pte = pte; in do_wp_page()
4119 * Userfaultfd write-protect can defer flushes. Ensure the TLB in do_wp_page()
4122 if (unlikely(userfaultfd_wp(vmf->vma) && in do_wp_page()
4123 mm_tlb_flush_pending(vmf->vma->vm_mm))) in do_wp_page()
4124 flush_tlb_page(vmf->vma, vmf->address); in do_wp_page()
4127 vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); in do_wp_page()
4129 if (vmf->page) in do_wp_page()
4130 folio = page_folio(vmf->page); in do_wp_page()
4136 if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { in do_wp_page()
4139 * VM_PFNMAP VMA. FS DAX also wants ops->pfn_mkwrite called. in do_wp_page()
4142 * Just mark the pages writable and/or call ops->pfn_mkwrite. in do_wp_page()
4144 if (!vmf->page || is_fsdax_page(vmf->page)) { in do_wp_page()
4145 vmf->page = NULL; in do_wp_page()
4159 (PageAnonExclusive(vmf->page) || wp_can_reuse_anon_folio(folio, vma))) { in do_wp_page()
4160 if (!PageAnonExclusive(vmf->page)) in do_wp_page()
4161 SetPageAnonExclusive(vmf->page); in do_wp_page()
4163 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
4175 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
4187 zap_page_range_single(vma, start_addr, end_addr - start_addr, details); in unmap_mapping_range_vma()
4199 vba = vma->vm_pgoff; in unmap_mapping_range_tree()
4200 vea = vba + vma_pages(vma) - 1; in unmap_mapping_range_tree()
4205 ((zba - vba) << PAGE_SHIFT) + vma->vm_start, in unmap_mapping_range_tree()
4206 ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, in unmap_mapping_range_tree()
4212 * unmap_mapping_folio() - Unmap single folio from processes.
4224 struct address_space *mapping = folio->mapping; in unmap_mapping_folio()
4231 first_index = folio->index; in unmap_mapping_folio()
4232 last_index = folio_next_index(folio) - 1; in unmap_mapping_folio()
4239 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) in unmap_mapping_folio()
4240 unmap_mapping_range_tree(&mapping->i_mmap, first_index, in unmap_mapping_folio()
4246 * unmap_mapping_pages() - Unmap pages from processes.
4247 * @mapping: The address space containing pages to be unmapped.
4252 * Unmap the pages in this address space from any userspace process which
4262 pgoff_t last_index = start + nr - 1; in unmap_mapping_pages()
4269 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) in unmap_mapping_pages()
4270 unmap_mapping_range_tree(&mapping->i_mmap, first_index, in unmap_mapping_pages()
4277 * unmap_mapping_range - unmap the portion of all mmaps in the specified
4281 * @mapping: the address space containing mmaps to be unmapped.
4297 pgoff_t hlen = ((pgoff_t)(holelen) + PAGE_SIZE - 1) >> PAGE_SHIFT; in unmap_mapping_range()
4302 (holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; in unmap_mapping_range()
4304 hlen = ULONG_MAX - hba + 1; in unmap_mapping_range()
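unmap_mapping_range() above (4277-4304) is the entry point filesystems use to shoot down all user mappings of a file range, e.g. on truncation. A hedged sketch roughly mirroring what the generic truncate_pagecache() helper does; my_truncate_unmap() is a made-up name:

#include <linux/fs.h>
#include <linux/mm.h>

static void my_truncate_unmap(struct inode *inode, loff_t newsize)
{
        /* holebegin must be page aligned; holelen == 0 means "to the end
         * of the file"; even_cows == 1 also unmaps private COW'ed copies,
         * as truncation requires (pass 0 when merely invalidating
         * pagecache, so private data is preserved). */
        unmap_mapping_range(inode->i_mapping,
                            round_up(newsize, PAGE_SIZE), 0, 1);
}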
4312 * Restore a potential device exclusive pte to a working pte entry
4316 struct folio *folio = page_folio(vmf->page); in remove_device_exclusive_entry()
4317 struct vm_area_struct *vma = vmf->vma; in remove_device_exclusive_entry()
4323 * the PTL so a racing thread can remove the device-exclusive in remove_device_exclusive_entry()
4324 * entry and unmap it. If the folio is free the entry must in remove_device_exclusive_entry()
4326 * been re-allocated after being freed all we do is lock and in remove_device_exclusive_entry()
4338 vma->vm_mm, vmf->address & PAGE_MASK, in remove_device_exclusive_entry()
4339 (vmf->address & PAGE_MASK) + PAGE_SIZE, NULL); in remove_device_exclusive_entry()
4342 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in remove_device_exclusive_entry()
4343 &vmf->ptl); in remove_device_exclusive_entry()
4344 if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte))) in remove_device_exclusive_entry()
4345 restore_exclusive_pte(vma, folio, vmf->page, vmf->address, in remove_device_exclusive_entry()
4346 vmf->pte, vmf->orig_pte); in remove_device_exclusive_entry()
4348 if (vmf->pte) in remove_device_exclusive_entry()
4349 pte_unmap_unlock(vmf->pte, vmf->ptl); in remove_device_exclusive_entry()
4363 if (mem_cgroup_swap_full(folio) || (vma->vm_flags & VM_LOCKED) || in should_try_to_free_swap()
4378 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, in pte_marker_clear()
4379 vmf->address, &vmf->ptl); in pte_marker_clear()
4380 if (!vmf->pte) in pte_marker_clear()
4383 * Be careful so that we will only recover a special uffd-wp pte into a in pte_marker_clear()
4390 if (pte_same(vmf->orig_pte, ptep_get(vmf->pte))) in pte_marker_clear()
4391 pte_clear(vmf->vma->vm_mm, vmf->address, vmf->pte); in pte_marker_clear()
4392 pte_unmap_unlock(vmf->pte, vmf->ptl); in pte_marker_clear()
4398 if (vma_is_anonymous(vmf->vma)) in do_pte_missing()
4405 * This is actually a page-missing access, but with uffd-wp special pte
4406 * installed. It means this pte was wr-protected before being unmapped.
4412 * got unregistered - we can simply clear them. in pte_marker_handle_uffd_wp()
4414 if (unlikely(!userfaultfd_wp(vmf->vma))) in pte_marker_handle_uffd_wp()
4422 const softleaf_t entry = softleaf_from_pte(vmf->orig_pte); in handle_pte_marker() local
4423 const pte_marker marker = softleaf_to_marker(entry); in handle_pte_marker()
4432 /* Higher priority than uffd-wp when data corrupted */ in handle_pte_marker()
4440 if (softleaf_is_uffd_wp_marker(entry)) in handle_pte_marker()
4449 struct vm_area_struct *vma = vmf->vma; in __alloc_swap_folio()
4451 softleaf_t entry; in __alloc_swap_folio() local
4453 folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vmf->address); in __alloc_swap_folio()
4457 entry = softleaf_from_pte(vmf->orig_pte); in __alloc_swap_folio()
4458 if (mem_cgroup_swapin_charge_folio(folio, vma->vm_mm, in __alloc_swap_folio()
4459 GFP_KERNEL, entry)) { in __alloc_swap_folio()
4475 softleaf_t entry; in can_swapin_thp() local
4479 addr = ALIGN_DOWN(vmf->address, nr_pages * PAGE_SIZE); in can_swapin_thp()
4480 idx = (vmf->address - addr) / PAGE_SIZE; in can_swapin_thp()
4483 if (!pte_same(pte, pte_move_swp_offset(vmf->orig_pte, -idx))) in can_swapin_thp()
4485 entry = softleaf_from_pte(pte); in can_swapin_thp()
4494 if (unlikely(swap_zeromap_batch(entry, nr_pages, NULL) != nr_pages)) in can_swapin_thp()
4496 if (unlikely(non_swapcache_batch(entry, nr_pages) != nr_pages)) in can_swapin_thp()
4527 struct vm_area_struct *vma = vmf->vma; in alloc_swap_folio()
4531 softleaf_t entry; in alloc_swap_folio() local
4538 * If uffd is active for the vma we need per-page fault fidelity to in alloc_swap_folio()
4546 * lack handling for such cases, so fallback to swapping in order-0 in alloc_swap_folio()
4552 entry = softleaf_from_pte(vmf->orig_pte); in alloc_swap_folio()
4557 orders = thp_vma_allowable_orders(vma, vma->vm_flags, TVA_PAGEFAULT, in alloc_swap_folio()
4558 BIT(PMD_ORDER) - 1); in alloc_swap_folio()
4559 orders = thp_vma_suitable_orders(vma, vmf->address, orders); in alloc_swap_folio()
4560 orders = thp_swap_suitable_orders(swp_offset(entry), in alloc_swap_folio()
4561 vmf->address, orders); in alloc_swap_folio()
4566 pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, in alloc_swap_folio()
4567 vmf->address & PMD_MASK, &ptl); in alloc_swap_folio()
4577 addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); in alloc_swap_folio()
4588 addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); in alloc_swap_folio()
4591 if (!mem_cgroup_swapin_charge_folio(folio, vma->vm_mm, in alloc_swap_folio()
4592 gfp, entry)) in alloc_swap_folio()
4614 * We enter with non-exclusive mmap_lock (to exclude vma changes,
4623 struct vm_area_struct *vma = vmf->vma; in do_swap_page()
4631 softleaf_t entry; in do_swap_page() local
4637 unsigned long address; in do_swap_page() local
4643 entry = softleaf_from_pte(vmf->orig_pte); in do_swap_page()
4644 if (unlikely(!softleaf_is_swap(entry))) { in do_swap_page()
4645 if (softleaf_is_migration(entry)) { in do_swap_page()
4646 migration_entry_wait(vma->vm_mm, vmf->pmd, in do_swap_page()
4647 vmf->address); in do_swap_page()
4648 } else if (softleaf_is_device_exclusive(entry)) { in do_swap_page()
4649 vmf->page = softleaf_to_page(entry); in do_swap_page()
4651 } else if (softleaf_is_device_private(entry)) { in do_swap_page()
4652 if (vmf->flags & FAULT_FLAG_VMA_LOCK) { in do_swap_page()
4662 vmf->page = softleaf_to_page(entry); in do_swap_page()
4663 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_swap_page()
4664 vmf->address, &vmf->ptl); in do_swap_page()
4665 if (unlikely(!vmf->pte || in do_swap_page()
4666 !pte_same(ptep_get(vmf->pte), in do_swap_page()
4667 vmf->orig_pte))) in do_swap_page()
4674 if (trylock_page(vmf->page)) { in do_swap_page()
4677 get_page(vmf->page); in do_swap_page()
4678 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
4679 pgmap = page_pgmap(vmf->page); in do_swap_page()
4680 ret = pgmap->ops->migrate_to_ram(vmf); in do_swap_page()
4681 unlock_page(vmf->page); in do_swap_page()
4682 put_page(vmf->page); in do_swap_page()
4684 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
4686 } else if (softleaf_is_hwpoison(entry)) { in do_swap_page()
4688 } else if (softleaf_is_marker(entry)) { in do_swap_page()
4691 print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL); in do_swap_page()
4698 si = get_swap_device(entry); in do_swap_page()
4702 folio = swap_cache_get_folio(entry); in do_swap_page()
4704 swap_update_readahead(folio, vma, vmf->address); in do_swap_page()
4708 if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && in do_swap_page()
4709 __swap_count(entry) == 1) { in do_swap_page()
4718 entry.val = ALIGN_DOWN(entry.val, nr_pages); in do_swap_page()
4722 * may finish swapin first, free the entry, and in do_swap_page()
4723 * swapout reusing the same entry. It's in do_swap_page()
4725 * to entry reuse. in do_swap_page()
4727 if (swapcache_prepare(entry, nr_pages)) { in do_swap_page()
4739 memcg1_swapin(entry, nr_pages); in do_swap_page()
4741 shadow = swap_cache_get_shadow(entry); in do_swap_page()
4747 /* To provide entry to swap_read_folio() */ in do_swap_page()
4748 folio->swap = entry; in do_swap_page()
4750 folio->private = NULL; in do_swap_page()
4753 folio = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, in do_swap_page()
4763 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_swap_page()
4764 vmf->address, &vmf->ptl); in do_swap_page()
4765 if (likely(vmf->pte && in do_swap_page()
4766 pte_same(ptep_get(vmf->pte), vmf->orig_pte))) in do_swap_page()
4774 count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); in do_swap_page()
4781 page = folio_file_page(folio, swp_offset(entry)); in do_swap_page()
4790 if (unlikely(!folio_matches_swap_entry(folio, entry))) in do_swap_page()
4804 * folio->index of non-ksm folios would be nonlinear inside the in do_swap_page()
4805 * anon VMA -- the ksm flag is lost on actual swapout. in do_swap_page()
4807 folio = ksm_might_need_to_copy(folio, vma, vmf->address); in do_swap_page()
4812 } else if (unlikely(folio == ERR_PTR(-EHWPOISON))) { in do_swap_page()
4826 if ((vmf->flags & FAULT_FLAG_WRITE) && folio == swapcache && in do_swap_page()
4836 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in do_swap_page()
4837 &vmf->ptl); in do_swap_page()
4838 if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) in do_swap_page()
4849 unsigned long folio_start = ALIGN_DOWN(vmf->address, nr * PAGE_SIZE); in do_swap_page()
4850 unsigned long idx = (vmf->address - folio_start) / PAGE_SIZE; in do_swap_page()
4851 pte_t *folio_ptep = vmf->pte - idx; in do_swap_page()
4854 if (!pte_same(folio_pte, pte_move_swp_offset(vmf->orig_pte, -idx)) || in do_swap_page()
4859 address = folio_start; in do_swap_page()
4866 address = vmf->address; in do_swap_page()
4867 ptep = vmf->pte; in do_swap_page()
4871 unsigned long folio_start = address - idx * PAGE_SIZE; in do_swap_page()
4876 if (unlikely(folio_start < max(address & PMD_MASK, vma->vm_start))) in do_swap_page()
4878 if (unlikely(folio_end > pmd_addr_end(address, vma->vm_end))) in do_swap_page()
4881 folio_ptep = vmf->pte - idx; in do_swap_page()
4883 if (!pte_same(folio_pte, pte_move_swp_offset(vmf->orig_pte, -idx)) || in do_swap_page()
4888 address = folio_start; in do_swap_page()
4891 entry = folio->swap; in do_swap_page()
4892 page = &folio->page; in do_swap_page()
4909 * the swap entry concurrently) for certainly exclusive pages. in do_swap_page()
4912 exclusive = pte_swp_exclusive(vmf->orig_pte); in do_swap_page()
4916 * swapcache -> certainly exclusive. in do_swap_page()
4920 data_race(si->flags & SWP_STABLE_WRITES)) { in do_swap_page()
4945 * when reading from swap. This metadata may be indexed by swap entry in do_swap_page()
4948 arch_swap_restore(folio_swap(entry, folio), folio); in do_swap_page()
4951 * Remove the swap entry and conditionally try to free up the swapcache. in do_swap_page()
4955 swap_free_nr(entry, nr_pages); in do_swap_page()
4956 if (should_try_to_free_swap(folio, vma, vmf->flags)) in do_swap_page()
4959 add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr_pages); in do_swap_page()
4960 add_mm_counter(vma->vm_mm, MM_SWAPENTS, -nr_pages); in do_swap_page()
4961 pte = mk_pte(page, vma->vm_page_prot); in do_swap_page()
4962 if (pte_swp_soft_dirty(vmf->orig_pte)) in do_swap_page()
4964 if (pte_swp_uffd_wp(vmf->orig_pte)) in do_swap_page()
4970 * exposing them to the swapcache or because the swap entry indicates in do_swap_page()
4975 if ((vma->vm_flags & VM_WRITE) && !userfaultfd_pte_wp(vma, pte) && in do_swap_page()
4978 if (vmf->flags & FAULT_FLAG_WRITE) { in do_swap_page()
4980 vmf->flags &= ~FAULT_FLAG_WRITE; in do_swap_page()
4985 folio_ref_add(folio, nr_pages - 1); in do_swap_page()
4987 vmf->orig_pte = pte_advance_pfn(pte, page_idx); in do_swap_page()
4991 folio_add_new_anon_rmap(folio, vma, address, RMAP_EXCLUSIVE); in do_swap_page()
5002 folio_add_new_anon_rmap(folio, vma, address, rmap_flags); in do_swap_page()
5004 folio_add_anon_rmap_ptes(folio, page, nr_pages, vma, address, in do_swap_page()
5010 set_ptes(vma->vm_mm, address, ptep, pte, nr_pages); in do_swap_page()
5011 arch_do_swap_page_nr(vma->vm_mm, vma, address, in do_swap_page()
5017 * Hold the lock to avoid the swap entry to be reused in do_swap_page()
5028 if (vmf->flags & FAULT_FLAG_WRITE) { in do_swap_page()
5035 /* No need to invalidate - it was non-present before */ in do_swap_page()
5036 update_mmu_cache_range(vmf, vma, address, ptep, nr_pages); in do_swap_page()
5038 if (vmf->pte) in do_swap_page()
5039 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
5043 swapcache_clear(si, entry, nr_pages); in do_swap_page()
5051 if (vmf->pte) in do_swap_page()
5052 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
5062 swapcache_clear(si, entry, nr_pages); in do_swap_page()
5085 struct vm_area_struct *vma = vmf->vma; in alloc_anon_folio()
5095 * If uffd is active for the vma we need per-page fault fidelity to in alloc_anon_folio()
5104 * the faulting address and still be fully contained in the vma. in alloc_anon_folio()
5106 orders = thp_vma_allowable_orders(vma, vma->vm_flags, TVA_PAGEFAULT, in alloc_anon_folio()
5107 BIT(PMD_ORDER) - 1); in alloc_anon_folio()
5108 orders = thp_vma_suitable_orders(vma, vmf->address, orders); in alloc_anon_folio()
5113 pte = pte_offset_map(vmf->pmd, vmf->address & PMD_MASK); in alloc_anon_folio()
5115 return ERR_PTR(-EAGAIN); in alloc_anon_folio()
5124 addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); in alloc_anon_folio()
5138 addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); in alloc_anon_folio()
5141 if (mem_cgroup_charge(folio, vma->vm_mm, gfp)) { in alloc_anon_folio()
5151 * that the page corresponding to the faulting address in alloc_anon_folio()
5155 folio_zero_user(folio, vmf->address); in alloc_anon_folio()
5165 return folio_prealloc(vma->vm_mm, vma, vmf->address, true); in alloc_anon_folio()
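/*
 * Minimal sketch (not part of this file) of the per-order placement rule
 * that alloc_anon_folio() relies on via thp_vma_suitable_orders(): a
 * candidate folio of the given order is mapped at the fault address rounded
 * down to the folio size, and that aligned range must still fit inside the
 * VMA. The helper name is illustrative.
 */
static inline bool anon_order_fits(struct vm_area_struct *vma,
				   unsigned long fault_addr, unsigned int order)
{
	unsigned long addr = ALIGN_DOWN(fault_addr, PAGE_SIZE << order);

	return addr >= vma->vm_start &&
	       addr + (PAGE_SIZE << order) <= vma->vm_end;
}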
5169 * We enter with non-exclusive mmap_lock (to exclude vma changes,
5175 struct vm_area_struct *vma = vmf->vma; in do_anonymous_page()
5176 unsigned long addr = vmf->address; in do_anonymous_page()
5180 pte_t entry; in do_anonymous_page() local
5182 /* File mapping without ->vm_ops ? */ in do_anonymous_page()
5183 if (vma->vm_flags & VM_SHARED) in do_anonymous_page()
5190 if (pte_alloc(vma->vm_mm, vmf->pmd)) in do_anonymous_page()
5193 /* Use the zero-page for reads */ in do_anonymous_page()
5194 if (!(vmf->flags & FAULT_FLAG_WRITE) && in do_anonymous_page()
5195 !mm_forbids_zeropage(vma->vm_mm)) { in do_anonymous_page()
5196 entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), in do_anonymous_page()
5197 vma->vm_page_prot)); in do_anonymous_page()
5198 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_anonymous_page()
5199 vmf->address, &vmf->ptl); in do_anonymous_page()
5200 if (!vmf->pte) in do_anonymous_page()
5203 update_mmu_tlb(vma, vmf->address, vmf->pte); in do_anonymous_page()
5206 ret = check_stable_address_space(vma->vm_mm); in do_anonymous_page()
5211 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
5221 /* Returns NULL on OOM or ERR_PTR(-EAGAIN) if we must retry the fault */ in do_anonymous_page()
5229 addr = ALIGN_DOWN(vmf->address, nr_pages * PAGE_SIZE); in do_anonymous_page()
5238 entry = folio_mk_pte(folio, vma->vm_page_prot); in do_anonymous_page()
5239 entry = pte_sw_mkyoung(entry); in do_anonymous_page()
5240 if (vma->vm_flags & VM_WRITE) in do_anonymous_page()
5241 entry = pte_mkwrite(pte_mkdirty(entry), vma); in do_anonymous_page()
5243 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl); in do_anonymous_page()
5244 if (!vmf->pte) in do_anonymous_page()
5247 update_mmu_tlb(vma, addr, vmf->pte); in do_anonymous_page()
5249 } else if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) { in do_anonymous_page()
5250 update_mmu_tlb_range(vma, addr, vmf->pte, nr_pages); in do_anonymous_page()
5254 ret = check_stable_address_space(vma->vm_mm); in do_anonymous_page()
5260 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
5265 folio_ref_add(folio, nr_pages - 1); in do_anonymous_page()
5266 add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr_pages); in do_anonymous_page()
5272 entry = pte_mkuffd_wp(entry); in do_anonymous_page()
5273 set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr_pages); in do_anonymous_page()
5275 /* No need to invalidate - it was non-present before */ in do_anonymous_page()
5276 update_mmu_cache_range(vmf, vma, addr, vmf->pte, nr_pages); in do_anonymous_page()
5278 if (vmf->pte) in do_anonymous_page()
5279 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
5289 * The mmap_lock must have been held on entry, and may have been
5290 * released depending on flags and vma->vm_ops->fault() return value.
5295 struct vm_area_struct *vma = vmf->vma; in __do_fault()
5314 if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) { in __do_fault()
5315 vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); in __do_fault()
5316 if (!vmf->prealloc_pte) in __do_fault()
5320 ret = vma->vm_ops->fault(vmf); in __do_fault()
5325 folio = page_folio(vmf->page); in __do_fault()
5326 if (unlikely(PageHWPoison(vmf->page))) { in __do_fault()
5329 if (page_mapped(vmf->page)) in __do_fault()
5332 if (mapping_evict_folio(folio->mapping, folio)) in __do_fault()
5337 vmf->page = NULL; in __do_fault()
5344 VM_BUG_ON_PAGE(!folio_test_locked(folio), vmf->page); in __do_fault()
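/*
 * Contract sketch for the ->fault() call above: the handler either returns
 * an error/retry bit (VM_FAULT_OOM, VM_FAULT_SIGBUS, VM_FAULT_RETRY, ...) or
 * fills vmf->page with a referenced page. If it also returns VM_FAULT_LOCKED
 * the page must already be locked, which is what the VM_BUG_ON_PAGE() above
 * verifies; otherwise __do_fault() locks the page itself.
 */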
5352 struct vm_area_struct *vma = vmf->vma; in deposit_prealloc_pte()
5354 pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); in deposit_prealloc_pte()
5359 mm_inc_nr_ptes(vma->vm_mm); in deposit_prealloc_pte()
5360 vmf->prealloc_pte = NULL; in deposit_prealloc_pte()
5365 struct vm_area_struct *vma = vmf->vma; in do_set_pmd()
5366 bool write = vmf->flags & FAULT_FLAG_WRITE; in do_set_pmd()
5367 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in do_set_pmd()
5368 pmd_t entry; in do_set_pmd() local
5374 * PMD mappings, but PTE-mapped THP are fine. So let's simply refuse any in do_set_pmd()
5378 if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags, in do_set_pmd()
5387 page = &folio->page; in do_set_pmd()
5400 * related to the pte entry. Use the preallocated table for that. in do_set_pmd()
5402 if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) { in do_set_pmd()
5403 vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); in do_set_pmd()
5404 if (!vmf->prealloc_pte) in do_set_pmd()
5408 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in do_set_pmd()
5409 if (unlikely(!pmd_none(*vmf->pmd))) in do_set_pmd()
5414 entry = folio_mk_pmd(folio, vma->vm_page_prot); in do_set_pmd()
5416 entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); in do_set_pmd()
5418 add_mm_counter(vma->vm_mm, mm_counter_file(folio), HPAGE_PMD_NR); in do_set_pmd()
5427 set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); in do_set_pmd()
5429 update_mmu_cache_pmd(vma, haddr, vmf->pmd); in do_set_pmd()
5435 spin_unlock(vmf->ptl); in do_set_pmd()
5446 * set_pte_range - Set a range of PTEs to point to pages in a folio.
5451 * @addr: The first address to create a PTE for.
5456 struct vm_area_struct *vma = vmf->vma; in set_pte_range()
5457 bool write = vmf->flags & FAULT_FLAG_WRITE; in set_pte_range()
5458 bool prefault = !in_range(vmf->address, addr, nr * PAGE_SIZE); in set_pte_range()
5459 pte_t entry; in set_pte_range() local
5462 entry = mk_pte(page, vma->vm_page_prot); in set_pte_range()
5465 entry = pte_mkold(entry); in set_pte_range()
5467 entry = pte_sw_mkyoung(entry); in set_pte_range()
5470 entry = maybe_mkwrite(pte_mkdirty(entry), vma); in set_pte_range()
5471 else if (pte_write(entry) && folio_test_dirty(folio)) in set_pte_range()
5472 entry = pte_mkdirty(entry); in set_pte_range()
5474 entry = pte_mkuffd_wp(entry); in set_pte_range()
5475 /* copy-on-write page */ in set_pte_range()
5476 if (write && !(vma->vm_flags & VM_SHARED)) { in set_pte_range()
5483 set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr); in set_pte_range()
5485 /* no need to invalidate: a not-present page won't be cached */ in set_pte_range()
5486 update_mmu_cache_range(vmf, vma, addr, vmf->pte, nr); in set_pte_range()
5491 if (vmf->flags & FAULT_FLAG_ORIG_PTE_VALID) in vmf_pte_changed()
5492 return !pte_same(ptep_get(vmf->pte), vmf->orig_pte); in vmf_pte_changed()
5494 return !pte_none(ptep_get(vmf->pte)); in vmf_pte_changed()
5498 * finish_fault - finish page fault once we have prepared the page to fault
5514 struct vm_area_struct *vma = vmf->vma; in finish_fault()
5518 bool is_cow = (vmf->flags & FAULT_FLAG_WRITE) && in finish_fault()
5519 !(vma->vm_flags & VM_SHARED); in finish_fault()
5525 addr = vmf->address; in finish_fault()
5529 page = vmf->cow_page; in finish_fault()
5531 page = vmf->page; in finish_fault()
5538 if (!(vma->vm_flags & VM_SHARED)) { in finish_fault()
5539 ret = check_stable_address_space(vma->vm_mm); in finish_fault()
5544 if (!needs_fallback && vma->vm_file) { in finish_fault()
5545 struct address_space *mapping = vma->vm_file->f_mapping; in finish_fault()
5548 file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); in finish_fault()
5561 if (pmd_none(*vmf->pmd)) { in finish_fault()
5568 if (vmf->prealloc_pte) in finish_fault()
5569 pmd_install(vma->vm_mm, vmf->pmd, &vmf->prealloc_pte); in finish_fault()
5570 else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) in finish_fault()
5576 /* Using per-page fault to maintain the uffd semantics */ in finish_fault()
5581 /* The page offset of vmf->address within the VMA. */ in finish_fault()
5582 pgoff_t vma_off = vmf->pgoff - vmf->vma->vm_pgoff; in finish_fault()
5583 /* The index of the entry in the pagetable for fault page. */ in finish_fault()
5584 pgoff_t pte_off = pte_index(vmf->address); in finish_fault()
5587 * Fall back to per-page fault in case the folio size in page in finish_fault()
5591 vma_off + (nr_pages - idx) > vma_pages(vma) || in finish_fault()
5593 pte_off + (nr_pages - idx) > PTRS_PER_PTE)) { in finish_fault()
5597 addr = vmf->address - idx * PAGE_SIZE; in finish_fault()
5598 page = &folio->page; in finish_fault()
5602 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in finish_fault()
5603 addr, &vmf->ptl); in finish_fault()
5604 if (!vmf->pte) in finish_fault()
5607 /* Re-check under ptl */ in finish_fault()
5609 update_mmu_tlb(vma, addr, vmf->pte); in finish_fault()
5612 } else if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) { in finish_fault()
5614 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_fault()
5618 folio_ref_add(folio, nr_pages - 1); in finish_fault()
5621 add_mm_counter(vma->vm_mm, type, nr_pages); in finish_fault()
5625 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_fault()
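/*
 * The idx/vma_off/pte_off checks above decide whether finish_fault() can map
 * the whole folio in one set_pte_range() call: the folio-sized range must
 * fit within the VMA and within a single PTE page, and uffd-wp must not
 * require per-page fidelity; otherwise the fault falls back to mapping only
 * the page at vmf->address.
 */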
5646 return -EINVAL; in fault_around_bytes_set()
5649 * The minimum value is 1 page; however, this results in no fault-around in fault_around_bytes_set()
5670 * do_fault_around() tries to map a few pages around the fault address. The hope
5674 * It uses vm_ops->map_pages() to map the pages, which skips the page if it's
5675 * not ready to be mapped: not up-to-date, locked, etc.
5684 * The virtual address of the area that we map is naturally aligned to
5692 pgoff_t pte_off = pte_index(vmf->address); in do_fault_around()
5693 /* The page offset of vmf->address within the VMA. */ in do_fault_around()
5694 pgoff_t vma_off = vmf->pgoff - vmf->vma->vm_pgoff; in do_fault_around()
5698 /* The PTE offset of the start address, clamped to the VMA. */ in do_fault_around()
5700 pte_off - min(pte_off, vma_off)); in do_fault_around()
5702 /* The PTE offset of the end address, clamped to the VMA and PTE. */ in do_fault_around()
5704 pte_off + vma_pages(vmf->vma) - vma_off) - 1; in do_fault_around()
5706 if (pmd_none(*vmf->pmd)) { in do_fault_around()
5707 vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm); in do_fault_around()
5708 if (!vmf->prealloc_pte) in do_fault_around()
5713 ret = vmf->vma->vm_ops->map_pages(vmf, in do_fault_around()
5714 vmf->pgoff + from_pte - pte_off, in do_fault_around()
5715 vmf->pgoff + to_pte - pte_off); in do_fault_around()
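/*
 * Worked example for the clamping above (illustrative numbers): with a
 * 16-page fault-around window, pte_off = 35 and vma_off = 3 give
 * from_pte = max(ALIGN_DOWN(35, 16), 35 - 3) = 32, and for a VMA of 100
 * pages to_pte = min3(48, PTRS_PER_PTE, 35 + 100 - 3) - 1 = 47 (assuming
 * PTRS_PER_PTE = 512), so ->map_pages() is asked to map the page-cache
 * range [vmf->pgoff - 3 .. vmf->pgoff + 12].
 */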
5721 /* Return true if we should do read fault-around, false otherwise */
5724 /* No ->map_pages? No way to fault around... */ in should_fault_around()
5725 if (!vmf->vma->vm_ops->map_pages) in should_fault_around()
5728 if (uffd_disable_fault_around(vmf->vma)) in should_fault_around()
5741 * Let's call ->map_pages() first and use ->fault() as fallback in do_read_fault()
5760 folio = page_folio(vmf->page); in do_read_fault()
5769 struct vm_area_struct *vma = vmf->vma; in do_cow_fault()
5779 folio = folio_prealloc(vma->vm_mm, vma, vmf->address, false); in do_cow_fault()
5783 vmf->cow_page = &folio->page; in do_cow_fault()
5791 if (copy_mc_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma)) { in do_cow_fault()
5799 unlock_page(vmf->page); in do_cow_fault()
5800 put_page(vmf->page); in do_cow_fault()
5811 struct vm_area_struct *vma = vmf->vma; in do_shared_fault()
5823 folio = page_folio(vmf->page); in do_shared_fault()
5826 * Check if the backing address space wants to know that the page is in do_shared_fault()
5829 if (vma->vm_ops->page_mkwrite) { in do_shared_fault()
5852 * We enter with non-exclusive mmap_lock (to exclude vma changes,
5861 struct vm_area_struct *vma = vmf->vma; in do_fault()
5862 struct mm_struct *vm_mm = vma->vm_mm; in do_fault()
5868 if (!vma->vm_ops->fault) { in do_fault()
5869 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, in do_fault()
5870 vmf->address, &vmf->ptl); in do_fault()
5871 if (unlikely(!vmf->pte)) in do_fault()
5881 if (unlikely(pte_none(ptep_get(vmf->pte)))) in do_fault()
5886 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_fault()
5888 } else if (!(vmf->flags & FAULT_FLAG_WRITE)) in do_fault()
5890 else if (!(vma->vm_flags & VM_SHARED)) in do_fault()
5896 if (vmf->prealloc_pte) { in do_fault()
5897 pte_free(vm_mm, vmf->prealloc_pte); in do_fault()
5898 vmf->prealloc_pte = NULL; in do_fault()
5907 struct vm_area_struct *vma = vmf->vma; in numa_migrate_check()
5921 * Flag if the folio is shared between multiple address spaces. This in numa_migrate_check()
5924 if (folio_maybe_mapped_shared(folio) && (vma->vm_flags & VM_SHARED)) in numa_migrate_check()
5931 *last_cpupid = (-1 & LAST_CPUPID_MASK); in numa_migrate_check()
5957 pte = pte_modify(old_pte, vma->vm_page_prot); in numa_rebuild_single_mapping()
5969 int nr = pte_pfn(fault_pte) - folio_pfn(folio); in numa_rebuild_large_mapping()
5970 unsigned long start, end, addr = vmf->address; in numa_rebuild_large_mapping()
5971 unsigned long addr_start = addr - (nr << PAGE_SHIFT); in numa_rebuild_large_mapping()
5976 start = max3(addr_start, pt_start, vma->vm_start); in numa_rebuild_large_mapping()
5978 vma->vm_end); in numa_rebuild_large_mapping()
5979 start_ptep = vmf->pte - ((addr - start) >> PAGE_SHIFT); in numa_rebuild_large_mapping()
5993 ptent = pte_modify(ptent, vma->vm_page_prot); in numa_rebuild_large_mapping()
6006 struct vm_area_struct *vma = vmf->vma; in do_numa_page()
6020 spin_lock(vmf->ptl); in do_numa_page()
6022 old_pte = ptep_get(vmf->pte); in do_numa_page()
6024 if (unlikely(!pte_same(old_pte, vmf->orig_pte))) { in do_numa_page()
6025 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
6029 pte = pte_modify(old_pte, vma->vm_page_prot); in do_numa_page()
6037 can_change_pte_writable(vma, vmf->address, pte)) in do_numa_page()
6040 folio = vm_normal_folio(vma, vmf->address, pte); in do_numa_page()
6047 target_nid = numa_migrate_check(folio, vmf, vmf->address, &flags, in do_numa_page()
6056 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
6069 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_numa_page()
6070 vmf->address, &vmf->ptl); in do_numa_page()
6071 if (unlikely(!vmf->pte)) in do_numa_page()
6073 if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in do_numa_page()
6074 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
6080 * non-accessible ptes, some can allow access by kernel mode. in do_numa_page()
6086 numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte, in do_numa_page()
6088 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
6097 struct vm_area_struct *vma = vmf->vma; in create_huge_pmd()
6100 if (vma->vm_ops->huge_fault) in create_huge_pmd()
6101 return vma->vm_ops->huge_fault(vmf, PMD_ORDER); in create_huge_pmd()
6108 struct vm_area_struct *vma = vmf->vma; in wp_huge_pmd()
6109 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in wp_huge_pmd()
6114 userfaultfd_huge_pmd_wp(vma, vmf->orig_pmd)) { in wp_huge_pmd()
6115 if (userfaultfd_wp_async(vmf->vma)) in wp_huge_pmd()
6122 if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { in wp_huge_pmd()
6123 if (vma->vm_ops->huge_fault) { in wp_huge_pmd()
6124 ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER); in wp_huge_pmd()
6131 /* COW or write-notify handled on pte level: split pmd. */ in wp_huge_pmd()
6132 __split_huge_pmd(vma, vmf->pmd, vmf->address, false); in wp_huge_pmd()
6141 struct vm_area_struct *vma = vmf->vma; in create_huge_pud()
6145 if (vma->vm_ops->huge_fault) in create_huge_pud()
6146 return vma->vm_ops->huge_fault(vmf, PUD_ORDER); in create_huge_pud()
6155 struct vm_area_struct *vma = vmf->vma; in wp_huge_pud()
6161 if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { in wp_huge_pud()
6162 if (vma->vm_ops->huge_fault) { in wp_huge_pud()
6163 ret = vma->vm_ops->huge_fault(vmf, PUD_ORDER); in wp_huge_pud()
6169 /* COW or write-notify not handled on PUD level: split pud. */ in wp_huge_pud()
6170 __split_huge_pud(vma, vmf->pud, vmf->address); in wp_huge_pud()
6177 * page table. For example, a non-populated virtual page is accessed
6184 * stale read-only TLB entry exists in the local CPU and needs to be
6197 if (vmf->flags & FAULT_FLAG_TRIED) in fix_spurious_fault()
6205 if (vmf->flags & FAULT_FLAG_WRITE) { in fix_spurious_fault()
6207 flush_tlb_fix_spurious_fault(vmf->vma, vmf->address, in fix_spurious_fault()
6208 vmf->pte); in fix_spurious_fault()
6210 flush_tlb_fix_spurious_fault_pmd(vmf->vma, vmf->address, in fix_spurious_fault()
6211 vmf->pmd); in fix_spurious_fault()
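/*
 * Put differently: a spurious fault needs no page-table change at all; the
 * only possibly-stale state is a TLB entry on the local CPU. The fix is
 * therefore a targeted flush of the faulting entry (pte or pmd level, as
 * above), and the whole thing is skipped when FAULT_FLAG_TRIED is set, as
 * checked earlier in this helper.
 */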
6223 * We enter with non-exclusive mmap_lock (to exclude vma changes, but allow
6231 pte_t entry; in handle_pte_fault() local
6233 if (unlikely(pmd_none(*vmf->pmd))) { in handle_pte_fault()
6235 * Leave __pte_alloc() until later: because vm_ops->fault may in handle_pte_fault()
6240 vmf->pte = NULL; in handle_pte_fault()
6241 vmf->flags &= ~FAULT_FLAG_ORIG_PTE_VALID; in handle_pte_fault()
6251 * Use the maywrite version to indicate that vmf->pte may be in handle_pte_fault()
6253 * change of the !pte_none() entry, there is no need to recheck in handle_pte_fault()
6258 vmf->pte = pte_offset_map_rw_nolock(vmf->vma->vm_mm, vmf->pmd, in handle_pte_fault()
6259 vmf->address, &dummy_pmdval, in handle_pte_fault()
6260 &vmf->ptl); in handle_pte_fault()
6261 if (unlikely(!vmf->pte)) in handle_pte_fault()
6263 vmf->orig_pte = ptep_get_lockless(vmf->pte); in handle_pte_fault()
6264 vmf->flags |= FAULT_FLAG_ORIG_PTE_VALID; in handle_pte_fault()
6266 if (pte_none(vmf->orig_pte)) { in handle_pte_fault()
6267 pte_unmap(vmf->pte); in handle_pte_fault()
6268 vmf->pte = NULL; in handle_pte_fault()
6272 if (!vmf->pte) in handle_pte_fault()
6275 if (!pte_present(vmf->orig_pte)) in handle_pte_fault()
6278 if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) in handle_pte_fault()
6281 spin_lock(vmf->ptl); in handle_pte_fault()
6282 entry = vmf->orig_pte; in handle_pte_fault()
6283 if (unlikely(!pte_same(ptep_get(vmf->pte), entry))) { in handle_pte_fault()
6284 update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); in handle_pte_fault()
6287 if (vmf->flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) { in handle_pte_fault()
6288 if (!pte_write(entry)) in handle_pte_fault()
6290 else if (likely(vmf->flags & FAULT_FLAG_WRITE)) in handle_pte_fault()
6291 entry = pte_mkdirty(entry); in handle_pte_fault()
6293 entry = pte_mkyoung(entry); in handle_pte_fault()
6294 if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry, in handle_pte_fault()
6295 vmf->flags & FAULT_FLAG_WRITE)) in handle_pte_fault()
6296 update_mmu_cache_range(vmf, vmf->vma, vmf->address, in handle_pte_fault()
6297 vmf->pte, 1); in handle_pte_fault()
6301 pte_unmap_unlock(vmf->pte, vmf->ptl); in handle_pte_fault()
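/*
 * Dispatch summary for handle_pte_fault() (sketch, not verbatim): a none PTE
 * goes to do_anonymous_page() or do_fault() depending on whether the VMA is
 * anonymous, a non-present PTE goes to do_swap_page(), a prot-none PTE on an
 * accessible VMA goes to do_numa_page(); anything else is a write/unshare or
 * access-bit update handled right here under the PTE lock.
 */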
6306 * On entry, we hold either the VMA lock or the mmap_lock
6312 unsigned long address, unsigned int flags) in __handle_mm_fault() argument
6316 .address = address & PAGE_MASK, in __handle_mm_fault()
6317 .real_address = address, in __handle_mm_fault()
6319 .pgoff = linear_page_index(vma, address), in __handle_mm_fault()
6322 struct mm_struct *mm = vma->vm_mm; in __handle_mm_fault()
6323 vm_flags_t vm_flags = vma->vm_flags; in __handle_mm_fault()
6328 pgd = pgd_offset(mm, address); in __handle_mm_fault()
6329 p4d = p4d_alloc(mm, pgd, address); in __handle_mm_fault()
6333 vmf.pud = pud_alloc(mm, p4d, address); in __handle_mm_fault()
6363 vmf.pmd = pmd_alloc(mm, vmf.pud, address); in __handle_mm_fault()
6415 * mm_account_fault - Do page fault accounting
6418 * of perf event counters, but we'll still do the per-task accounting to
6420 * @address: the faulted address.
6427 * still be in the per-arch page fault handler on entry to the page fault.
6430 unsigned long address, unsigned int flags, in mm_account_fault() argument
6448 * Do not account for unsuccessful faults (e.g. when the address wasn't in mm_account_fault()
6464 current->maj_flt++; in mm_account_fault()
6466 current->min_flt++; in mm_account_fault()
6477 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); in mm_account_fault()
6479 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); in mm_account_fault()
6486 current->in_lru_fault = vma_has_recency(vma); in lru_gen_enter_fault()
6491 current->in_lru_fault = false; in lru_gen_exit_fault()
6511 * just treat it like an ordinary read-fault otherwise. in sanitize_fault_flags()
6513 if (!is_cow_mapping(vma->vm_flags)) in sanitize_fault_flags()
6516 /* Write faults on read-only mappings are impossible ... */ in sanitize_fault_flags()
6517 if (WARN_ON_ONCE(!(vma->vm_flags & VM_MAYWRITE))) in sanitize_fault_flags()
6520 if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE) && in sanitize_fault_flags()
6521 !is_cow_mapping(vma->vm_flags))) in sanitize_fault_flags()
6526 * Per-VMA locks can't be used with FAULT_FLAG_RETRY_NOWAIT because of in sanitize_fault_flags()
6545 vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, in handle_mm_fault() argument
6549 struct mm_struct *mm = vma->vm_mm; in handle_mm_fault()
6566 is_droppable = !!(vma->vm_flags & VM_DROPPABLE); in handle_mm_fault()
6578 ret = hugetlb_fault(vma->vm_mm, vma, address, flags); in handle_mm_fault()
6580 ret = __handle_mm_fault(vma, address, flags); in handle_mm_fault()
6583 * Warning: It is no longer safe to dereference vma-> after this point, in handle_mm_fault()
6606 mm_account_fault(mm, regs, address, flags, ret); in handle_mm_fault()
6615 * We've already handled the fast-path in-line.
6617 int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) in __p4d_alloc() argument
6619 p4d_t *new = p4d_alloc_one(mm, address); in __p4d_alloc()
6621 return -ENOMEM; in __p4d_alloc()
6623 spin_lock(&mm->page_table_lock); in __p4d_alloc()
6630 spin_unlock(&mm->page_table_lock); in __p4d_alloc()
6638 * We've already handled the fast-path in-line.
6640 int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) in __pud_alloc() argument
6642 pud_t *new = pud_alloc_one(mm, address); in __pud_alloc()
6644 return -ENOMEM; in __pud_alloc()
6646 spin_lock(&mm->page_table_lock); in __pud_alloc()
6653 spin_unlock(&mm->page_table_lock); in __pud_alloc()
6661 * We've already handled the fast-path in-line.
6663 int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) in __pmd_alloc() argument
6666 pmd_t *new = pmd_alloc_one(mm, address); in __pmd_alloc()
6668 return -ENOMEM; in __pmd_alloc()
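/*
 * __p4d_alloc(), __pud_alloc() and __pmd_alloc() all follow the same
 * pattern: allocate the new table with no lock held, then take the
 * appropriate page-table lock and install the table only if no other thread
 * has populated the entry in the meantime (otherwise the fresh allocation is
 * freed again).
 */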
6689 args->lock = lock; in pfnmap_args_setup()
6690 args->ptep = ptep; in pfnmap_args_setup()
6691 args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT); in pfnmap_args_setup()
6692 args->addr_mask = addr_mask; in pfnmap_args_setup()
6693 args->pgprot = pgprot; in pfnmap_args_setup()
6694 args->writable = writable; in pfnmap_args_setup()
6695 args->special = special; in pfnmap_args_setup()
6701 struct file *file = vma->vm_file; in pfnmap_lockdep_assert()
6702 struct address_space *mapping = file ? file->f_mapping : NULL; in pfnmap_lockdep_assert()
6705 lockdep_assert(lockdep_is_held(&mapping->i_mmap_rwsem) || in pfnmap_lockdep_assert()
6706 lockdep_is_held(&vma->vm_mm->mmap_lock)); in pfnmap_lockdep_assert()
6708 lockdep_assert(lockdep_is_held(&vma->vm_mm->mmap_lock)); in pfnmap_lockdep_assert()
6713 * follow_pfnmap_start() - Look up a pfn mapping at a user virtual address
6716 * The caller needs to setup args->vma and args->address to point to the
6717 * virtual address as the target of such lookup. On a successful return,
6733 * a later point in time can trigger use-after-free.
6745 struct vm_area_struct *vma = args->vma; in follow_pfnmap_start()
6746 unsigned long address = args->address; in follow_pfnmap_start() local
6747 struct mm_struct *mm = vma->vm_mm; in follow_pfnmap_start()
6757 if (unlikely(address < vma->vm_start || address >= vma->vm_end)) in follow_pfnmap_start()
6760 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) in follow_pfnmap_start()
6763 pgdp = pgd_offset(mm, address); in follow_pfnmap_start()
6767 p4dp = p4d_offset(pgdp, address); in follow_pfnmap_start()
6772 pudp = pud_offset(p4dp, address); in follow_pfnmap_start()
6788 pmdp = pmd_offset(pudp, address); in follow_pfnmap_start()
6802 ptep = pte_offset_map_lock(mm, pmdp, address, &lock); in follow_pfnmap_start()
6815 return -EINVAL; in follow_pfnmap_start()
6828 if (args->lock) in follow_pfnmap_end()
6829 spin_unlock(args->lock); in follow_pfnmap_end()
6830 if (args->ptep) in follow_pfnmap_end()
6831 pte_unmap(args->ptep); in follow_pfnmap_end()
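/*
 * Minimal usage sketch for the API above (hypothetical caller; error
 * handling trimmed): the looked-up PFN is only valid between
 * follow_pfnmap_start() and follow_pfnmap_end(), so consume it before
 * calling the latter.
 */
static bool example_addr_maps_pfn(struct vm_area_struct *vma,
				  unsigned long addr, unsigned long pfn)
{
	struct follow_pfnmap_args args = { .vma = vma, .address = addr };
	bool match;

	if (follow_pfnmap_start(&args))
		return false;
	match = (args.pfn == pfn);	/* use the result while still mapped */
	follow_pfnmap_end(&args);
	return match;
}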
6837 * generic_access_phys - generic implementation for iomem mmap access
6839 * @addr: userspace address, not relative offset within @vma
6855 int ret = -EINVAL; in generic_access_phys()
6857 struct follow_pfnmap_args args = { .vma = vma, .address = addr }; in generic_access_phys()
6861 return -EINVAL; in generic_access_phys()
6868 return -EINVAL; in generic_access_phys()
6872 return -ENOMEM; in generic_access_phys()
6900 * Access another process' address space as given in mm.
6911 /* Untag the address before looking up the VMA */ in __access_remote_vm()
6935 return buf - old_buf; in __access_remote_vm()
6947 if (vma->vm_ops && vma->vm_ops->access) in __access_remote_vm()
6948 bytes = vma->vm_ops->access(vma, addr, buf, in __access_remote_vm()
6956 offset = addr & (PAGE_SIZE-1); in __access_remote_vm()
6957 if (bytes > PAGE_SIZE-offset) in __access_remote_vm()
6958 bytes = PAGE_SIZE-offset; in __access_remote_vm()
6971 len -= bytes; in __access_remote_vm()
6977 return buf - old_buf; in __access_remote_vm()
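/*
 * The copy loop above works one page of the target at a time: each iteration
 * copies at most PAGE_SIZE - (addr & (PAGE_SIZE - 1)) bytes so a chunk never
 * crosses a page boundary, falling back to vma->vm_ops->access() when a
 * normal page cannot be pinned, and then advances addr, buf and len by the
 * amount actually copied.
 */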
6981 * access_remote_vm - access another process' address space
6982 * @mm: the mm_struct of the target address space
6983 * @addr: start address to access
6999 * Access another process' address space.
7023 * Copy a string from another process's address space as given in mm.
7024 * If there is any error, return -EFAULT.
7035 return -EFAULT; in __copy_remote_vm_str()
7041 err = -EFAULT; in __copy_remote_vm_str()
7060 err = -EFAULT; in __copy_remote_vm_str()
7066 offset = addr & (PAGE_SIZE - 1); in __copy_remote_vm_str()
7067 if (bytes > PAGE_SIZE - offset) in __copy_remote_vm_str()
7068 bytes = PAGE_SIZE - offset; in __copy_remote_vm_str()
7079 buf += bytes - 1; in __copy_remote_vm_str()
7086 addr += bytes - 1; in __copy_remote_vm_str()
7087 copy_from_user_page(vma, page, addr, buf, maddr + (PAGE_SIZE - 1), 1); in __copy_remote_vm_str()
7091 len -= bytes; in __copy_remote_vm_str()
7100 return buf - old_buf; in __copy_remote_vm_str()
7104 * copy_remote_vm_str - copy a string from another process's address space.
7105 * @tsk: the task of the target address space
7106 * @addr: start address to read from
7114 * not including the trailing NUL. Always guaranteed to leave a NUL-terminated
7115 * buffer. On any error, return -EFAULT.
7129 return -EFAULT; in copy_remote_vm_str()
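/*
 * Hypothetical caller sketch for copy_remote_vm_str() (assuming the
 * (tsk, addr, buf, len, gup_flags) parameter order suggested by the
 * kernel-doc above; not taken from this file): on success the buffer is
 * always NUL-terminated, on failure -EFAULT comes back.
 */
static int example_read_remote_str(struct task_struct *tsk,
				   unsigned long uaddr, char *buf, int buflen)
{
	int copied = copy_remote_vm_str(tsk, uaddr, buf, buflen, 0);

	return copied < 0 ? copied : 0;
}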
7146 struct mm_struct *mm = current->mm; in print_vma_addr()
7156 if (vma && vma->vm_file) { in print_vma_addr()
7157 struct file *f = vma->vm_file; in print_vma_addr()
7158 ip -= vma->vm_start; in print_vma_addr()
7159 ip += vma->vm_pgoff << PAGE_SHIFT; in print_vma_addr()
7161 vma->vm_start, in print_vma_addr()
7162 vma->vm_end - vma->vm_start); in print_vma_addr()
7173 if (current->mm) in __might_fault()
7174 might_lock_read(&current->mm->mmap_lock); in __might_fault()
7192 ~(((unsigned long)nr_pages << PAGE_SHIFT) - 1); in process_huge_page()
7196 n = (addr_hint - addr) / PAGE_SIZE; in process_huge_page()
7202 for (i = nr_pages - 1; i >= 2 * n; i--) { in process_huge_page()
7210 base = nr_pages - 2 * (nr_pages - n); in process_huge_page()
7211 l = nr_pages - n; in process_huge_page()
7221 * Process remaining subpages in a left-right-left-right pattern in process_huge_page()
7226 int right_idx = base + 2 * l - 1 - i; in process_huge_page()
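/*
 * The ordering above is a cache-locality trick: subpages far away from the
 * faulting address (addr_hint) are processed first, then the remaining ones
 * alternately from the left and the right, converging on the target subpage
 * so that its cache lines are written last and stay hot for the access that
 * triggered the fault.
 */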
7262 * folio_zero_user - Zero a folio which will be mapped to userspace.
7264 * @addr_hint: The address that will be accessed, or the base address if unclear.
7293 return -EHWPOISON; in copy_user_gigantic_page()
7307 struct page *dst = folio_page(copy_arg->dst, idx); in copy_subpage()
7308 struct page *src = folio_page(copy_arg->src, idx); in copy_subpage()
7310 if (copy_mc_user_highpage(dst, src, addr, copy_arg->vma)) in copy_subpage()
7311 return -EHWPOISON; in copy_subpage()
7351 ret_val -= (PAGE_SIZE - rc); in copy_folio_from_user()
7369 page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0, in ptlock_cache_init()
7380 ptdesc->ptl = ptl; in ptlock_alloc()
7386 if (ptdesc->ptl) in ptlock_free()
7387 kmem_cache_free(page_ptl_cachep, ptdesc->ptl); in ptlock_free()
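/*
 * These helpers back split page-table locks: when spinlock_t is too big to
 * sit directly inside struct ptdesc (e.g. with lockdep enabled), the lock is
 * allocated from the "page->ptl" kmem_cache created above and hung off
 * ptdesc->ptl instead.
 */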