xref: /linux/arch/s390/mm/gmap_helpers.c (revision 7f9039c524a351c684149ecf1b3c5145a0dff2fe)
// SPDX-License-Identifier: GPL-2.0
/*
 *  Helper functions for KVM guest address space mapping code
 *
 *    Copyright IBM Corp. 2007, 2025
 */
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagewalk.h>
#include <linux/ksm.h>
#include <asm/gmap_helpers.h>

/**
 * ptep_zap_swap_entry() - discard a swap entry.
 * @mm: the mm
 * @entry: the swap entry that needs to be zapped
 *
 * Discards the given swap entry. If the swap entry was an actual swap
 * entry (and not a migration entry, for example), the actual swapped
 * page is also discarded from swap.
 */
static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry))
		dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
	free_swap_and_cache(entry);
}

/**
 * gmap_helper_zap_one_page() - discard a page if it was swapped.
 * @mm: the mm
 * @vmaddr: the userspace virtual address that needs to be discarded
 *
 * If the given address maps to a swap entry, discard it.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
	struct vm_area_struct *vma;
	spinlock_t *ptl;
	pte_t *ptep;

	mmap_assert_locked(mm);

	/* Find the vm address for the guest address */
	vma = vma_lookup(mm, vmaddr);
	if (!vma || is_vm_hugetlb_page(vma))
		return;

	/* Get pointer to the page table entry */
	ptep = get_locked_pte(mm, vmaddr, &ptl);
	if (unlikely(!ptep))
		return;
	if (pte_swap(*ptep))
		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
	pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);

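/*
 * Illustrative usage sketch (not part of the original file): callers are
 * expected to hold the mmap lock around the call, for example:
 *
 *	mmap_read_lock(mm);
 *	gmap_helper_zap_one_page(mm, vmaddr);
 *	mmap_read_unlock(mm);
 *
 * where vmaddr is the userspace address backing the guest page.
 */
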
/**
 * gmap_helper_discard() - discard user pages in the given range
 * @mm: the mm
 * @vmaddr: starting userspace address
 * @end: end address (first address outside the range)
 *
 * All userspace pages in the range [@vmaddr, @end) are discarded and unmapped.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
{
	struct vm_area_struct *vma;

	mmap_assert_locked(mm);

	while (vmaddr < end) {
		vma = find_vma_intersection(mm, vmaddr, end);
		if (!vma)
			return;
		if (!is_vm_hugetlb_page(vma))
			zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
		vmaddr = vma->vm_end;
	}
}
EXPORT_SYMBOL_GPL(gmap_helper_discard);

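/*
 * Illustrative usage sketch (not part of the original file): discarding a
 * whole userspace range while holding the mmap lock, for example:
 *
 *	mmap_read_lock(mm);
 *	gmap_helper_discard(mm, vmaddr, vmaddr + size);
 *	mmap_read_unlock(mm);
 *
 * where vmaddr and size (hypothetical variables) describe the range to be
 * discarded.
 */
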
static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
				   unsigned long end, struct mm_walk *walk)
{
	unsigned long *found_addr = walk->private;

	/* Return 1 if the page is a zeropage. */
	if (is_zero_pfn(pte_pfn(*pte))) {
		/*
		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
		 * currently only works in COW mappings, which is also where
		 * mm_forbids_zeropage() is checked.
		 */
		if (!is_cow_mapping(walk->vma->vm_flags))
			return -EFAULT;

		*found_addr = addr;
		return 1;
	}
	return 0;
}

static const struct mm_walk_ops find_zeropage_ops = {
	.pte_entry      = find_zeropage_pte_entry,
	.walk_lock      = PGWALK_WRLOCK,
};

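/*
 * Illustrative note (not part of the original file): with these ops, a
 * positive return value from the pte_entry callback stops the page walk and
 * is propagated by walk_page_range_vma(), while the address of the shared
 * zeropage is reported through the walk's private pointer, roughly:
 *
 *	unsigned long addr = vma->vm_start;
 *	int rc = walk_page_range_vma(vma, addr, vma->vm_end,
 *				     &find_zeropage_ops, &addr);
 *
 * rc > 0 means a shared zeropage was found at addr, rc == 0 means none was
 * found, and rc < 0 is an error (e.g. -EFAULT for a non-COW mapping).
 */
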
/**
 * __gmap_helper_unshare_zeropages() - unshare all shared zeropages
 * @mm: the mm whose zero pages are to be unshared
 *
 * Unshare all shared zeropages, replacing them with anonymous pages. Note
 * that we cannot simply zap all shared zeropages, because doing so could
 * later trigger unexpected userfaultfd missing events.
 *
 * This must be called after mm->context.allow_cow_sharing has been set to 0,
 * to avoid future mappings of shared zeropages.
 *
 * mm contracts with s390 that, even if mm were to remove a page table (such
 * that a racing walk_page_range_vma() calling pte_offset_map_lock() would
 * fail), it will never insert a page table containing empty zero pages once
 * mm_forbids_zeropage(mm), i.e. mm->context.allow_cow_sharing, is set to 0.
 */
static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);
	unsigned long addr;
	vm_fault_t fault;
	int rc;

	for_each_vma(vmi, vma) {
		/*
		 * We could only look at COW mappings, but it's more future
		 * proof to catch unexpected zeropages in other mappings and
		 * fail.
		 */
		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
			continue;
		addr = vma->vm_start;

retry:
		rc = walk_page_range_vma(vma, addr, vma->vm_end,
					 &find_zeropage_ops, &addr);
		if (rc < 0)
			return rc;
		else if (!rc)
			continue;

		/* addr was updated by find_zeropage_pte_entry() */
		fault = handle_mm_fault(vma, addr,
					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
					NULL);
		if (fault & VM_FAULT_OOM)
			return -ENOMEM;
		/*
		 * See break_ksm(): even after handle_mm_fault() returned 0, we
		 * must start the lookup from the current address, because
		 * handle_mm_fault() may back out if there's any difficulty.
		 *
		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
		 * maybe they could trigger in the future on concurrent
		 * truncation. In that case, the shared zeropage would be gone
		 * and we can simply retry and make progress.
		 */
		cond_resched();
		goto retry;
	}

	return 0;
}

/**
 * gmap_helper_disable_cow_sharing() - disable all COW sharing
 *
 * Disable most COW-sharing of memory pages for the whole process:
 * (1) Disable KSM and unmerge/unshare any KSM pages.
 * (2) Disallow shared zeropages and unshare any zeropages that are mapped.
 *
 * Note that we currently don't bother with COW-shared pages that are shared
 * with parent/child processes due to fork().
 */
int gmap_helper_disable_cow_sharing(void)
{
	struct mm_struct *mm = current->mm;
	int rc;

	mmap_assert_write_locked(mm);

	if (!mm->context.allow_cow_sharing)
		return 0;

	mm->context.allow_cow_sharing = 0;

	/* Replace all shared zeropages by anonymous pages. */
	rc = __gmap_helper_unshare_zeropages(mm);
	/*
	 * Make sure to disable KSM (if enabled for the whole process or
	 * individual VMAs). Note that nothing currently hinders user space
	 * from re-enabling it.
	 */
	if (!rc)
		rc = ksm_disable(mm);
	if (rc)
		mm->context.allow_cow_sharing = 1;
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
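/*
 * Illustrative usage sketch (not part of the original file): the function
 * acts on current->mm and expects the mmap lock to be held for writing, for
 * example:
 *
 *	int rc;
 *
 *	mmap_write_lock(current->mm);
 *	rc = gmap_helper_disable_cow_sharing();
 *	mmap_write_unlock(current->mm);
 */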