// SPDX-License-Identifier: GPL-2.0
/*
 *  Helper functions for KVM guest address space mapping code
 *
 *    Copyright IBM Corp. 2007, 2025
 */

#include <linux/export.h>
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagewalk.h>
#include <linux/ksm.h>
#include <asm/gmap_helpers.h>
#include <asm/pgtable.h>

/**
 * ptep_zap_swap_entry() - discard a swap entry.
 * @mm: the mm
 * @entry: the swap entry that needs to be zapped
 *
 * Discards the given swap entry. If the swap entry was an actual swap
 * entry (and not a migration entry, for example), the actual swapped
 * page is also discarded from swap.
 */
static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry))
		dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
	free_swap_and_cache(entry);
}

/**
 * gmap_helper_zap_one_page() - discard a page if it was swapped.
 * @mm: the mm
 * @vmaddr: the userspace virtual address that needs to be discarded
 *
 * If the given address maps to a swap entry, discard it.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
	struct vm_area_struct *vma;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;

	mmap_assert_locked(mm);

	/* Find the vm address for the guest address */
	vma = vma_lookup(mm, vmaddr);
	if (!vma || is_vm_hugetlb_page(vma))
		return;

	/* Get pointer to the page table entry */
	ptep = get_locked_pte(mm, vmaddr, &ptl);
	if (unlikely(!ptep))
		return;
	if (pte_swap(*ptep)) {
		preempt_disable();
		pgste = pgste_get_lock(ptep);

		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
		pte_clear(mm, vmaddr, ptep);

		pgste_set_unlock(ptep, pgste);
		preempt_enable();
	}
	pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);
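
/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * gmap_helper_zap_one_page() expects the mmap lock to be held, e.g.:
 *
 *	mmap_read_lock(mm);
 *	gmap_helper_zap_one_page(mm, vmaddr);
 *	mmap_read_unlock(mm);
 */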

/**
 * gmap_helper_discard() - discard user pages in the given range
 * @mm: the mm
 * @vmaddr: starting userspace address
 * @end: end address (first address outside the range)
 *
 * All userspace pages in the range [@vmaddr, @end) are discarded and unmapped.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
{
	struct vm_area_struct *vma;

	mmap_assert_locked(mm);

	while (vmaddr < end) {
		vma = find_vma_intersection(mm, vmaddr, end);
		if (!vma)
			return;
		if (!is_vm_hugetlb_page(vma))
			zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
		vmaddr = vma->vm_end;
	}
}
EXPORT_SYMBOL_GPL(gmap_helper_discard);
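
/*
 * Illustrative usage sketch (hypothetical caller and variables, not part
 * of this file): discard every page backing a guest memory range while
 * holding the mmap lock:
 *
 *	mmap_read_lock(mm);
 *	gmap_helper_discard(mm, start, start + size);
 *	mmap_read_unlock(mm);
 */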

/**
 * find_zeropage_pte_entry() - check whether a pte maps the shared zeropage
 * @pte: the pte to check
 * @addr: the userspace address mapped by @pte
 * @end: the end address of the current walk step
 * @walk: the page table walk context; @walk->private points to an
 *        unsigned long that receives the address of a found zeropage
 *
 * Return: 1 if @pte maps the shared zeropage in a COW mapping (the address
 * is stored in *@walk->private and the walk stops), -EFAULT if a shared
 * zeropage is found outside a COW mapping, 0 otherwise.
 */
static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
				   unsigned long end, struct mm_walk *walk)
{
	unsigned long *found_addr = walk->private;

	/* Return 1 if the page is a zeropage. */
	if (is_zero_pfn(pte_pfn(*pte))) {
		/*
		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
		 * currently only works in COW mappings, which is also where
		 * mm_forbids_zeropage() is checked.
		 */
		if (!is_cow_mapping(walk->vma->vm_flags))
			return -EFAULT;

		*found_addr = addr;
		return 1;
	}
	return 0;
}

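/*
 * PGWALK_WRLOCK: the walk expects the mmap lock to be held for writing,
 * matching the mmap_assert_write_locked() check in
 * gmap_helper_disable_cow_sharing().
 */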
static const struct mm_walk_ops find_zeropage_ops = {
	.pte_entry      = find_zeropage_pte_entry,
	.walk_lock      = PGWALK_WRLOCK,
};

/**
 * __gmap_helper_unshare_zeropages() - unshare all shared zeropages
 * @mm: the mm whose zero pages are to be unshared
 *
 * Unshare all shared zeropages, replacing them by anonymous pages. Note that
 * we cannot simply zap all shared zeropages, because this could later
 * trigger unexpected userfaultfd missing events.
 *
 * This must be called after mm->context.allow_cow_sharing was
 * set to 0, to avoid future mappings of shared zeropages.
 *
 * mm contracts with s390 that, even if mm were to remove a page table,
 * racing with walk_page_range_vma() and causing its pte_offset_map_lock()
 * to fail, it will never insert a page table containing empty zero pages
 * once mm_forbids_zeropage(mm) holds, i.e. once
 * mm->context.allow_cow_sharing is set to 0.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);
	unsigned long addr;
	vm_fault_t fault;
	int rc;

	for_each_vma(vmi, vma) {
		/*
		 * We could restrict ourselves to COW mappings, but it's more
		 * future proof to catch unexpected zeropages in other
		 * mappings and fail.
		 */
		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
			continue;
		addr = vma->vm_start;

retry:
		rc = walk_page_range_vma(vma, addr, vma->vm_end,
					 &find_zeropage_ops, &addr);
		if (rc < 0)
			return rc;
		else if (!rc)
			continue;

		/* addr was updated by find_zeropage_pte_entry() */
		fault = handle_mm_fault(vma, addr,
					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
					NULL);
		if (fault & VM_FAULT_OOM)
			return -ENOMEM;
		/*
		 * See break_ksm(): even after handle_mm_fault() returned 0, we
		 * must start the lookup from the current address, because
		 * handle_mm_fault() may back out if there's any difficulty.
		 *
		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
		 * maybe they could trigger in the future on concurrent
		 * truncation. In that case, the shared zeropage would be gone
		 * and we can simply retry and make progress.
		 */
		cond_resched();
		goto retry;
	}

	return 0;
}

/**
 * gmap_helper_disable_cow_sharing() - disable all COW sharing
 *
 * Disable most COW-sharing of memory pages for the whole process:
 * (1) Disable KSM and unmerge/unshare any KSM pages.
 * (2) Disallow shared zeropages and unshare any zeropages that are mapped.
 *
 * Note that we currently don't bother with COW-shared pages that are shared
 * with parent/child processes due to fork().
 *
 * Context: needs to be called while holding the mmap lock in write mode.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int gmap_helper_disable_cow_sharing(void)
{
	struct mm_struct *mm = current->mm;
	int rc;

	mmap_assert_write_locked(mm);

	if (!mm->context.allow_cow_sharing)
		return 0;

	mm->context.allow_cow_sharing = 0;

	/* Replace all shared zeropages by anonymous pages. */
	rc = __gmap_helper_unshare_zeropages(mm);
	/*
	 * Make sure to disable KSM (if enabled for the whole process or
	 * individual VMAs). Note that nothing currently hinders user space
	 * from re-enabling it.
	 */
	if (!rc)
		rc = ksm_disable(mm);
	if (rc)
		mm->context.allow_cow_sharing = 1;
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
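
/*
 * Illustrative usage sketch (hypothetical caller, not taken from actual
 * callers): the function acts on current->mm and must be called with the
 * mmap lock held for writing, e.g.:
 *
 *	int rc;
 *
 *	if (mmap_write_lock_killable(current->mm))
 *		return -EINTR;
 *	rc = gmap_helper_disable_cow_sharing();
 *	mmap_write_unlock(current->mm);
 */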