xref: /linux/arch/s390/mm/gmap_helpers.c (revision 0723a166d1f1da4c60d7b11289383f073e4dee9b)
// SPDX-License-Identifier: GPL-2.0
/*
 *  Helper functions for KVM guest address space mapping code
 *
 *    Copyright IBM Corp. 2007, 2025
 */

#include <linux/export.h>
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/leafops.h>
#include <linux/pagewalk.h>
#include <linux/ksm.h>
#include <asm/gmap_helpers.h>
#include <asm/pgtable.h>

/**
 * ptep_zap_softleaf_entry() - discard a software leaf entry.
 * @mm: the mm
 * @entry: the software leaf entry that needs to be zapped
 *
 * Discards the given software leaf entry. If the leaf entry was an actual
 * swap entry (and not a migration entry, for example), the actual swapped
 * page is also discarded from swap.
 */
static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry)
{
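	/* Keep the mm counters in sync with the entry that is being dropped. */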
	if (softleaf_is_swap(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (softleaf_is_migration(entry))
		dec_mm_counter(mm, mm_counter(softleaf_to_folio(entry)));
	free_swap_and_cache(entry);
}

/**
 * gmap_helper_zap_one_page() - discard a page if it was swapped.
 * @mm: the mm
 * @vmaddr: the userspace virtual address that needs to be discarded
 *
 * If the given address maps to a swap entry, discard it.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
	struct vm_area_struct *vma;
	unsigned long pgstev;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;

	mmap_assert_locked(mm);

	/* Find the vm address for the guest address */
	vma = vma_lookup(mm, vmaddr);
	if (!vma || is_vm_hugetlb_page(vma))
		return;

	/* Get pointer to the page table entry */
	ptep = get_locked_pte(mm, vmaddr, &ptl);
	if (unlikely(!ptep))
		return;
	if (pte_swap(*ptep)) {
		preempt_disable();
		pgste = pgste_get_lock(ptep);
		pgstev = pgste_val(pgste);

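		/*
		 * Only zap the entry if the guest page state (PGSTE) marks
		 * the page as unused or as a logical zero page.
		 */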
		if ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
		    (pgstev & _PGSTE_GPS_ZERO)) {
			ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep));
			pte_clear(mm, vmaddr, ptep);
		}

		pgste_set_unlock(ptep, pgste);
		preempt_enable();
	}
	pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);

/**
 * gmap_helper_discard() - discard user pages in the given range
 * @mm: the mm
 * @vmaddr: starting userspace address
 * @end: end address (first address outside the range)
 *
 * All userspace pages in the range [@vmaddr, @end) are discarded and unmapped.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
{
	struct vm_area_struct *vma;

	mmap_assert_locked(mm);

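	/* Zap each VMA intersecting the range; hugetlb mappings are left alone. */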
	while (vmaddr < end) {
		vma = find_vma_intersection(mm, vmaddr, end);
		if (!vma)
			return;
		if (!is_vm_hugetlb_page(vma))
			zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
		vmaddr = vma->vm_end;
	}
}
EXPORT_SYMBOL_GPL(gmap_helper_discard);

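/*
 * pte_entry callback for the zeropage walk below: report the address of the
 * first shared zeropage through walk->private and stop the walk by returning
 * 1. Shared zeropages outside of COW mappings are unexpected and make the
 * walk fail with -EFAULT.
 */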
static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
				   unsigned long end, struct mm_walk *walk)
{
	unsigned long *found_addr = walk->private;

	/* Return 1 if the page is a zeropage. */
	if (is_zero_pfn(pte_pfn(*pte))) {
		/*
		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
		 * currently only works in COW mappings, which is also where
		 * mm_forbids_zeropage() is checked.
		 */
		if (!is_cow_mapping(walk->vma->vm_flags))
			return -EFAULT;

		*found_addr = addr;
		return 1;
	}
	return 0;
}

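/* Ops for the zeropage walk; each VMA is write-locked while it is walked. */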
static const struct mm_walk_ops find_zeropage_ops = {
	.pte_entry	= find_zeropage_pte_entry,
	.walk_lock	= PGWALK_WRLOCK,
};

/**
 * __gmap_helper_unshare_zeropages() - unshare all shared zeropages
 * @mm: the mm whose zero pages are to be unshared
 *
 * Unshare all shared zeropages, replacing them by anonymous pages. Note that
 * we cannot simply zap all shared zeropages, because this could later
 * trigger unexpected userfaultfd missing events.
 *
 * This must be called after mm->context.allow_cow_sharing was
 * set to 0, to avoid future mappings of shared zeropages.
 *
 * mm contracts with s390 that, even if it were to remove a page table
 * while racing with walk_page_range_vma() (so that pte_offset_map_lock()
 * fails), it will never insert a page table containing empty zero pages
 * once mm_forbids_zeropage(mm), i.e. mm->context.allow_cow_sharing,
 * is set to 0.
 */
static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);
	unsigned long addr;
	vm_fault_t fault;
	int rc;

	for_each_vma(vmi, vma) {
		/*
		 * We could only look at COW mappings, but it's more future
		 * proof to catch unexpected zeropages in other mappings and
		 * fail.
		 */
		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
			continue;
		addr = vma->vm_start;

retry:
		rc = walk_page_range_vma(vma, addr, vma->vm_end,
					 &find_zeropage_ops, &addr);
		if (rc < 0)
			return rc;
		else if (!rc)
			continue;

		/* addr was updated by find_zeropage_pte_entry() */
		fault = handle_mm_fault(vma, addr,
					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
					NULL);
		if (fault & VM_FAULT_OOM)
			return -ENOMEM;
		/*
		 * See break_ksm(): even after handle_mm_fault() returned 0, we
		 * must start the lookup from the current address, because
		 * handle_mm_fault() may back out if there's any difficulty.
		 *
		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
		 * maybe they could trigger in the future on concurrent
		 * truncation. In that case, the shared zeropage would be gone
		 * and we can simply retry and make progress.
		 */
		cond_resched();
		goto retry;
	}

	return 0;
}

/**
 * gmap_helper_disable_cow_sharing() - disable all COW sharing
 *
 * Disable most COW-sharing of memory pages for the whole process:
 * (1) Disable KSM and unmerge/unshare any KSM pages.
 * (2) Disallow shared zeropages and unshare any zeropages that are mapped.
 *
 * Note that we currently don't bother with COW-shared pages that are shared
 * with parent/child processes due to fork().
 */
int gmap_helper_disable_cow_sharing(void)
{
	struct mm_struct *mm = current->mm;
	int rc;

	mmap_assert_write_locked(mm);

	if (!mm->context.allow_cow_sharing)
		return 0;

	mm->context.allow_cow_sharing = 0;

	/* Replace all shared zeropages by anonymous pages. */
	rc = __gmap_helper_unshare_zeropages(mm);
	/*
	 * Make sure to disable KSM (if enabled for the whole process or
	 * individual VMAs). Note that nothing currently hinders user space
	 * from re-enabling it.
	 */
	if (!rc)
		rc = ksm_disable(mm);
	if (rc)
		mm->context.allow_cow_sharing = 1;
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
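
/*
 * Usage sketch (illustrative only, not taken from an in-tree caller): the
 * function works on current->mm and asserts that the mmap lock is held in
 * write mode, so a caller is expected to look roughly like this:
 *
 *	mmap_write_lock(current->mm);
 *	rc = gmap_helper_disable_cow_sharing();
 *	mmap_write_unlock(current->mm);
 */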
239