xref: /linux/arch/s390/mm/gmap_helpers.c (revision 7f9039c524a351c684149ecf1b3c5145a0dff2fe)
// SPDX-License-Identifier: GPL-2.0
/*
 *  Helper functions for KVM guest address space mapping code
 *
 *    Copyright IBM Corp. 2007, 2025
 */
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagewalk.h>
#include <linux/ksm.h>
#include <asm/gmap_helpers.h>

/**
 * ptep_zap_swap_entry() - discard a swap entry.
 * @mm: the mm
 * @entry: the swap entry that needs to be zapped
 *
 * Discards the given swap entry. If the swap entry was an actual swap
 * entry (and not a migration entry, for example), the actual swapped
 * page is also discarded from swap.
 */
static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry))
		dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
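	/*
	 * Drop the reference on the swap slot; for a genuine swap entry
	 * this may also free the swapcache folio, while non-swap entries
	 * are left alone by free_swap_and_cache().
	 */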
	free_swap_and_cache(entry);
}

/**
 * gmap_helper_zap_one_page() - discard a page if it was swapped.
 * @mm: the mm
 * @vmaddr: the userspace virtual address that needs to be discarded
 *
 * If the given address maps to a swap entry, discard it.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
	struct vm_area_struct *vma;
	spinlock_t *ptl;
	pte_t *ptep;

	mmap_assert_locked(mm);

	/* Find the vm address for the guest address */
	vma = vma_lookup(mm, vmaddr);
	if (!vma || is_vm_hugetlb_page(vma))
		return;

	/* Get pointer to the page table entry */
	ptep = get_locked_pte(mm, vmaddr, &ptl);
	if (unlikely(!ptep))
		return;
	if (pte_swap(*ptep))
		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
	pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);
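
/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * a KVM caller would resolve the guest frame to a userspace address and
 * hold the mmap lock across the call, as required above:
 *
 *	mmap_read_lock(kvm->mm);
 *	vmaddr = gfn_to_hva(kvm, gfn);
 *	if (!kvm_is_error_hva(vmaddr))
 *		gmap_helper_zap_one_page(kvm->mm, vmaddr);
 *	mmap_read_unlock(kvm->mm);
 */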

/**
 * gmap_helper_discard() - discard user pages in the given range
 * @mm: the mm
 * @vmaddr: starting userspace address
 * @end: end address (first address outside the range)
 *
 * All userspace pages in the range [@vmaddr, @end) are discarded and unmapped.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
{
	struct vm_area_struct *vma;

	mmap_assert_locked(mm);

	while (vmaddr < end) {
		vma = find_vma_intersection(mm, vmaddr, end);
		if (!vma)
			return;
		if (!is_vm_hugetlb_page(vma))
			zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
		vmaddr = vma->vm_end;
	}
}
EXPORT_SYMBOL_GPL(gmap_helper_discard);
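
/*
 * A minimal usage sketch (hypothetical values, for illustration): discard
 * one megabyte of guest backing starting at vmaddr; the end address is
 * exclusive and the mmap lock must be held:
 *
 *	mmap_read_lock(mm);
 *	gmap_helper_discard(mm, vmaddr, vmaddr + SZ_1M);
 *	mmap_read_unlock(mm);
 */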

static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
				   unsigned long end, struct mm_walk *walk)
{
	unsigned long *found_addr = walk->private;

	/* Return 1 if the page is a zeropage. */
	if (is_zero_pfn(pte_pfn(*pte))) {
		/*
		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
		 * currently only works in COW mappings, which is also where
		 * mm_forbids_zeropage() is checked.
		 */
		if (!is_cow_mapping(walk->vma->vm_flags))
			return -EFAULT;

		*found_addr = addr;
		return 1;
	}
	return 0;
}

static const struct mm_walk_ops find_zeropage_ops = {
	.pte_entry      = find_zeropage_pte_entry,
	.walk_lock      = PGWALK_WRLOCK,
};

/**
 * __gmap_helper_unshare_zeropages() - unshare all shared zeropages
 * @mm: the mm whose zero pages are to be unshared
 *
 * Unshare all shared zeropages, replacing them by anonymous pages. Note that
 * we cannot simply zap all shared zeropages, because this could later
 * trigger unexpected userfaultfd missing events.
 *
 * This must be called after mm->context.allow_cow_sharing was
 * set to 0, to avoid future mappings of shared zeropages.
 *
 * The mm contracts with s390 that, even if mm were to remove a page table
 * (so that a racing walk_page_range_vma() calling pte_offset_map_lock()
 * would fail), it will never insert a page table containing empty zero
 * pages once mm_forbids_zeropage(mm), i.e.
 * mm->context.allow_cow_sharing, is set to 0.
 */
static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);
	unsigned long addr;
	vm_fault_t fault;
	int rc;

	for_each_vma(vmi, vma) {
		/*
		 * We could only look at COW mappings, but it's more future
		 * proof to catch unexpected zeropages in other mappings and
		 * fail.
		 */
		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
			continue;
		addr = vma->vm_start;

retry:
		rc = walk_page_range_vma(vma, addr, vma->vm_end,
					 &find_zeropage_ops, &addr);
		if (rc < 0)
			return rc;
		else if (!rc)
			continue;

		/* addr was updated by find_zeropage_pte_entry() */
		fault = handle_mm_fault(vma, addr,
					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
					NULL);
		if (fault & VM_FAULT_OOM)
			return -ENOMEM;
		/*
		 * See break_ksm(): even after handle_mm_fault() returned 0, we
		 * must start the lookup from the current address, because
		 * handle_mm_fault() may back out if there's any difficulty.
		 *
		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
		 * maybe they could trigger in the future on concurrent
		 * truncation. In that case, the shared zeropage would be gone
		 * and we can simply retry and make progress.
		 */
		cond_resched();
		goto retry;
	}

	return 0;
}

/**
 * gmap_helper_disable_cow_sharing() - disable all COW sharing
 *
 * Disable most COW-sharing of memory pages for the whole process:
 * (1) Disable KSM and unmerge/unshare any KSM pages.
 * (2) Disallow shared zeropages and unshare any zeropages that are mapped.
 *
 * Note that we currently don't bother with COW-shared pages that are shared
 * with parent/child processes due to fork().
 */
int gmap_helper_disable_cow_sharing(void)
{
	struct mm_struct *mm = current->mm;
	int rc;

	mmap_assert_write_locked(mm);

	if (!mm->context.allow_cow_sharing)
		return 0;

	mm->context.allow_cow_sharing = 0;

	/* Replace all shared zeropages by anonymous pages. */
	rc = __gmap_helper_unshare_zeropages(mm);
	/*
	 * Make sure to disable KSM (if enabled for the whole process or
	 * individual VMAs). Note that nothing currently hinders user space
	 * from re-enabling it.
	 */
	if (!rc)
		rc = ksm_disable(mm);
	if (rc)
		mm->context.allow_cow_sharing = 1;
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
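
/*
 * A minimal usage sketch (hypothetical call site): this is meant to run
 * once, e.g. when a VM switches to a mode that forbids COW sharing, and
 * it requires the mmap lock in write mode:
 *
 *	mmap_write_lock(current->mm);
 *	rc = gmap_helper_disable_cow_sharing();
 *	mmap_write_unlock(current->mm);
 */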