// SPDX-License-Identifier: GPL-2.0
/*
 * Helper functions for KVM guest address space mapping code
 *
 * Copyright IBM Corp. 2007, 2025
 */

#include <linux/export.h>
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/leafops.h>
#include <linux/pagewalk.h>
#include <linux/ksm.h>
#include <asm/gmap_helpers.h>
#include <asm/pgtable.h>

/**
 * ptep_zap_softleaf_entry() - discard a software leaf entry.
 * @mm: the mm
 * @entry: the software leaf entry that needs to be zapped
 *
 * Discards the given software leaf entry. If the leaf entry was an actual
 * swap entry (and not a migration entry, for example), the actual swapped
 * page is also discarded from swap.
 */
static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry)
{
	if (softleaf_is_swap(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (softleaf_is_migration(entry))
		dec_mm_counter(mm, mm_counter(softleaf_to_folio(entry)));
	free_swap_and_cache(entry);
}

/**
 * gmap_helper_zap_one_page() - discard a page if it was swapped.
 * @mm: the mm
 * @vmaddr: the userspace virtual address that needs to be discarded
 *
 * If the given address maps to a swap entry, discard it.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
	struct vm_area_struct *vma;
	unsigned long pgstev;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;

	mmap_assert_locked(mm);

	/* Find the vm address for the guest address */
	vma = vma_lookup(mm, vmaddr);
	if (!vma || is_vm_hugetlb_page(vma))
		return;

	/* Get pointer to the page table entry */
	ptep = get_locked_pte(mm, vmaddr, &ptl);
	if (unlikely(!ptep))
		return;
	if (pte_swap(*ptep)) {
		preempt_disable();
		pgste = pgste_get_lock(ptep);
		pgstev = pgste_val(pgste);

		if ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
		    (pgstev & _PGSTE_GPS_ZERO)) {
			ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep));
			pte_clear(mm, vmaddr, ptep);
		}

		pgste_set_unlock(ptep, pgste);
		preempt_enable();
	}
	pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);

/**
 * gmap_helper_discard() - discard user pages in the given range
 * @mm: the mm
 * @vmaddr: starting userspace address
 * @end: end address (first address outside the range)
 *
 * All userspace pages in the range [@vmaddr, @end) are discarded and unmapped.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
{
	struct vm_area_struct *vma;

	mmap_assert_locked(mm);

	while (vmaddr < end) {
		vma = find_vma_intersection(mm, vmaddr, end);
		if (!vma)
			return;
		if (!is_vm_hugetlb_page(vma))
			zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
		vmaddr = vma->vm_end;
	}
}
EXPORT_SYMBOL_GPL(gmap_helper_discard);
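
/*
 * Example (illustrative sketch only, not taken from an in-tree caller):
 * both helpers above require the mmap lock, per their "Context:" notes,
 * so a hypothetical caller discarding a range of guest-backing memory
 * would look something like:
 *
 *	mmap_read_lock(mm);
 *	gmap_helper_discard(mm, vmaddr, vmaddr + size);
 *	mmap_read_unlock(mm);
 */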

static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
				   unsigned long end, struct mm_walk *walk)
{
	unsigned long *found_addr = walk->private;

	/* Return 1 if the page is a zeropage. */
	if (is_zero_pfn(pte_pfn(*pte))) {
		/*
		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
		 * currently only works in COW mappings, which is also where
		 * mm_forbids_zeropage() is checked.
		 */
		if (!is_cow_mapping(walk->vma->vm_flags))
			return -EFAULT;

		*found_addr = addr;
		return 1;
	}
	return 0;
}

static const struct mm_walk_ops find_zeropage_ops = {
	.pte_entry	= find_zeropage_pte_entry,
	.walk_lock	= PGWALK_WRLOCK,
};

/**
 * __gmap_helper_unshare_zeropages() - unshare all shared zeropages
 * @mm: the mm whose zero pages are to be unshared
 *
 * Unshare all shared zeropages, replacing them by anonymous pages. Note that
 * we cannot simply zap all shared zeropages, because this could later
 * trigger unexpected userfaultfd missing events.
 *
 * This must be called after mm->context.allow_cow_sharing was
 * set to 0, to avoid future mappings of shared zeropages.
 *
 * mm contracts with s390 that, even if mm were to remove a page table
 * (so that a racing walk_page_range_vma() calling pte_offset_map_lock()
 * would fail), it will never insert a page table containing empty zero
 * pages once mm_forbids_zeropage(mm), i.e.
 * mm->context.allow_cow_sharing, is set to 0.
 */
static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);
	unsigned long addr;
	vm_fault_t fault;
	int rc;

	for_each_vma(vmi, vma) {
		/*
		 * We could only look at COW mappings, but it's more future
		 * proof to catch unexpected zeropages in other mappings and
		 * fail.
		 */
		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
			continue;
		addr = vma->vm_start;

retry:
		rc = walk_page_range_vma(vma, addr, vma->vm_end,
					 &find_zeropage_ops, &addr);
		if (rc < 0)
			return rc;
		else if (!rc)
			continue;

		/* addr was updated by find_zeropage_pte_entry() */
		fault = handle_mm_fault(vma, addr,
					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
					NULL);
		if (fault & VM_FAULT_OOM)
			return -ENOMEM;
		/*
		 * See break_ksm(): even after handle_mm_fault() returned 0, we
		 * must start the lookup from the current address, because
		 * handle_mm_fault() may back out if there's any difficulty.
		 *
		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
		 * maybe they could trigger in the future on concurrent
		 * truncation. In that case, the shared zeropage would be gone
		 * and we can simply retry and make progress.
		 */
		cond_resched();
		goto retry;
	}

	return 0;
}

/**
 * gmap_helper_disable_cow_sharing() - disable all COW sharing
 *
 * Disable most COW-sharing of memory pages for the whole process:
 * (1) Disable KSM and unmerge/unshare any KSM pages.
 * (2) Disallow shared zeropages and unshare any zeropages that are mapped.
 *
 * Note that we currently don't bother with COW-shared pages that are shared
 * with parent/child processes due to fork().
 */
int gmap_helper_disable_cow_sharing(void)
{
	struct mm_struct *mm = current->mm;
	int rc;

	mmap_assert_write_locked(mm);

	if (!mm->context.allow_cow_sharing)
		return 0;

	mm->context.allow_cow_sharing = 0;

	/* Replace all shared zeropages by anonymous pages. */
	rc = __gmap_helper_unshare_zeropages(mm);
	/*
	 * Make sure to disable KSM (if enabled for the whole process or
	 * individual VMAs). Note that nothing currently hinders user space
	 * from re-enabling it.
	 */
	if (!rc)
		rc = ksm_disable(mm);
	if (rc)
		mm->context.allow_cow_sharing = 1;
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
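
/*
 * Example (illustrative sketch only, not taken from an in-tree caller):
 * gmap_helper_disable_cow_sharing() operates on current->mm and asserts
 * that the mmap lock is held in write mode, so a hypothetical caller
 * preparing a guest would do something like:
 *
 *	mmap_write_lock(current->mm);
 *	rc = gmap_helper_disable_cow_sharing();
 *	mmap_write_unlock(current->mm);
 *	if (rc)
 *		return rc;
 */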