// SPDX-License-Identifier: GPL-2.0
/*
 * Helper functions for KVM guest address space mapping code
 *
 * Copyright IBM Corp. 2007, 2025
 */

#include <linux/export.h>
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagewalk.h>
#include <linux/ksm.h>
#include <asm/gmap_helpers.h>

/**
 * ptep_zap_swap_entry() - discard a swap entry.
 * @mm: the mm
 * @entry: the swap entry that needs to be zapped
 *
 * Discards the given swap entry. If the swap entry was an actual swap
 * entry (and not a migration entry, for example), the actual swapped
 * page is also discarded from swap.
 */
static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
        if (!non_swap_entry(entry))
                dec_mm_counter(mm, MM_SWAPENTS);
        else if (is_migration_entry(entry))
                dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
        free_swap_and_cache(entry);
}

/**
 * gmap_helper_zap_one_page() - discard a page if it was swapped.
 * @mm: the mm
 * @vmaddr: the userspace virtual address that needs to be discarded
 *
 * If the given address maps to a swap entry, discard it.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
        struct vm_area_struct *vma;
        spinlock_t *ptl;
        pte_t *ptep;

        mmap_assert_locked(mm);

        /* Find the vm address for the guest address */
        vma = vma_lookup(mm, vmaddr);
        if (!vma || is_vm_hugetlb_page(vma))
                return;

        /* Get pointer to the page table entry */
        ptep = get_locked_pte(mm, vmaddr, &ptl);
        if (unlikely(!ptep))
                return;
        if (pte_swap(*ptep))
                ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
        pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);

/**
 * gmap_helper_discard() - discard user pages in the given range
 * @mm: the mm
 * @vmaddr: starting userspace address
 * @end: end address (first address outside the range)
 *
 * All userspace pages in the range [@vmaddr, @end) are discarded and unmapped.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
{
        struct vm_area_struct *vma;

        mmap_assert_locked(mm);

        while (vmaddr < end) {
                vma = find_vma_intersection(mm, vmaddr, end);
                if (!vma)
                        return;
                if (!is_vm_hugetlb_page(vma))
                        zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
                vmaddr = vma->vm_end;
        }
}
EXPORT_SYMBOL_GPL(gmap_helper_discard);
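/*
 * Example (illustrative only; the caller shown here is assumed and not part
 * of this file): both helpers above expect the mmap lock to be held, so a
 * typical call sequence would look roughly like this:
 *
 *      mmap_read_lock(mm);
 *      gmap_helper_zap_one_page(mm, vmaddr);
 *      gmap_helper_discard(mm, vmaddr, vmaddr + PAGE_SIZE);
 *      mmap_read_unlock(mm);
 */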
static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
                                   unsigned long end, struct mm_walk *walk)
{
        unsigned long *found_addr = walk->private;

        /* Return 1 if the page is a zeropage. */
        if (is_zero_pfn(pte_pfn(*pte))) {
                /*
                 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
                 * right thing and likely don't care: FAULT_FLAG_UNSHARE
                 * currently only works in COW mappings, which is also where
                 * mm_forbids_zeropage() is checked.
                 */
                if (!is_cow_mapping(walk->vma->vm_flags))
                        return -EFAULT;

                *found_addr = addr;
                return 1;
        }
        return 0;
}

static const struct mm_walk_ops find_zeropage_ops = {
        .pte_entry      = find_zeropage_pte_entry,
        .walk_lock      = PGWALK_WRLOCK,
};
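/*
 * Note on the page walk contract relied upon below: a non-zero return value
 * from the ->pte_entry callback stops the walk and is propagated back by
 * walk_page_range_vma(). A return value of 1 therefore means that
 * find_zeropage_pte_entry() found a shared zeropage and stored its address
 * in the caller-provided variable, 0 means the range was scanned without
 * finding one, and a negative value is an error.
 */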
/**
 * __gmap_helper_unshare_zeropages() - unshare all shared zeropages
 * @mm: the mm whose zero pages are to be unshared
 *
 * Unshare all shared zeropages, replacing them with anonymous pages. Note
 * that we cannot simply zap all shared zeropages, because this could later
 * trigger unexpected userfaultfd missing events.
 *
 * This must be called after mm->context.allow_cow_sharing was
 * set to 0, to avoid future mappings of shared zeropages.
 *
 * mm contracts with s390 that, even if mm were to remove a page table while
 * racing with walk_page_range_vma() so that pte_offset_map_lock() would
 * fail, it will never insert a page table containing empty zero pages once
 * mm_forbids_zeropage(mm), i.e. mm->context.allow_cow_sharing, is set to 0.
 */
static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
{
        struct vm_area_struct *vma;
        VMA_ITERATOR(vmi, mm, 0);
        unsigned long addr;
        vm_fault_t fault;
        int rc;

        for_each_vma(vmi, vma) {
                /*
                 * We could only look at COW mappings, but it's more future
                 * proof to catch unexpected zeropages in other mappings and
                 * fail.
                 */
                if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
                        continue;
                addr = vma->vm_start;

retry:
                rc = walk_page_range_vma(vma, addr, vma->vm_end,
                                         &find_zeropage_ops, &addr);
                if (rc < 0)
                        return rc;
                else if (!rc)
                        continue;

                /* addr was updated by find_zeropage_pte_entry() */
                fault = handle_mm_fault(vma, addr,
                                        FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
                                        NULL);
                if (fault & VM_FAULT_OOM)
                        return -ENOMEM;
                /*
                 * See break_ksm(): even after handle_mm_fault() returned 0, we
                 * must start the lookup from the current address, because
                 * handle_mm_fault() may back out if there's any difficulty.
                 *
                 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
                 * maybe they could trigger in the future on concurrent
                 * truncation. In that case, the shared zeropage would be gone
                 * and we can simply retry and make progress.
                 */
                cond_resched();
                goto retry;
        }

        return 0;
}

/**
 * gmap_helper_disable_cow_sharing() - disable all COW sharing
 *
 * Disable most COW-sharing of memory pages for the whole process:
 * (1) Disable KSM and unmerge/unshare any KSM pages.
 * (2) Disallow shared zeropages and unshare any zeropages that are mapped.
 *
 * Note that we currently don't bother with COW-shared pages that are shared
 * with parent/child processes due to fork().
 */
int gmap_helper_disable_cow_sharing(void)
{
        struct mm_struct *mm = current->mm;
        int rc;

        mmap_assert_write_locked(mm);

        if (!mm->context.allow_cow_sharing)
                return 0;

        mm->context.allow_cow_sharing = 0;

        /* Replace all shared zeropages by anonymous pages. */
        rc = __gmap_helper_unshare_zeropages(mm);
        /*
         * Make sure to disable KSM (if enabled for the whole process or
         * individual VMAs). Note that nothing currently hinders user space
         * from re-enabling it.
         */
        if (!rc)
                rc = ksm_disable(mm);
        if (rc)
                mm->context.allow_cow_sharing = 1;
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
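/*
 * Example (illustrative only; the calling code here is assumed and not part
 * of this file): gmap_helper_disable_cow_sharing() expects the mmap lock to
 * be held for writing, e.g.:
 *
 *      int rc;
 *
 *      mmap_write_lock(current->mm);
 *      rc = gmap_helper_disable_cow_sharing();
 *      mmap_write_unlock(current->mm);
 */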