// SPDX-License-Identifier: GPL-2.0
/*
 * Helper functions for KVM guest address space mapping code
 *
 * Copyright IBM Corp. 2007, 2025
 */
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagewalk.h>
#include <linux/ksm.h>
#include <asm/gmap_helpers.h>

/**
 * ptep_zap_swap_entry() - discard a swap entry.
 * @mm: the mm
 * @entry: the swap entry that needs to be zapped
 *
 * Discards the given swap entry. If the swap entry was an actual swap
 * entry (and not a migration entry, for example), the actual swapped
 * page is also discarded from swap.
 */
static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry))
		dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
	free_swap_and_cache(entry);
}

/**
 * gmap_helper_zap_one_page() - discard a page if it was swapped.
 * @mm: the mm
 * @vmaddr: the userspace virtual address that needs to be discarded
 *
 * If the given address maps to a swap entry, discard it.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
	struct vm_area_struct *vma;
	spinlock_t *ptl;
	pte_t *ptep;

	mmap_assert_locked(mm);

	/* Find the vm address for the guest address */
	vma = vma_lookup(mm, vmaddr);
	if (!vma || is_vm_hugetlb_page(vma))
		return;

	/* Get pointer to the page table entry */
	ptep = get_locked_pte(mm, vmaddr, &ptl);
	if (unlikely(!ptep))
		return;
	if (pte_swap(*ptep))
		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
	pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);

/**
 * gmap_helper_discard() - discard user pages in the given range
 * @mm: the mm
 * @vmaddr: starting userspace address
 * @end: end address (first address outside the range)
 *
 * All userspace pages in the range [@vmaddr, @end) are discarded and unmapped.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
{
	struct vm_area_struct *vma;

	mmap_assert_locked(mm);

	while (vmaddr < end) {
		vma = find_vma_intersection(mm, vmaddr, end);
		if (!vma)
			return;
		if (!is_vm_hugetlb_page(vma))
			zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
		vmaddr = vma->vm_end;
	}
}
EXPORT_SYMBOL_GPL(gmap_helper_discard);

static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
				   unsigned long end, struct mm_walk *walk)
{
	unsigned long *found_addr = walk->private;

	/* Return 1 if the page is a zeropage. */
	if (is_zero_pfn(pte_pfn(*pte))) {
		/*
		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
		 * currently only works in COW mappings, which is also where
		 * mm_forbids_zeropage() is checked.
		 */
		if (!is_cow_mapping(walk->vma->vm_flags))
			return -EFAULT;

		*found_addr = addr;
		return 1;
	}
	return 0;
}

static const struct mm_walk_ops find_zeropage_ops = {
	.pte_entry	= find_zeropage_pte_entry,
	.walk_lock	= PGWALK_WRLOCK,
};

/**
 * __gmap_helper_unshare_zeropages() - unshare all shared zeropages
 * @mm: the mm whose zero pages are to be unshared
 *
 * Unshare all shared zeropages, replacing them by anonymous pages. Note that
 * we cannot simply zap all shared zeropages, because this could later
 * trigger unexpected userfaultfd missing events.
 *
 * This must be called after mm->context.allow_cow_sharing was
 * set to 0, to avoid future mappings of shared zeropages.
 *
 * mm contracts with s390, that even if mm were to remove a page table,
 * and racing with walk_page_range_vma() calling pte_offset_map_lock()
 * would fail, it will never insert a page table containing empty zero
 * pages once mm_forbids_zeropage(mm) i.e.
 * mm->context.allow_cow_sharing is set to 0.
 */
static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);
	unsigned long addr;
	vm_fault_t fault;
	int rc;

	for_each_vma(vmi, vma) {
		/*
		 * We could only look at COW mappings, but it's more future
		 * proof to catch unexpected zeropages in other mappings and
		 * fail.
		 */
		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
			continue;
		addr = vma->vm_start;

retry:
		rc = walk_page_range_vma(vma, addr, vma->vm_end,
					 &find_zeropage_ops, &addr);
		if (rc < 0)
			return rc;
		else if (!rc)
			continue;

		/* addr was updated by find_zeropage_pte_entry() */
		fault = handle_mm_fault(vma, addr,
					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
					NULL);
		if (fault & VM_FAULT_OOM)
			return -ENOMEM;
		/*
		 * See break_ksm(): even after handle_mm_fault() returned 0, we
		 * must start the lookup from the current address, because
		 * handle_mm_fault() may back out if there's any difficulty.
		 *
		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
		 * maybe they could trigger in the future on concurrent
		 * truncation. In that case, the shared zeropage would be gone
		 * and we can simply retry and make progress.
		 */
		cond_resched();
		goto retry;
	}

	return 0;
}

/**
 * gmap_helper_disable_cow_sharing() - disable all COW sharing
 *
 * Disable most COW-sharing of memory pages for the whole process:
 * (1) Disable KSM and unmerge/unshare any KSM pages.
 * (2) Disallow shared zeropages and unshare any zeropages that are mapped.
 *
 * Note that we currently don't bother with COW-shared pages that are shared
 * with parent/child processes due to fork().
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int gmap_helper_disable_cow_sharing(void)
{
	struct mm_struct *mm = current->mm;
	int rc;

	mmap_assert_write_locked(mm);

	if (!mm->context.allow_cow_sharing)
		return 0;

	mm->context.allow_cow_sharing = 0;

	/* Replace all shared zeropages by anonymous pages. */
	rc = __gmap_helper_unshare_zeropages(mm);
	/*
	 * Make sure to disable KSM (if enabled for the whole process or
	 * individual VMAs). Note that nothing currently hinders user space
	 * from re-enabling it.
	 */
	if (!rc)
		rc = ksm_disable(mm);
	if (rc)
		mm->context.allow_cow_sharing = 1;
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
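
/*
 * Illustrative sketch only (not part of the original file): one way a
 * hypothetical caller could drive the exported helpers above while
 * satisfying their documented locking contracts. The function names
 * my_discard_guest_range() and my_setup_protected_guest() are made up
 * for illustration; the real callers live in the s390 KVM/gmap code.
 *
 *	static void my_discard_guest_range(struct mm_struct *mm,
 *					   unsigned long start, unsigned long end)
 *	{
 *		// gmap_helper_discard() and gmap_helper_zap_one_page() only
 *		// assert that the mmap lock is held, so taking it for
 *		// reading is sufficient here.
 *		mmap_read_lock(mm);
 *		gmap_helper_discard(mm, start, end);
 *		mmap_read_unlock(mm);
 *	}
 *
 *	static int my_setup_protected_guest(void)
 *	{
 *		struct mm_struct *mm = current->mm;
 *		int rc;
 *
 *		// Disabling COW sharing asserts the mmap lock in write mode,
 *		// since it changes mm->context.allow_cow_sharing and
 *		// unshares zeropages.
 *		mmap_write_lock(mm);
 *		rc = gmap_helper_disable_cow_sharing();
 *		mmap_write_unlock(mm);
 *		return rc;
 *	}
 */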