/*
 * mm/mprotect.c
 *
 * (C) Copyright 1994 Linus Torvalds
 * (C) Copyright 2002 Christoph Hellwig
 *
 * Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 * (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/mempolicy.h>
#include <linux/personality.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/perf_event.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

#ifndef pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
	return newprot;
}
#endif

static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa, bool *ret_all_same_node)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *pte, oldpte;
	spinlock_t *ptl;
	unsigned long pages = 0;
	bool all_same_node = true;
	int last_nid = -1;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	arch_enter_lazy_mmu_mode();
	do {
		oldpte = *pte;
		if (pte_present(oldpte)) {
			pte_t ptent;
			bool updated = false;

			ptent = ptep_modify_prot_start(mm, addr, pte);
			if (!prot_numa) {
				ptent = pte_modify(ptent, newprot);
				updated = true;
			} else {
				struct page *page;

				page = vm_normal_page(vma, addr, oldpte);
				if (page) {
					int this_nid = page_to_nid(page);
					if (last_nid == -1)
						last_nid = this_nid;
					if (last_nid != this_nid)
						all_same_node = false;

					/* only check non-shared pages */
					if (!pte_numa(oldpte) &&
					    page_mapcount(page) == 1) {
						ptent = pte_mknuma(ptent);
						updated = true;
					}
				}
			}

			/*
			 * Avoid taking write faults for pages we know to be
			 * dirty.
			 */
			if (dirty_accountable && pte_dirty(ptent)) {
				ptent = pte_mkwrite(ptent);
				updated = true;
			}

			if (updated)
				pages++;
			ptep_modify_prot_commit(mm, addr, pte, ptent);
		} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
			swp_entry_t entry = pte_to_swp_entry(oldpte);

			if (is_write_migration_entry(entry)) {
				/*
				 * A protection check is difficult so
				 * just be safe and disable write
				 */
				make_migration_entry_read(&entry);
				set_pte_at(mm, addr, pte,
					swp_entry_to_pte(entry));
			}
			pages++;
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(pte - 1, ptl);

	*ret_all_same_node = all_same_node;
	return pages;
}
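
/*
 * When changing protections for NUMA hinting faults, the helper below marks
 * the PMD entry itself as NUMA so that the next access to the range raises a
 * single hinting fault for the whole PMD instead of one fault per PTE.  The
 * !CONFIG_NUMA_BALANCING variant is only a stub: prot_numa callers are not
 * expected to exist without NUMA balancing built in, hence the BUG().
 */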

#ifdef CONFIG_NUMA_BALANCING
static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
				       pmd_t *pmd)
{
	spin_lock(&mm->page_table_lock);
	set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd));
	spin_unlock(&mm->page_table_lock);
}
#else
static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
				       pmd_t *pmd)
{
	BUG();
}
#endif /* CONFIG_NUMA_BALANCING */

static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long pages = 0;
	bool all_same_node;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_trans_huge(*pmd)) {
			if (next - addr != HPAGE_PMD_SIZE)
				split_huge_page_pmd(vma, addr, pmd);
			else if (change_huge_pmd(vma, pmd, addr, newprot, prot_numa)) {
				pages += HPAGE_PMD_NR;
				continue;
			}
			/* fall through */
		}
		if (pmd_none_or_clear_bad(pmd))
			continue;
		pages += change_pte_range(vma, pmd, addr, next, newprot,
				dirty_accountable, prot_numa, &all_same_node);

		/*
		 * If we are changing protections for NUMA hinting faults then
		 * set pmd_numa if the examined pages were all on the same
		 * node. This allows a regular PMD to be handled as one fault
		 * and effectively batches the taking of the PTL
		 */
		if (prot_numa && all_same_node)
			change_pmd_protnuma(vma->vm_mm, addr, pmd);
	} while (pmd++, addr = next, addr != end);

	return pages;
}
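
/*
 * The remaining levels of the walk mirror change_pmd_range() one step up:
 * change_pud_range() and change_protection_range() split the range on
 * PUD/PGD boundaries, skip empty entries and add up the number of PTEs that
 * were actually updated, so the caller only flushes the TLB when something
 * changed.
 */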

static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa)
{
	pud_t *pud;
	unsigned long next;
	unsigned long pages = 0;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		pages += change_pmd_range(vma, pud, addr, next, newprot,
				dirty_accountable, prot_numa);
	} while (pud++, addr = next, addr != end);

	return pages;
}

static unsigned long change_protection_range(struct vm_area_struct *vma,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	unsigned long next;
	unsigned long start = addr;
	unsigned long pages = 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset(mm, addr);
	flush_cache_range(vma, addr, end);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		pages += change_pud_range(vma, pgd, addr, next, newprot,
				dirty_accountable, prot_numa);
	} while (pgd++, addr = next, addr != end);

	/* Only flush the TLB if we actually modified any entries: */
	if (pages)
		flush_tlb_range(vma, start, end);

	return pages;
}

unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
		unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long pages;

	mmu_notifier_invalidate_range_start(mm, start, end);
	if (is_vm_hugetlb_page(vma))
		pages = hugetlb_change_protection(vma, start, end, newprot);
	else
		pages = change_protection_range(vma, start, end, newprot,
				dirty_accountable, prot_numa);
	mmu_notifier_invalidate_range_end(mm, start, end);

	return pages;
}
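
/*
 * mprotect_fixup() applies @newflags to the [start, end) slice of @vma: it
 * charges the mapping if a private region becomes writable, tries to merge
 * with the neighbouring VMAs, splits @vma if the slice does not cover it
 * exactly, then updates vm_flags/vm_page_prot and rewrites the page tables
 * via change_protection().  Called with mmap_sem held for writing.
 */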

int
mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
	unsigned long start, unsigned long end, unsigned long newflags)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long oldflags = vma->vm_flags;
	long nrpages = (end - start) >> PAGE_SHIFT;
	unsigned long charged = 0;
	pgoff_t pgoff;
	int error;
	int dirty_accountable = 0;

	if (newflags == oldflags) {
		*pprev = vma;
		return 0;
	}

	/*
	 * If we make a private mapping writable we increase our commit;
	 * but (without finer accounting) cannot reduce our commit if we
	 * make it unwritable again. hugetlb mappings were accounted for
	 * even if read-only, so there is no need to account for them here.
	 */
	if (newflags & VM_WRITE) {
		if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
						VM_SHARED|VM_NORESERVE))) {
			charged = nrpages;
			if (security_vm_enough_memory_mm(mm, charged))
				return -ENOMEM;
			newflags |= VM_ACCOUNT;
		}
	}

	/*
	 * First try to merge with previous and/or next vma.
	 */
	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*pprev = vma_merge(mm, *pprev, start, end, newflags,
			vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
	if (*pprev) {
		vma = *pprev;
		goto success;
	}

	*pprev = vma;

	if (start != vma->vm_start) {
		error = split_vma(mm, vma, start, 1);
		if (error)
			goto fail;
	}

	if (end != vma->vm_end) {
		error = split_vma(mm, vma, end, 0);
		if (error)
			goto fail;
	}

success:
	/*
	 * vm_flags and vm_page_prot are protected by the mmap_sem
	 * held in write mode.
	 */
	vma->vm_flags = newflags;
	vma->vm_page_prot = pgprot_modify(vma->vm_page_prot,
					  vm_get_page_prot(newflags));

	if (vma_wants_writenotify(vma)) {
		vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED);
		dirty_accountable = 1;
	}

	change_protection(vma, start, end, vma->vm_page_prot,
			  dirty_accountable, 0);

	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
	perf_event_mmap(vma);
	return 0;

fail:
	vm_unacct_memory(charged);
	return error;
}
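
/*
 * The mprotect() syscall below validates its arguments, then walks the VMAs
 * covering [start, start+len) and calls mprotect_fixup() on each piece.  The
 * VM_MAY* check relies on the flag layout in <linux/mm.h>: VM_READ, VM_WRITE
 * and VM_EXEC are bits 0-2 while VM_MAYREAD, VM_MAYWRITE and VM_MAYEXEC are
 * bits 4-6, so "newflags >> 4" lines the VM_MAY* bits up with their VM_*
 * counterparts.  For example, requesting PROT_WRITE on a mapping without
 * VM_MAYWRITE leaves VM_WRITE set in (newflags & ~(newflags >> 4)), and the
 * request fails with -EACCES.
 */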

SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
		unsigned long, prot)
{
	unsigned long vm_flags, nstart, end, tmp, reqprot;
	struct vm_area_struct *vma, *prev;
	int error = -EINVAL;
	const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
	prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
	if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
		return -EINVAL;

	if (start & ~PAGE_MASK)
		return -EINVAL;
	if (!len)
		return 0;
	len = PAGE_ALIGN(len);
	end = start + len;
	if (end <= start)
		return -ENOMEM;
	if (!arch_validate_prot(prot))
		return -EINVAL;

	reqprot = prot;
	/*
	 * Does the application expect PROT_READ to imply PROT_EXEC?
	 */
	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
		prot |= PROT_EXEC;

	vm_flags = calc_vm_prot_bits(prot);

	down_write(&current->mm->mmap_sem);

	vma = find_vma(current->mm, start);
	error = -ENOMEM;
	if (!vma)
		goto out;
	prev = vma->vm_prev;
	if (unlikely(grows & PROT_GROWSDOWN)) {
		if (vma->vm_start >= end)
			goto out;
		start = vma->vm_start;
		error = -EINVAL;
		if (!(vma->vm_flags & VM_GROWSDOWN))
			goto out;
	}
	else {
		if (vma->vm_start > start)
			goto out;
		if (unlikely(grows & PROT_GROWSUP)) {
			end = vma->vm_end;
			error = -EINVAL;
			if (!(vma->vm_flags & VM_GROWSUP))
				goto out;
		}
	}
	if (start > vma->vm_start)
		prev = vma;

	for (nstart = start ; ; ) {
		unsigned long newflags;

		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */

		newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));

		/* newflags >> 4 shifts VM_MAY% in place of VM_% */
		if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
			error = -EACCES;
			goto out;
		}

		error = security_file_mprotect(vma, reqprot, prot);
		if (error)
			goto out;

		tmp = vma->vm_end;
		if (tmp > end)
			tmp = end;
		error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
		if (error)
			goto out;
		nstart = tmp;

		if (nstart < prev->vm_end)
			nstart = prev->vm_end;
		if (nstart >= end)
			goto out;

		vma = prev->vm_next;
		if (!vma || vma->vm_start != nstart) {
			error = -ENOMEM;
			goto out;
		}
	}
out:
	up_write(&current->mm->mmap_sem);
	return error;
}
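
/*
 * Illustrative userspace usage (not part of the kernel build): a typical
 * sequence that reaches the mprotect() system call defined above maps a
 * region read-write and later drops the write permission.  The snippet
 * assumes a 4K page size for brevity.
 *
 *	#include <sys/mman.h>
 *
 *	void make_read_only(void)
 *	{
 *		void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *		if (p != MAP_FAILED)
 *			mprotect(p, 4096, PROT_READ);
 *	}
 */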