/*
 *  mm/mprotect.c
 *
 *  (C) Copyright 1994 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 *
 *  Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/mempolicy.h>
#include <linux/personality.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/perf_event.h>
#include <linux/ksm.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

#ifndef pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
        return newprot;
}
#endif

/*
 * For a prot_numa update we only hold mmap_sem for read so there is a
 * potential race with faulting where a pmd was temporarily none. This
 * function checks for a transhuge pmd under the appropriate lock. It
 * returns a pte if it was successfully locked or NULL if it raced with
 * a transhuge insertion.
 */
static pte_t *lock_pte_protection(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long addr, int prot_numa, spinlock_t **ptl)
{
        pte_t *pte;
        spinlock_t *pmdl;

        /* !prot_numa is protected by mmap_sem held for write */
        if (!prot_numa)
                return pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);

        pmdl = pmd_lock(vma->vm_mm, pmd);
        if (unlikely(pmd_trans_huge(*pmd) || pmd_none(*pmd))) {
                spin_unlock(pmdl);
                return NULL;
        }

        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);
        spin_unlock(pmdl);
        return pte;
}

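/*
 * change_pte_range - walk the PTEs mapping [addr, end) under one pmd and
 * either apply @newprot (the regular mprotect path) or mark present,
 * non-KSM pages as NUMA-hinting PTEs (the prot_numa path). Writable
 * migration entries are also downgraded to read-only. Returns the number
 * of entries actually changed, which the callers use to decide whether a
 * TLB flush is needed.
 */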
static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long addr, unsigned long end, pgprot_t newprot,
                int dirty_accountable, int prot_numa)
{
        struct mm_struct *mm = vma->vm_mm;
        pte_t *pte, oldpte;
        spinlock_t *ptl;
        unsigned long pages = 0;

        pte = lock_pte_protection(vma, pmd, addr, prot_numa, &ptl);
        if (!pte)
                return 0;

        arch_enter_lazy_mmu_mode();
        do {
                oldpte = *pte;
                if (pte_present(oldpte)) {
                        pte_t ptent;
                        bool updated = false;

                        if (!prot_numa) {
                                ptent = ptep_modify_prot_start(mm, addr, pte);
                                if (pte_numa(ptent))
                                        ptent = pte_mknonnuma(ptent);
                                ptent = pte_modify(ptent, newprot);
                                /*
                                 * Avoid taking write faults for pages we
                                 * know to be dirty.
                                 */
                                if (dirty_accountable && pte_dirty(ptent))
                                        ptent = pte_mkwrite(ptent);
                                ptep_modify_prot_commit(mm, addr, pte, ptent);
                                updated = true;
                        } else {
                                struct page *page;

                                page = vm_normal_page(vma, addr, oldpte);
                                if (page && !PageKsm(page)) {
                                        if (!pte_numa(oldpte)) {
                                                ptep_set_numa(mm, addr, pte);
                                                updated = true;
                                        }
                                }
                        }
                        if (updated)
                                pages++;
                } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
                        swp_entry_t entry = pte_to_swp_entry(oldpte);

                        if (is_write_migration_entry(entry)) {
                                pte_t newpte;
                                /*
                                 * A protection check is difficult so
                                 * just be safe and disable write
                                 */
                                make_migration_entry_read(&entry);
                                newpte = swp_entry_to_pte(entry);
                                if (pte_swp_soft_dirty(oldpte))
                                        newpte = pte_swp_mksoft_dirty(newpte);
                                set_pte_at(mm, addr, pte, newpte);

                                pages++;
                        }
                }
        } while (pte++, addr += PAGE_SIZE, addr != end);
        arch_leave_lazy_mmu_mode();
        pte_unmap_unlock(pte - 1, ptl);

        return pages;
}

static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
                pud_t *pud, unsigned long addr, unsigned long end,
                pgprot_t newprot, int dirty_accountable, int prot_numa)
{
        pmd_t *pmd;
        struct mm_struct *mm = vma->vm_mm;
        unsigned long next;
        unsigned long pages = 0;
        unsigned long nr_huge_updates = 0;
        unsigned long mni_start = 0;

        pmd = pmd_offset(pud, addr);
        do {
                unsigned long this_pages;

                next = pmd_addr_end(addr, end);
                if (!pmd_trans_huge(*pmd) && pmd_none_or_clear_bad(pmd))
                        continue;

                /* invoke the mmu notifier if the pmd is populated */
                if (!mni_start) {
                        mni_start = addr;
                        mmu_notifier_invalidate_range_start(mm, mni_start, end);
                }

                if (pmd_trans_huge(*pmd)) {
                        if (next - addr != HPAGE_PMD_SIZE)
                                split_huge_page_pmd(vma, addr, pmd);
                        else {
                                int nr_ptes = change_huge_pmd(vma, pmd, addr,
                                                newprot, prot_numa);

                                if (nr_ptes) {
                                        if (nr_ptes == HPAGE_PMD_NR) {
                                                pages += HPAGE_PMD_NR;
                                                nr_huge_updates++;
                                        }

                                        /* huge pmd was handled */
                                        continue;
                                }
                        }
                        /* fall through, the trans huge pmd just split */
                }
                this_pages = change_pte_range(vma, pmd, addr, next, newprot,
                                dirty_accountable, prot_numa);
                pages += this_pages;
        } while (pmd++, addr = next, addr != end);

        if (mni_start)
                mmu_notifier_invalidate_range_end(mm, mni_start, end);

        if (nr_huge_updates)
                count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
        return pages;
}

static inline unsigned long change_pud_range(struct vm_area_struct *vma,
                pgd_t *pgd, unsigned long addr, unsigned long end,
                pgprot_t newprot, int dirty_accountable, int prot_numa)
{
        pud_t *pud;
        unsigned long next;
        unsigned long pages = 0;

        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (pud_none_or_clear_bad(pud))
                        continue;
                pages += change_pmd_range(vma, pud, addr, next, newprot,
                                dirty_accountable, prot_numa);
        } while (pud++, addr = next, addr != end);

        return pages;
}

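/*
 * change_protection_range - walk the full page-table hierarchy for
 * [addr, end) of @vma, pushing the new protections down through
 * change_pud_range()/change_pmd_range()/change_pte_range(). The walk is
 * bracketed by set_tlb_flush_pending()/clear_tlb_flush_pending(), and the
 * TLB is flushed only if at least one entry was modified.
 */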
static unsigned long change_protection_range(struct vm_area_struct *vma,
                unsigned long addr, unsigned long end, pgprot_t newprot,
                int dirty_accountable, int prot_numa)
{
        struct mm_struct *mm = vma->vm_mm;
        pgd_t *pgd;
        unsigned long next;
        unsigned long start = addr;
        unsigned long pages = 0;

        BUG_ON(addr >= end);
        pgd = pgd_offset(mm, addr);
        flush_cache_range(vma, addr, end);
        set_tlb_flush_pending(mm);
        do {
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd))
                        continue;
                pages += change_pud_range(vma, pgd, addr, next, newprot,
                                dirty_accountable, prot_numa);
        } while (pgd++, addr = next, addr != end);

        /* Only flush the TLB if we actually modified any entries: */
        if (pages)
                flush_tlb_range(vma, start, end);
        clear_tlb_flush_pending(mm);

        return pages;
}

unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
                unsigned long end, pgprot_t newprot,
                int dirty_accountable, int prot_numa)
{
        unsigned long pages;

        if (is_vm_hugetlb_page(vma))
                pages = hugetlb_change_protection(vma, start, end, newprot);
        else
                pages = change_protection_range(vma, start, end, newprot,
                                dirty_accountable, prot_numa);

        return pages;
}

int
mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
        unsigned long start, unsigned long end, unsigned long newflags)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long oldflags = vma->vm_flags;
        long nrpages = (end - start) >> PAGE_SHIFT;
        unsigned long charged = 0;
        pgoff_t pgoff;
        int error;
        int dirty_accountable = 0;

        if (newflags == oldflags) {
                *pprev = vma;
                return 0;
        }

        /*
         * If we make a private mapping writable we increase our commit;
         * but (without finer accounting) cannot reduce our commit if we
         * make it unwritable again. hugetlb mappings were accounted for
         * even if read-only, so there is no need to account for them here.
         */
        if (newflags & VM_WRITE) {
                if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
                                                VM_SHARED|VM_NORESERVE))) {
                        charged = nrpages;
                        if (security_vm_enough_memory_mm(mm, charged))
                                return -ENOMEM;
                        newflags |= VM_ACCOUNT;
                }
        }

        /*
         * First try to merge with previous and/or next vma.
         */
        pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
        *pprev = vma_merge(mm, *pprev, start, end, newflags,
                        vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
        if (*pprev) {
                vma = *pprev;
                goto success;
        }

        *pprev = vma;

        if (start != vma->vm_start) {
                error = split_vma(mm, vma, start, 1);
                if (error)
                        goto fail;
        }

        if (end != vma->vm_end) {
                error = split_vma(mm, vma, end, 0);
                if (error)
                        goto fail;
        }

success:
        /*
         * vm_flags and vm_page_prot are protected by the mmap_sem
         * held in write mode.
         */
        vma->vm_flags = newflags;
        vma->vm_page_prot = pgprot_modify(vma->vm_page_prot,
                                          vm_get_page_prot(newflags));

        if (vma_wants_writenotify(vma)) {
                vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED);
                dirty_accountable = 1;
        }

        change_protection(vma, start, end, vma->vm_page_prot,
                          dirty_accountable, 0);

        vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
        vm_stat_account(mm, newflags, vma->vm_file, nrpages);
        perf_event_mmap(vma);
        return 0;

fail:
        vm_unacct_memory(charged);
        return error;
}

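/*
 * The mprotect(2) entry point: validate the arguments, honour
 * READ_IMPLIES_EXEC personalities, then, with mmap_sem held for write,
 * walk the VMAs covering [start, start + len) and apply the new
 * protection to each of them via mprotect_fixup().
 */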
SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
                unsigned long, prot)
{
        unsigned long vm_flags, nstart, end, tmp, reqprot;
        struct vm_area_struct *vma, *prev;
        int error = -EINVAL;
        const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
        prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
        if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
                return -EINVAL;

        if (start & ~PAGE_MASK)
                return -EINVAL;
        if (!len)
                return 0;
        len = PAGE_ALIGN(len);
        end = start + len;
        if (end <= start)
                return -ENOMEM;
        if (!arch_validate_prot(prot))
                return -EINVAL;

        reqprot = prot;
        /*
         * Does the application expect PROT_READ to imply PROT_EXEC?
         */
        if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
                prot |= PROT_EXEC;

        vm_flags = calc_vm_prot_bits(prot);

        down_write(&current->mm->mmap_sem);

        vma = find_vma(current->mm, start);
        error = -ENOMEM;
        if (!vma)
                goto out;
        prev = vma->vm_prev;
        if (unlikely(grows & PROT_GROWSDOWN)) {
                if (vma->vm_start >= end)
                        goto out;
                start = vma->vm_start;
                error = -EINVAL;
                if (!(vma->vm_flags & VM_GROWSDOWN))
                        goto out;
        } else {
                if (vma->vm_start > start)
                        goto out;
                if (unlikely(grows & PROT_GROWSUP)) {
                        end = vma->vm_end;
                        error = -EINVAL;
                        if (!(vma->vm_flags & VM_GROWSUP))
                                goto out;
                }
        }
        if (start > vma->vm_start)
                prev = vma;

        for (nstart = start ; ; ) {
                unsigned long newflags;

                /* Here we know that vma->vm_start <= nstart < vma->vm_end. */

                newflags = vm_flags;
                newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));

                /*
                 * newflags >> 4 shifts the VM_MAY* bits into the VM_*
                 * positions, so this rejects any requested permission
                 * that the mapping's VM_MAY* flags do not allow.
                 */
                if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
                        error = -EACCES;
                        goto out;
                }

                error = security_file_mprotect(vma, reqprot, prot);
                if (error)
                        goto out;

                tmp = vma->vm_end;
                if (tmp > end)
                        tmp = end;
                error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
                if (error)
                        goto out;
                nstart = tmp;

                if (nstart < prev->vm_end)
                        nstart = prev->vm_end;
                if (nstart >= end)
                        goto out;

                vma = prev->vm_next;
                if (!vma || vma->vm_start != nstart) {
                        error = -ENOMEM;
                        goto out;
                }
        }
out:
        up_write(&current->mm->mmap_sem);
        return error;
}
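
/*
 * Example (userspace, illustrative only, not part of this file): a minimal
 * sketch of how the syscall above is typically exercised. mprotect()
 * requires a page-aligned start address and rejects permissions that the
 * mapping's VM_MAY* flags do not allow, as checked in the loop above.
 *
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		long page = sysconf(_SC_PAGESIZE);
 *		void *p = mmap(NULL, page, PROT_READ | PROT_WRITE,
 *			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *		if (p == MAP_FAILED)
 *			return 1;
 *		// Drop write permission; subsequent writes fault with SIGSEGV.
 *		return mprotect(p, page, PROT_READ) ? 1 : 0;
 *	}
 */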