// SPDX-License-Identifier: GPL-2.0
/*
 * linux/mm/mlock.c
 *
 * (C) Copyright 1995 Linus Torvalds
 * (C) Copyright 2002 Christoph Hellwig
 */

#include <linux/capability.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/sched/user.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/pagewalk.h>
#include <linux/mempolicy.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include <linux/export.h>
#include <linux/rmap.h>
#include <linux/mmzone.h>
#include <linux/hugetlb.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/secretmem.h>

#include "internal.h"

static DEFINE_PER_CPU(struct pagevec, mlock_pvec);

bool can_do_mlock(void)
{
	if (rlimit(RLIMIT_MEMLOCK) != 0)
		return true;
	if (capable(CAP_IPC_LOCK))
		return true;
	return false;
}
EXPORT_SYMBOL(can_do_mlock);

/*
 * Mlocked pages are marked with PageMlocked() flag for efficient testing
 * in vmscan and, possibly, the fault path; and to support semi-accurate
 * statistics.
 *
 * An mlocked page [PageMlocked(page)] is unevictable.  As such, it will
 * be placed on the LRU "unevictable" list, rather than the [in]active lists.
 * The unevictable list is an LRU sibling list to the [in]active lists.
 * PageUnevictable is set to indicate the unevictable state.
 */
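/*
 * A note on mlock_count (summarizing the helpers below): page->mlock_count
 * approximates how many times a page has been mlocked, but it is only
 * maintained while the page sits on the unevictable LRU.  Once the page is
 * off the LRU, or found evictable again, the count may undercount, and the
 * last munlock or page reclaim is left to sort that out.
 */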
static struct lruvec *__mlock_page(struct page *page, struct lruvec *lruvec)
{
	/* There is nothing more we can do while it's off LRU */
	if (!TestClearPageLRU(page))
		return lruvec;

	lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec);

	if (unlikely(page_evictable(page))) {
		/*
		 * This is a little surprising, but quite possible:
		 * PageMlocked must have got cleared already by another CPU.
		 * Could this page be on the Unevictable LRU?  I'm not sure,
		 * but move it now if so.
		 */
		if (PageUnevictable(page)) {
			del_page_from_lru_list(page, lruvec);
			ClearPageUnevictable(page);
			add_page_to_lru_list(page, lruvec);
			__count_vm_events(UNEVICTABLE_PGRESCUED,
					  thp_nr_pages(page));
		}
		goto out;
	}

	if (PageUnevictable(page)) {
		if (PageMlocked(page))
			page->mlock_count++;
		goto out;
	}

	del_page_from_lru_list(page, lruvec);
	ClearPageActive(page);
	SetPageUnevictable(page);
	page->mlock_count = !!PageMlocked(page);
	add_page_to_lru_list(page, lruvec);
	__count_vm_events(UNEVICTABLE_PGCULLED, thp_nr_pages(page));
out:
	SetPageLRU(page);
	return lruvec;
}

static struct lruvec *__mlock_new_page(struct page *page, struct lruvec *lruvec)
{
	VM_BUG_ON_PAGE(PageLRU(page), page);

	lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec);

	/* As above, this is a little surprising, but possible */
	if (unlikely(page_evictable(page)))
		goto out;

	SetPageUnevictable(page);
	page->mlock_count = !!PageMlocked(page);
	__count_vm_events(UNEVICTABLE_PGCULLED, thp_nr_pages(page));
out:
	add_page_to_lru_list(page, lruvec);
	SetPageLRU(page);
	return lruvec;
}

static struct lruvec *__munlock_page(struct page *page, struct lruvec *lruvec)
{
	int nr_pages = thp_nr_pages(page);
	bool isolated = false;

	if (!TestClearPageLRU(page))
		goto munlock;

	isolated = true;
	lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec);

	if (PageUnevictable(page)) {
		/* Then mlock_count is maintained, but might undercount */
		if (page->mlock_count)
			page->mlock_count--;
		if (page->mlock_count)
			goto out;
	}
	/* else assume that was the last mlock: reclaim will fix it if not */

munlock:
	if (TestClearPageMlocked(page)) {
		__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
		if (isolated || !PageUnevictable(page))
			__count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages);
		else
			__count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
	}

	/* page_evictable() has to be checked *after* clearing Mlocked */
	if (isolated && PageUnevictable(page) && page_evictable(page)) {
		del_page_from_lru_list(page, lruvec);
		ClearPageUnevictable(page);
		add_page_to_lru_list(page, lruvec);
		__count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages);
	}
out:
	if (isolated)
		SetPageLRU(page);
	return lruvec;
}

/*
 * Flags held in the low bits of a struct page pointer on the mlock_pvec.
 */
#define LRU_PAGE 0x1
#define NEW_PAGE 0x2
static inline struct page *mlock_lru(struct page *page)
{
	return (struct page *)((unsigned long)page + LRU_PAGE);
}

static inline struct page *mlock_new(struct page *page)
{
	return (struct page *)((unsigned long)page + NEW_PAGE);
}
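/*
 * Illustrative sketch of the tagging above (shown as a comment only):
 * struct page is at least word-aligned, so the two low bits of a page
 * pointer are free to carry LRU_PAGE or NEW_PAGE.  mlock_pagevec() below
 * undoes the encoding with:
 *
 *	mlock = (unsigned long)page & (LRU_PAGE | NEW_PAGE);
 *	page  = (struct page *)((unsigned long)page - mlock);
 *
 * An entry with neither bit set denotes a page queued for munlock.
 */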
/*
 * mlock_pagevec() is derived from pagevec_lru_move_fn():
 * perhaps that can make use of such page pointer flags in future,
 * but for now just keep it for mlock.  We could use three separate
 * pagevecs instead, but one feels better (munlocking a full pagevec
 * does not need to drain mlocking pagevecs first).
 */
static void mlock_pagevec(struct pagevec *pvec)
{
	struct lruvec *lruvec = NULL;
	unsigned long mlock;
	struct page *page;
	int i;

	for (i = 0; i < pagevec_count(pvec); i++) {
		page = pvec->pages[i];
		mlock = (unsigned long)page & (LRU_PAGE | NEW_PAGE);
		page = (struct page *)((unsigned long)page - mlock);
		pvec->pages[i] = page;

		if (mlock & LRU_PAGE)
			lruvec = __mlock_page(page, lruvec);
		else if (mlock & NEW_PAGE)
			lruvec = __mlock_new_page(page, lruvec);
		else
			lruvec = __munlock_page(page, lruvec);
	}

	if (lruvec)
		unlock_page_lruvec_irq(lruvec);
	release_pages(pvec->pages, pvec->nr);
	pagevec_reinit(pvec);
}

void mlock_page_drain(int cpu)
{
	struct pagevec *pvec;

	pvec = &per_cpu(mlock_pvec, cpu);
	if (pagevec_count(pvec))
		mlock_pagevec(pvec);
}

bool need_mlock_page_drain(int cpu)
{
	return pagevec_count(&per_cpu(mlock_pvec, cpu));
}

/**
 * mlock_folio - mlock a folio already on (or temporarily off) LRU
 * @folio: folio to be mlocked.
 */
void mlock_folio(struct folio *folio)
{
	struct pagevec *pvec = &get_cpu_var(mlock_pvec);

	if (!folio_test_set_mlocked(folio)) {
		int nr_pages = folio_nr_pages(folio);

		zone_stat_mod_folio(folio, NR_MLOCK, nr_pages);
		__count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
	}

	folio_get(folio);
	if (!pagevec_add(pvec, mlock_lru(&folio->page)) ||
	    folio_test_large(folio) || lru_cache_disabled())
		mlock_pagevec(pvec);
	put_cpu_var(mlock_pvec);
}

/**
 * mlock_new_page - mlock a newly allocated page not yet on LRU
 * @page: page to be mlocked, either a normal page or a THP head.
 */
void mlock_new_page(struct page *page)
{
	struct pagevec *pvec = &get_cpu_var(mlock_pvec);
	int nr_pages = thp_nr_pages(page);

	SetPageMlocked(page);
	mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
	__count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);

	get_page(page);
	if (!pagevec_add(pvec, mlock_new(page)) ||
	    PageHead(page) || lru_cache_disabled())
		mlock_pagevec(pvec);
	put_cpu_var(mlock_pvec);
}

/**
 * munlock_page - munlock a page
 * @page: page to be munlocked, either a normal page or a THP head.
 */
void munlock_page(struct page *page)
{
	struct pagevec *pvec = &get_cpu_var(mlock_pvec);

	/*
	 * TestClearPageMlocked(page) must be left to __munlock_page(),
	 * which will check whether the page is multiply mlocked.
	 */

	get_page(page);
	if (!pagevec_add(pvec, page) ||
	    PageHead(page) || lru_cache_disabled())
		mlock_pagevec(pvec);
	put_cpu_var(mlock_pvec);
}
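/*
 * Summary of the three entry points above: mlock_folio(), mlock_new_page()
 * and munlock_page() each take a page reference and queue the page on the
 * per-CPU mlock_pvec; the actual LRU and mlock_count manipulation is
 * deferred to mlock_pagevec(), which runs under the lruvec lock and then
 * drops the references via release_pages().  The pagevec is flushed
 * immediately for large folios/THP heads, when the LRU cache is disabled,
 * or once the pagevec is full.
 */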
static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
			   unsigned long end, struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	spinlock_t *ptl;
	pte_t *start_pte, *pte;
	struct page *page;

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		if (!pmd_present(*pmd))
			goto out;
		if (is_huge_zero_pmd(*pmd))
			goto out;
		page = pmd_page(*pmd);
		if (vma->vm_flags & VM_LOCKED)
			mlock_folio(page_folio(page));
		else
			munlock_page(page);
		goto out;
	}

	start_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (pte = start_pte; addr != end; pte++, addr += PAGE_SIZE) {
		if (!pte_present(*pte))
			continue;
		page = vm_normal_page(vma, addr, *pte);
		if (!page)
			continue;
		if (PageTransCompound(page))
			continue;
		if (vma->vm_flags & VM_LOCKED)
			mlock_folio(page_folio(page));
		else
			munlock_page(page);
	}
	pte_unmap(start_pte);
out:
	spin_unlock(ptl);
	cond_resched();
	return 0;
}

/*
 * mlock_vma_pages_range() - mlock any pages already in the range,
 *                           or munlock all pages in the range.
 * @vma - vma containing range to be mlock()ed or munlock()ed
 * @start - start address in @vma of the range
 * @end - end of range in @vma
 * @newflags - the new set of flags for @vma.
 *
 * Called for mlock(), mlock2() and mlockall(), to set @vma VM_LOCKED;
 * called for munlock() and munlockall(), to clear VM_LOCKED from @vma.
 */
static void mlock_vma_pages_range(struct vm_area_struct *vma,
	unsigned long start, unsigned long end, vm_flags_t newflags)
{
	static const struct mm_walk_ops mlock_walk_ops = {
		.pmd_entry = mlock_pte_range,
	};

	/*
	 * There is a slight chance that concurrent page migration,
	 * or page reclaim finding a page of this now-VM_LOCKED vma,
	 * will call mlock_vma_page() and raise page's mlock_count:
	 * double counting, leaving the page unevictable indefinitely.
	 * Communicate this danger to mlock_vma_page() with VM_IO,
	 * which is a VM_SPECIAL flag not allowed on VM_LOCKED vmas.
	 * mmap_lock is held in write mode here, so this weird
	 * combination should not be visible to other mmap_lock users;
	 * but WRITE_ONCE so rmap walkers must see VM_IO if VM_LOCKED.
	 */
	if (newflags & VM_LOCKED)
		newflags |= VM_IO;
	WRITE_ONCE(vma->vm_flags, newflags);

	lru_add_drain();
	walk_page_range(vma->vm_mm, start, end, &mlock_walk_ops, NULL);
	lru_add_drain();

	if (newflags & VM_IO) {
		newflags &= ~VM_IO;
		WRITE_ONCE(vma->vm_flags, newflags);
	}
}
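/*
 * Call chain for reference (as wired up in this file): mlock(2), mlock2(2)
 * and munlock(2) reach apply_vma_lock_flags(), which calls mlock_fixup()
 * per vma; when the flags actually change, mlock_fixup() calls
 * mlock_vma_pages_range(), and walk_page_range() then invokes
 * mlock_pte_range() for each pmd in the range to mlock or munlock the
 * pages already mapped there.
 */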
/*
 * mlock_fixup - handle mlock[all]/munlock[all] requests.
 *
 * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
 * munlock is a no-op.  However, for some special vmas, we go ahead and
 * populate the ptes.
 *
 * For vmas that pass the filters, merge/split as appropriate.
 */
static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
	unsigned long start, unsigned long end, vm_flags_t newflags)
{
	struct mm_struct *mm = vma->vm_mm;
	pgoff_t pgoff;
	int nr_pages;
	int ret = 0;
	vm_flags_t oldflags = vma->vm_flags;

	if (newflags == oldflags || (oldflags & VM_SPECIAL) ||
	    is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) ||
	    vma_is_dax(vma) || vma_is_secretmem(vma))
		/* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
		goto out;

	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
			  vma->vm_file, pgoff, vma_policy(vma),
			  vma->vm_userfaultfd_ctx, anon_vma_name(vma));
	if (*prev) {
		vma = *prev;
		goto success;
	}

	if (start != vma->vm_start) {
		ret = split_vma(mm, vma, start, 1);
		if (ret)
			goto out;
	}

	if (end != vma->vm_end) {
		ret = split_vma(mm, vma, end, 0);
		if (ret)
			goto out;
	}

success:
	/*
	 * Keep track of amount of locked VM.
	 */
	nr_pages = (end - start) >> PAGE_SHIFT;
	if (!(newflags & VM_LOCKED))
		nr_pages = -nr_pages;
	else if (oldflags & VM_LOCKED)
		nr_pages = 0;
	mm->locked_vm += nr_pages;

	/*
	 * vm_flags is protected by the mmap_lock held in write mode.
	 * It's okay if try_to_unmap_one unmaps a page just after we
	 * set VM_LOCKED, populate_vma_page_range will bring it back.
	 */

	if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) {
		/* No work to do, and mlocking twice would be wrong */
		vma->vm_flags = newflags;
	} else {
		mlock_vma_pages_range(vma, start, end, newflags);
	}
out:
	*prev = vma;
	return ret;
}

static int apply_vma_lock_flags(unsigned long start, size_t len,
				vm_flags_t flags)
{
	unsigned long nstart, end, tmp;
	struct vm_area_struct *vma, *prev;
	int error;

	VM_BUG_ON(offset_in_page(start));
	VM_BUG_ON(len != PAGE_ALIGN(len));
	end = start + len;
	if (end < start)
		return -EINVAL;
	if (end == start)
		return 0;
	vma = find_vma(current->mm, start);
	if (!vma || vma->vm_start > start)
		return -ENOMEM;

	prev = vma->vm_prev;
	if (start > vma->vm_start)
		prev = vma;

	for (nstart = start ; ; ) {
		vm_flags_t newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;

		newflags |= flags;

		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
		tmp = vma->vm_end;
		if (tmp > end)
			tmp = end;
		error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
		if (error)
			break;
		nstart = tmp;
		if (nstart < prev->vm_end)
			nstart = prev->vm_end;
		if (nstart >= end)
			break;

		vma = prev->vm_next;
		if (!vma || vma->vm_start != nstart) {
			error = -ENOMEM;
			break;
		}
	}
	return error;
}

/*
 * Go through the vmas and sum up the size of the already-mlocked pages.
 * Note that the deferred memory locking case (mlock2(,,MLOCK_ONFAULT))
 * is also counted.
 * Return value: count of previously mlocked pages.
 */
static unsigned long count_mm_mlocked_page_nr(struct mm_struct *mm,
		unsigned long start, size_t len)
{
	struct vm_area_struct *vma;
	unsigned long count = 0;

	if (mm == NULL)
		mm = current->mm;

	vma = find_vma(mm, start);
	if (vma == NULL)
		return 0;

	for (; vma ; vma = vma->vm_next) {
		if (start >= vma->vm_end)
			continue;
		if (start + len <= vma->vm_start)
			break;
		if (vma->vm_flags & VM_LOCKED) {
			if (start > vma->vm_start)
				count -= (start - vma->vm_start);
			if (start + len < vma->vm_end) {
				count += start + len - vma->vm_start;
				break;
			}
			count += vma->vm_end - vma->vm_start;
		}
	}

	return count >> PAGE_SHIFT;
}
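/*
 * Worked example for count_mm_mlocked_page_nr() (illustrative, assuming
 * 4KiB pages): with a single VM_LOCKED vma covering [0x10000, 0x20000)
 * and a request of start = 0x18000, len = 0x10000, the loop subtracts
 * (start - vm_start) = 0x8000 and adds (vm_end - vm_start) = 0x10000,
 * so the function returns 0x8000 >> PAGE_SHIFT = 8 previously mlocked
 * pages.
 */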
/*
 * convert get_user_pages() return value to posix mlock() error
 */
static int __mlock_posix_error_return(long retval)
{
	if (retval == -EFAULT)
		retval = -ENOMEM;
	else if (retval == -ENOMEM)
		retval = -EAGAIN;
	return retval;
}

static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags)
{
	unsigned long locked;
	unsigned long lock_limit;
	int error = -ENOMEM;

	start = untagged_addr(start);

	if (!can_do_mlock())
		return -EPERM;

	len = PAGE_ALIGN(len + (offset_in_page(start)));
	start &= PAGE_MASK;

	lock_limit = rlimit(RLIMIT_MEMLOCK);
	lock_limit >>= PAGE_SHIFT;
	locked = len >> PAGE_SHIFT;

	if (mmap_write_lock_killable(current->mm))
		return -EINTR;

	locked += current->mm->locked_vm;
	if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) {
		/*
		 * It is possible that the requested region intersects with
		 * previously mlocked areas; that part is already accounted
		 * in "mm->locked_vm" and should not be counted again towards
		 * the new mlock total.  So check and adjust the locked count
		 * if necessary.
		 */
		locked -= count_mm_mlocked_page_nr(current->mm,
				start, len);
	}

	/* check against resource limits */
	if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
		error = apply_vma_lock_flags(start, len, flags);

	mmap_write_unlock(current->mm);
	if (error)
		return error;

	error = __mm_populate(start, len, 0);
	if (error)
		return __mlock_posix_error_return(error);
	return 0;
}

SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
{
	return do_mlock(start, len, VM_LOCKED);
}

SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags)
{
	vm_flags_t vm_flags = VM_LOCKED;

	if (flags & ~MLOCK_ONFAULT)
		return -EINVAL;

	if (flags & MLOCK_ONFAULT)
		vm_flags |= VM_LOCKONFAULT;

	return do_mlock(start, len, vm_flags);
}

SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
{
	int ret;

	start = untagged_addr(start);

	len = PAGE_ALIGN(len + (offset_in_page(start)));
	start &= PAGE_MASK;

	if (mmap_write_lock_killable(current->mm))
		return -EINTR;
	ret = apply_vma_lock_flags(start, len, 0);
	mmap_write_unlock(current->mm);

	return ret;
}
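/*
 * Userspace view of the system calls above (illustrative):
 *
 *	mlock(addr, len);                 -> do_mlock(..., VM_LOCKED)
 *	mlock2(addr, len, MLOCK_ONFAULT); -> do_mlock(..., VM_LOCKED | VM_LOCKONFAULT)
 *	munlock(addr, len);               -> apply_vma_lock_flags(..., 0)
 *
 * do_mlock() checks RLIMIT_MEMLOCK (unless CAP_IPC_LOCK) and then faults
 * the range in with __mm_populate(); munlock() clears the VM_LOCKED bits
 * and munlocks whatever is currently mapped in the range.
 */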
/*
 * Take the MCL_* flags passed into mlockall (or 0 if called from munlockall)
 * and translate into the appropriate modifications to mm->def_flags and/or the
 * flags for all current VMAs.
 *
 * There are a couple of subtleties with this.  If mlockall() is called multiple
 * times with different flags, the values do not necessarily stack.  If mlockall
 * is called once including the MCL_FUTURE flag and then a second time without
 * it, VM_LOCKED and VM_LOCKONFAULT will be cleared from mm->def_flags.
 */
static int apply_mlockall_flags(int flags)
{
	struct vm_area_struct *vma, *prev = NULL;
	vm_flags_t to_add = 0;

	current->mm->def_flags &= VM_LOCKED_CLEAR_MASK;
	if (flags & MCL_FUTURE) {
		current->mm->def_flags |= VM_LOCKED;

		if (flags & MCL_ONFAULT)
			current->mm->def_flags |= VM_LOCKONFAULT;

		if (!(flags & MCL_CURRENT))
			goto out;
	}

	if (flags & MCL_CURRENT) {
		to_add |= VM_LOCKED;
		if (flags & MCL_ONFAULT)
			to_add |= VM_LOCKONFAULT;
	}

	for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
		vm_flags_t newflags;

		newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
		newflags |= to_add;

		/* Ignore errors */
		mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
		cond_resched();
	}
out:
	return 0;
}

SYSCALL_DEFINE1(mlockall, int, flags)
{
	unsigned long lock_limit;
	int ret;

	if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) ||
	    flags == MCL_ONFAULT)
		return -EINVAL;

	if (!can_do_mlock())
		return -EPERM;

	lock_limit = rlimit(RLIMIT_MEMLOCK);
	lock_limit >>= PAGE_SHIFT;

	if (mmap_write_lock_killable(current->mm))
		return -EINTR;

	ret = -ENOMEM;
	if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
	    capable(CAP_IPC_LOCK))
		ret = apply_mlockall_flags(flags);
	mmap_write_unlock(current->mm);
	if (!ret && (flags & MCL_CURRENT))
		mm_populate(0, TASK_SIZE);

	return ret;
}

SYSCALL_DEFINE0(munlockall)
{
	int ret;

	if (mmap_write_lock_killable(current->mm))
		return -EINTR;
	ret = apply_mlockall_flags(0);
	mmap_write_unlock(current->mm);
	return ret;
}

/*
 * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB
 * shm segments) get accounted against the ucounts instead.
 */
static DEFINE_SPINLOCK(shmlock_user_lock);

int user_shm_lock(size_t size, struct ucounts *ucounts)
{
	unsigned long lock_limit, locked;
	long memlock;
	int allowed = 0;

	locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	lock_limit = rlimit(RLIMIT_MEMLOCK);
	if (lock_limit == RLIM_INFINITY)
		allowed = 1;
	lock_limit >>= PAGE_SHIFT;
	spin_lock(&shmlock_user_lock);
	memlock = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);

	if (!allowed && (memlock == LONG_MAX || memlock > lock_limit) && !capable(CAP_IPC_LOCK)) {
		dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
		goto out;
	}
	if (!get_ucounts(ucounts)) {
		dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
		allowed = 0;
		goto out;
	}
	allowed = 1;
out:
	spin_unlock(&shmlock_user_lock);
	return allowed;
}

void user_shm_unlock(size_t size, struct ucounts *ucounts)
{
	spin_lock(&shmlock_user_lock);
	dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, (size + PAGE_SIZE - 1) >> PAGE_SHIFT);
	spin_unlock(&shmlock_user_lock);
	put_ucounts(ucounts);
}
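/*
 * Pairing note for the SHM accounting above (illustrative): a successful
 * user_shm_lock(size, ucounts) charges (size + PAGE_SIZE - 1) >> PAGE_SHIFT
 * pages to UCOUNT_RLIMIT_MEMLOCK and takes a ucounts reference; callers are
 * expected to balance it with user_shm_unlock() of the same size, which
 * uncharges and drops that reference.
 */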