#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/security.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>

#include <asm/sections.h>
#include <asm/uaccess.h>

#include "internal.h"

static inline int is_kernel_rodata(unsigned long addr)
{
	return addr >= (unsigned long)__start_rodata &&
		addr < (unsigned long)__end_rodata;
}

/**
 * kfree_const - conditionally free memory
 * @x: pointer to the memory
 *
 * Function calls kfree only if @x is not in .rodata section.
 */
void kfree_const(const void *x)
{
	if (!is_kernel_rodata((unsigned long)x))
		kfree(x);
}
EXPORT_SYMBOL(kfree_const);

/**
 * kstrdup - allocate space for and copy an existing string
 * @s: the string to duplicate
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 */
char *kstrdup(const char *s, gfp_t gfp)
{
	size_t len;
	char *buf;

	if (!s)
		return NULL;

	len = strlen(s) + 1;
	buf = kmalloc_track_caller(len, gfp);
	if (buf)
		memcpy(buf, s, len);
	return buf;
}
EXPORT_SYMBOL(kstrdup);

/**
 * kstrdup_const - conditionally duplicate an existing const string
 * @s: the string to duplicate
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 *
 * Function returns the source string if it is in the .rodata section,
 * otherwise it falls back to kstrdup().
 * Strings allocated by kstrdup_const should be freed by kfree_const.
 */
const char *kstrdup_const(const char *s, gfp_t gfp)
{
	if (is_kernel_rodata((unsigned long)s))
		return s;

	return kstrdup(s, gfp);
}
EXPORT_SYMBOL(kstrdup_const);
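
/*
 * Illustrative usage (the surrounding caller is hypothetical): because
 * kstrdup_const() may return the .rodata string itself rather than a
 * copy, the result must be released with kfree_const(), never kfree():
 *
 *	kobj->name = kstrdup_const(name, GFP_KERNEL);
 *	if (!kobj->name)
 *		return -ENOMEM;
 *	...
 *	kfree_const(kobj->name);
 */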

/**
 * kstrndup - allocate space for and copy an existing string
 * @s: the string to duplicate
 * @max: read at most @max chars from @s
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 */
char *kstrndup(const char *s, size_t max, gfp_t gfp)
{
	size_t len;
	char *buf;

	if (!s)
		return NULL;

	len = strnlen(s, max);
	buf = kmalloc_track_caller(len+1, gfp);
	if (buf) {
		memcpy(buf, s, len);
		buf[len] = '\0';
	}
	return buf;
}
EXPORT_SYMBOL(kstrndup);

/**
 * kmemdup - duplicate region of memory
 *
 * @src: memory region to duplicate
 * @len: memory region length
 * @gfp: GFP mask to use
 */
void *kmemdup(const void *src, size_t len, gfp_t gfp)
{
	void *p;

	p = kmalloc_track_caller(len, gfp);
	if (p)
		memcpy(p, src, len);
	return p;
}
EXPORT_SYMBOL(kmemdup);

/**
 * memdup_user - duplicate memory region from user space
 *
 * @src: source address in user space
 * @len: number of bytes to copy
 *
 * Returns an ERR_PTR() on failure.
 */
void *memdup_user(const void __user *src, size_t len)
{
	void *p;

	/*
	 * Always use GFP_KERNEL, since copy_from_user() can sleep and
	 * cause a page fault, which makes it pointless to use GFP_NOFS
	 * or GFP_ATOMIC.
	 */
	p = kmalloc_track_caller(len, GFP_KERNEL);
	if (!p)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(p, src, len)) {
		kfree(p);
		return ERR_PTR(-EFAULT);
	}

	return p;
}
EXPORT_SYMBOL(memdup_user);
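
/*
 * Illustrative usage (hypothetical ioctl handler): memdup_user()
 * replaces the common kmalloc() + copy_from_user() pair; note that it
 * reports failure via ERR_PTR(), not NULL:
 *
 *	struct foo_args *args;
 *
 *	args = memdup_user(uarg, sizeof(*args));
 *	if (IS_ERR(args))
 *		return PTR_ERR(args);
 *	...
 *	kfree(args);
 */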

/*
 * strndup_user - duplicate an existing string from user space
 * @s: The string to duplicate
 * @n: Maximum number of bytes to copy, including the trailing NUL.
 */
char *strndup_user(const char __user *s, long n)
{
	char *p;
	long length;

	length = strnlen_user(s, n);

	if (!length)
		return ERR_PTR(-EFAULT);

	if (length > n)
		return ERR_PTR(-EINVAL);

	p = memdup_user(s, length);

	if (IS_ERR(p))
		return p;

	p[length - 1] = '\0';

	return p;
}
EXPORT_SYMBOL(strndup_user);

/**
 * memdup_user_nul - duplicate memory region from user space and NUL-terminate
 *
 * @src: source address in user space
 * @len: number of bytes to copy
 *
 * Returns an ERR_PTR() on failure.
 */
void *memdup_user_nul(const void __user *src, size_t len)
{
	char *p;

	/*
	 * Always use GFP_KERNEL, since copy_from_user() can sleep and
	 * cause a page fault, which makes it pointless to use GFP_NOFS
	 * or GFP_ATOMIC.
	 */
	p = kmalloc_track_caller(len + 1, GFP_KERNEL);
	if (!p)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(p, src, len)) {
		kfree(p);
		return ERR_PTR(-EFAULT);
	}
	p[len] = '\0';

	return p;
}
EXPORT_SYMBOL(memdup_user_nul);
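
/*
 * Illustrative usage (hypothetical procfs write handler):
 * memdup_user_nul() is the helper of choice when the user buffer will
 * be parsed as a string, since it guarantees NUL termination:
 *
 *	char *kbuf;
 *	int val, ret;
 *
 *	kbuf = memdup_user_nul(ubuf, count);
 *	if (IS_ERR(kbuf))
 *		return PTR_ERR(kbuf);
 *	ret = kstrtoint(strstrip(kbuf), 0, &val);
 *	kfree(kbuf);
 */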

void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
		struct vm_area_struct *prev, struct rb_node *rb_parent)
{
	struct vm_area_struct *next;

	vma->vm_prev = prev;
	if (prev) {
		next = prev->vm_next;
		prev->vm_next = vma;
	} else {
		mm->mmap = vma;
		if (rb_parent)
			next = rb_entry(rb_parent,
					struct vm_area_struct, vm_rb);
		else
			next = NULL;
	}
	vma->vm_next = next;
	if (next)
		next->vm_prev = vma;
}

/* Check if the vma is being used as a stack by this task */
int vma_is_stack_for_current(struct vm_area_struct *vma)
{
	struct task_struct * __maybe_unused t = current;

	return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
}

#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
void arch_pick_mmap_layout(struct mm_struct *mm)
{
	mm->mmap_base = TASK_UNMAPPED_BASE;
	mm->get_unmapped_area = arch_get_unmapped_area;
}
#endif

/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 * If the architecture does not support this function, simply return with no
 * pages pinned.
 */
int __weak __get_user_pages_fast(unsigned long start,
				 int nr_pages, int write, struct page **pages)
{
	return 0;
}
EXPORT_SYMBOL_GPL(__get_user_pages_fast);

/**
 * get_user_pages_fast() - pin user pages in memory
 * @start: starting user address
 * @nr_pages: number of pages from start to pin
 * @write: whether pages will be written to
 * @pages: array that receives pointers to the pages pinned.
 *	Should be at least nr_pages long.
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno.
 *
 * get_user_pages_fast provides equivalent functionality to get_user_pages,
 * operating on current and current->mm, with force=0 and vma=NULL. However,
 * unlike get_user_pages, it must be called without mmap_sem held.
 *
 * get_user_pages_fast may take mmap_sem and page table locks, so no
 * assumptions can be made about lack of locking. get_user_pages_fast is to be
 * implemented in a way that is advantageous (vs get_user_pages()) when the
 * user memory area is already faulted in and present in ptes. However, if the
 * pages have to be faulted in, it may turn out to be slightly slower, so
 * callers need to carefully consider what to use. On many architectures,
 * get_user_pages_fast simply falls back to get_user_pages.
 */
int __weak get_user_pages_fast(unsigned long start,
				int nr_pages, int write, struct page **pages)
{
	return get_user_pages_unlocked(start, nr_pages, write, 0, pages);
}
EXPORT_SYMBOL_GPL(get_user_pages_fast);
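
/*
 * Illustrative usage (hypothetical caller): pin a user buffer, use it,
 * then release it; every successfully pinned page must eventually be
 * dropped with put_page():
 *
 *	npinned = get_user_pages_fast(uaddr, npages, 1, pages);
 *	if (npinned < 0)
 *		return npinned;
 *	...
 *	for (i = 0; i < npinned; i++)
 *		put_page(pages[i]);
 */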

unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
	unsigned long len, unsigned long prot,
	unsigned long flag, unsigned long pgoff)
{
	unsigned long ret;
	struct mm_struct *mm = current->mm;
	unsigned long populate;

	ret = security_mmap_file(file, prot, flag);
	if (!ret) {
		if (down_write_killable(&mm->mmap_sem))
			return -EINTR;
		ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
				    &populate);
		up_write(&mm->mmap_sem);
		if (populate)
			mm_populate(ret, populate);
	}
	return ret;
}

unsigned long vm_mmap(struct file *file, unsigned long addr,
	unsigned long len, unsigned long prot,
	unsigned long flag, unsigned long offset)
{
	if (unlikely(offset + PAGE_ALIGN(len) < offset))
		return -EINVAL;
	if (unlikely(offset_in_page(offset)))
		return -EINVAL;

	return vm_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
}
EXPORT_SYMBOL(vm_mmap);

void kvfree(const void *addr)
{
	if (is_vmalloc_addr(addr))
		vfree(addr);
	else
		kfree(addr);
}
EXPORT_SYMBOL(kvfree);
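
/*
 * Illustrative usage (hypothetical allocation path): kvfree() pairs
 * naturally with the common "try kmalloc(), fall back to vmalloc()"
 * pattern, so the caller need not track which allocator succeeded:
 *
 *	table = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
 *	if (!table)
 *		table = vmalloc(size);
 *	...
 *	kvfree(table);
 */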

static inline void *__page_rmapping(struct page *page)
{
	unsigned long mapping;

	mapping = (unsigned long)page->mapping;
	mapping &= ~PAGE_MAPPING_FLAGS;

	return (void *)mapping;
}

/* Neutral page->mapping pointer to address_space or anon_vma or other */
void *page_rmapping(struct page *page)
{
	page = compound_head(page);
	return __page_rmapping(page);
}

/*
 * Return true if this page is mapped into pagetables.
 * For a compound page, it returns true if any subpage is mapped.
 */
bool page_mapped(struct page *page)
{
	int i;

	if (likely(!PageCompound(page)))
		return atomic_read(&page->_mapcount) >= 0;
	page = compound_head(page);
	if (atomic_read(compound_mapcount_ptr(page)) >= 0)
		return true;
	if (PageHuge(page))
		return false;
	for (i = 0; i < hpage_nr_pages(page); i++) {
		if (atomic_read(&page[i]._mapcount) >= 0)
			return true;
	}
	return false;
}
EXPORT_SYMBOL(page_mapped);
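
/*
 * Illustrative usage (hypothetical caller, in the style of page
 * migration code): page_mapped() answers "does any process still map
 * this page?", e.g. before deciding whether it must be unmapped first:
 *
 *	if (page_mapped(page))
 *		try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK);
 */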

struct anon_vma *page_anon_vma(struct page *page)
{
	unsigned long mapping;

	page = compound_head(page);
	mapping = (unsigned long)page->mapping;
	if ((mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
		return NULL;
	return __page_rmapping(page);
}

struct address_space *page_mapping(struct page *page)
{
	struct address_space *mapping;

	page = compound_head(page);

	/* This happens if someone calls flush_dcache_page on slab page */
	if (unlikely(PageSlab(page)))
		return NULL;

	if (unlikely(PageSwapCache(page))) {
		swp_entry_t entry;

		entry.val = page_private(page);
		return swap_address_space(entry);
	}

	mapping = page->mapping;
	if ((unsigned long)mapping & PAGE_MAPPING_ANON)
		return NULL;

	return (void *)((unsigned long)mapping & ~PAGE_MAPPING_FLAGS);
}
EXPORT_SYMBOL(page_mapping);

/* Slow path of page_mapcount() for compound pages */
int __page_mapcount(struct page *page)
{
	int ret;

	ret = atomic_read(&page->_mapcount) + 1;
	/*
	 * For file THP, page->_mapcount contains the total number of
	 * mappings of the page: no need to look into compound_mapcount.
	 */
	if (!PageAnon(page) && !PageHuge(page))
		return ret;
	page = compound_head(page);
	ret += atomic_read(compound_mapcount_ptr(page)) + 1;
	if (PageDoubleMap(page))
		ret--;
	return ret;
}
EXPORT_SYMBOL_GPL(__page_mapcount);

int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
int sysctl_overcommit_ratio __read_mostly = 50;
unsigned long sysctl_overcommit_kbytes __read_mostly;
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */

int overcommit_ratio_handler(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp,
			     loff_t *ppos)
{
	int ret;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		sysctl_overcommit_kbytes = 0;
	return ret;
}

int overcommit_kbytes_handler(struct ctl_table *table, int write,
			      void __user *buffer, size_t *lenp,
			      loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		sysctl_overcommit_ratio = 0;
	return ret;
}

/*
 * Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
 */
unsigned long vm_commit_limit(void)
{
	unsigned long allowed;

	if (sysctl_overcommit_kbytes)
		allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10);
	else
		allowed = ((totalram_pages - hugetlb_total_pages())
			   * sysctl_overcommit_ratio / 100);
	allowed += total_swap_pages;

	return allowed;
}
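
/*
 * Worked example (assumed values): with 4 GiB of RAM (1048576 pages of
 * 4 KiB), no hugetlb pages, 2 GiB of swap (524288 pages) and the
 * default overcommit_ratio of 50, the OVERCOMMIT_NEVER limit is
 *
 *	1048576 * 50 / 100 + 524288 = 1048576 pages = 4 GiB.
 */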

/*
 * Make sure vm_committed_as is in one cacheline and not cacheline shared
 * with other variables. It can be updated by several CPUs frequently.
 */
struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;

/*
 * The global memory commitment made in the system can be a metric
 * that can be used to drive ballooning decisions when Linux is hosted
 * as a guest. On Hyper-V, the host implements a policy engine for dynamically
 * balancing memory across competing virtual machines that are hosted.
 * Several metrics drive this policy engine including the guest reported
 * memory commitment.
 */
unsigned long vm_memory_committed(void)
{
	return percpu_counter_read_positive(&vm_committed_as);
}
EXPORT_SYMBOL_GPL(vm_memory_committed);
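
/*
 * Illustrative usage (hypothetical balloon driver): the commitment is
 * sampled when building a memory-pressure report for the host:
 *
 *	report->pages_committed = vm_memory_committed();
 */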

/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping. 0 means there is enough memory for the allocation to
 * succeed and -ENOMEM implies there is not.
 *
 * We currently support three overcommit policies, which are set via the
 * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting
 *
 * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
 * Additional code 2002 Jul 20 by Robert Love.
 *
 * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
 *
 * Note this is a helper function intended to be used by LSMs which
 * wish to use this logic.
 */
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
	long free, allowed, reserve;

	VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) <
			-(s64)vm_committed_as_batch * num_online_cpus(),
			"memory commitment underflow");

	vm_acct_memory(pages);

	/*
	 * Sometimes we want to use more memory than we have
	 */
	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
		return 0;

	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
		free = global_page_state(NR_FREE_PAGES);
		free += global_node_page_state(NR_FILE_PAGES);

		/*
		 * shmem pages shouldn't be counted as free in this
		 * case, they can't be purged, only swapped out, and
		 * that won't affect the overall amount of available
		 * memory in the system.
		 */
		free -= global_node_page_state(NR_SHMEM);

		free += get_nr_swap_pages();

		/*
		 * Any slabs which are created with the
		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
		 * which are reclaimable, under pressure. The dentry
		 * cache and most inode caches should fall into this
		 * category.
		 */
		free += global_page_state(NR_SLAB_RECLAIMABLE);
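
		/*
		 * At this point "free" is an optimistic estimate of the
		 * memory that could be made available without invoking
		 * the OOM killer: free pages plus page cache (minus
		 * unpurgeable shmem), plus free swap, plus reclaimable
		 * slab.
		 */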

		/*
		 * Leave reserved pages. The pages are not for anonymous pages.
		 */
		if (free <= totalreserve_pages)
			goto error;
		else
			free -= totalreserve_pages;

		/*
		 * Reserve some for root
		 */
		if (!cap_sys_admin)
			free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);

		if (free > pages)
			return 0;

		goto error;
	}

	allowed = vm_commit_limit();
	/*
	 * Reserve some for root
	 */
	if (!cap_sys_admin)
		allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);

	/*
	 * Don't let a single process grow so big a user can't recover
	 */
	if (mm) {
		reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);
		allowed -= min_t(long, mm->total_vm / 32, reserve);
	}

	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
		return 0;
error:
	vm_unacct_memory(pages);

	return -ENOMEM;
}
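
/*
 * Illustrative usage (paraphrasing the default capability hook in
 * security/commoncap.c): an LSM decides cap_sys_admin and defers the
 * accounting to this helper:
 *
 *	int cap_vm_enough_memory(struct mm_struct *mm, long pages)
 *	{
 *		int cap_sys_admin = 0;
 *
 *		if (cap_capable(current_cred(), &init_user_ns,
 *				CAP_SYS_ADMIN, SECURITY_CAP_NOAUDIT) == 0)
 *			cap_sys_admin = 1;
 *		return __vm_enough_memory(mm, pages, cap_sys_admin);
 *	}
 */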

/**
 * get_cmdline() - copy the cmdline value to a buffer.
 * @task: the task whose cmdline value to copy.
 * @buffer: the buffer to copy to.
 * @buflen: the length of the buffer. Larger cmdline values are truncated
 *	to this length.
 * Returns the size of the cmdline field copied. Note that the copy does
 * not guarantee an ending NUL byte.
 */
int get_cmdline(struct task_struct *task, char *buffer, int buflen)
{
	int res = 0;
	unsigned int len;
	struct mm_struct *mm = get_task_mm(task);
	unsigned long arg_start, arg_end, env_start, env_end;
	if (!mm)
		goto out;
	if (!mm->arg_end)
		goto out_mm;	/* Shh! No looking before we're done */

	down_read(&mm->mmap_sem);
	arg_start = mm->arg_start;
	arg_end = mm->arg_end;
	env_start = mm->env_start;
	env_end = mm->env_end;
	up_read(&mm->mmap_sem);

	len = arg_end - arg_start;

	if (len > buflen)
		len = buflen;

	res = access_process_vm(task, arg_start, buffer, len, 0);

	/*
	 * If the nul at the end of args has been overwritten, then
	 * assume application is using setproctitle(3).
	 */
	if (res > 0 && buffer[res-1] != '\0' && len < buflen) {
		len = strnlen(buffer, res);
		if (len < res) {
			res = len;
		} else {
			len = env_end - env_start;
			if (len > buflen - res)
				len = buflen - res;
			res += access_process_vm(task, env_start,
						 buffer+res, len, 0);
			res = strnlen(buffer, res);
		}
	}
out_mm:
	mmput(mm);
out:
	return res;
}
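
/*
 * Illustrative usage (hypothetical caller, in the style of the
 * /proc/<pid>/cmdline code): the result may not be NUL-terminated, so
 * any later use must be bounded by the returned length:
 *
 *	char buf[256];
 *	int n = get_cmdline(task, buf, sizeof(buf));
 *
 *	if (n > 0)
 *		pr_debug("cmdline: %.*s\n", n, buf);
 */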