/*
 * linux/mm/nommu.c
 *
 * Replacement code for mm functions to support CPUs that don't
 * have any form of memory management unit (thus no virtual memory).
 *
 * See Documentation/nommu-mmap.txt
 *
 * Copyright (c) 2004-2008 David Howells <dhowells@redhat.com>
 * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
 * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
 * Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
 * Copyright (c) 2007-2010 Paul Mundt <lethal@linux-sh.org>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/export.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/vmacache.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/compiler.h>
#include <linux/mount.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/printk.h>

#include <linux/uaccess.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include "internal.h"

void *high_memory;
EXPORT_SYMBOL(high_memory);
struct page *mem_map;
unsigned long max_mapnr;
EXPORT_SYMBOL(max_mapnr);
unsigned long highest_memmap_pfn;
int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
int heap_stack_gap = 0;

atomic_long_t mmap_pages_allocated;

EXPORT_SYMBOL(mem_map);

/* list of mapped, potentially shareable regions */
static struct kmem_cache *vm_region_jar;
struct rb_root nommu_region_tree = RB_ROOT;
DECLARE_RWSEM(nommu_region_sem);

const struct vm_operations_struct generic_file_vm_ops = {
};
/*
 * Return the total memory allocated for this pointer, not
 * just what the caller asked for.
 *
 * Doesn't have to be accurate, i.e. may have races.
 */
unsigned int kobjsize(const void *objp)
{
	struct page *page;

	/*
	 * If the object we have should not have ksize performed on it,
	 * return size of 0
	 */
	if (!objp || !virt_addr_valid(objp))
		return 0;

	page = virt_to_head_page(objp);

	/*
	 * If the allocator sets PageSlab, we know the pointer came from
	 * kmalloc().
	 */
	if (PageSlab(page))
		return ksize(objp);

	/*
	 * If it's not a compound page, see if we have a matching VMA
	 * region. This test is intentionally done in reverse order,
	 * so if there's no VMA, we still fall through and hand back
	 * PAGE_SIZE for 0-order pages.
	 */
	if (!PageCompound(page)) {
		struct vm_area_struct *vma;

		vma = find_vma(current->mm, (unsigned long)objp);
		if (vma)
			return vma->vm_end - vma->vm_start;
	}

	/*
	 * The ksize() function is only guaranteed to work for pointers
	 * returned by kmalloc(). So handle arbitrary pointers here.
	 */
	return PAGE_SIZE << compound_order(page);
}
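/*
 * Illustrative sketch (not part of the original file): kobjsize()
 * reports the memory actually backing a pointer, which may exceed the
 * size the caller asked for, e.g. a 100-byte kmalloc() is typically
 * rounded up to the slab bucket size.
 *
 *	void *p = kmalloc(100, GFP_KERNEL);
 *
 *	if (p)
 *		pr_info("asked for 100, kobjsize reports %u\n", kobjsize(p));
 *	kfree(p);
 */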
static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
			     unsigned long start, unsigned long nr_pages,
			     unsigned int foll_flags, struct page **pages,
			     struct vm_area_struct **vmas, int *nonblocking)
{
	struct vm_area_struct *vma;
	unsigned long vm_flags;
	int i;

	/* calculate required read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (foll_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (foll_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	for (i = 0; i < nr_pages; i++) {
		vma = find_vma(mm, start);
		if (!vma)
			goto finish_or_fault;

		/* protect what we can, including chardevs */
		if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags))
			goto finish_or_fault;

		if (pages) {
			pages[i] = virt_to_page(start);
			if (pages[i])
				get_page(pages[i]);
		}
		if (vmas)
			vmas[i] = vma;
		start = (start + PAGE_SIZE) & PAGE_MASK;
	}

	return i;

finish_or_fault:
	return i ? : -EFAULT;
}

/*
 * get a list of pages in an address range belonging to the specified process
 * and indicate the VMA that covers each page
 * - this is potentially dodgy as we may end up incrementing the page count of
 *   a slab page or a secondary page from a compound page
 * - don't permit access to VMAs that don't support it, such as I/O mappings
 */
long get_user_pages(unsigned long start, unsigned long nr_pages,
		    unsigned int gup_flags, struct page **pages,
		    struct vm_area_struct **vmas)
{
	return __get_user_pages(current, current->mm, start, nr_pages,
				gup_flags, pages, vmas, NULL);
}
EXPORT_SYMBOL(get_user_pages);

long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
			   unsigned int gup_flags, struct page **pages,
			   int *locked)
{
	return get_user_pages(start, nr_pages, gup_flags, pages, NULL);
}
EXPORT_SYMBOL(get_user_pages_locked);

static long __get_user_pages_unlocked(struct task_struct *tsk,
				      struct mm_struct *mm, unsigned long start,
				      unsigned long nr_pages, struct page **pages,
				      unsigned int gup_flags)
{
	long ret;
	down_read(&mm->mmap_sem);
	ret = __get_user_pages(tsk, mm, start, nr_pages, gup_flags, pages,
			       NULL, NULL);
	up_read(&mm->mmap_sem);
	return ret;
}
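/*
 * Illustrative sketch (not part of the original file): the usual
 * pin/use/release pattern for get_user_pages(); "uaddr" and "n" are
 * hypothetical. On !MMU the "pinned" pages are simply the physical
 * pages the address already refers to.
 *
 *	struct page *pages[16];
 *	long got, i;
 *
 *	got = get_user_pages(uaddr, n, FOLL_WRITE, pages, NULL);
 *	if (got < 0)
 *		return got;
 *	...access the pages...
 *	for (i = 0; i < got; i++)
 *		put_page(pages[i]);
 */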
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
			     struct page **pages, unsigned int gup_flags)
{
	return __get_user_pages_unlocked(current, current->mm, start, nr_pages,
					 pages, gup_flags);
}
EXPORT_SYMBOL(get_user_pages_unlocked);

/**
 * follow_pfn - look up PFN at a user virtual address
 * @vma: memory mapping
 * @address: user virtual address
 * @pfn: location to store found PFN
 *
 * Only IO mappings and raw PFN mappings are allowed.
 *
 * Returns zero and the pfn at @pfn on success, -ve otherwise.
 */
int follow_pfn(struct vm_area_struct *vma, unsigned long address,
	       unsigned long *pfn)
{
	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
		return -EINVAL;

	*pfn = address >> PAGE_SHIFT;
	return 0;
}
EXPORT_SYMBOL(follow_pfn);

LIST_HEAD(vmap_area_list);

void vfree(const void *addr)
{
	kfree(addr);
}
EXPORT_SYMBOL(vfree);
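/*
 * Illustrative sketch (not part of the original file): follow_pfn()
 * only succeeds on VM_IO/VM_PFNMAP mappings; on !MMU the PFN is just
 * the address shifted, as implemented above.
 *
 *	unsigned long pfn;
 *
 *	if (follow_pfn(vma, addr, &pfn) == 0)
 *		pr_debug("addr %#lx -> pfn %#lx\n", addr, pfn);
 */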
void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
	/*
	 * You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc()
	 * returns only a logical address.
	 */
	return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM);
}
EXPORT_SYMBOL(__vmalloc);

void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags)
{
	return __vmalloc(size, flags, PAGE_KERNEL);
}

void *vmalloc_user(unsigned long size)
{
	void *ret;

	ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
	if (ret) {
		struct vm_area_struct *vma;

		down_write(&current->mm->mmap_sem);
		vma = find_vma(current->mm, (unsigned long)ret);
		if (vma)
			vma->vm_flags |= VM_USERMAP;
		up_write(&current->mm->mmap_sem);
	}

	return ret;
}
EXPORT_SYMBOL(vmalloc_user);

struct page *vmalloc_to_page(const void *addr)
{
	return virt_to_page(addr);
}
EXPORT_SYMBOL(vmalloc_to_page);

unsigned long vmalloc_to_pfn(const void *addr)
{
	return page_to_pfn(virt_to_page(addr));
}
EXPORT_SYMBOL(vmalloc_to_pfn);

long vread(char *buf, char *addr, unsigned long count)
{
	/* Don't allow overflow */
	if ((unsigned long) buf + count < count)
		count = -(unsigned long) buf;

	memcpy(buf, addr, count);
	return count;
}

long vwrite(char *buf, char *addr, unsigned long count)
{
	/* Don't allow overflow */
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;

	memcpy(addr, buf, count);
	return count;
}
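/*
 * Illustrative sketch (not part of the original file): vread()/vwrite()
 * degenerate to memcpy() on !MMU (with only an overflow clamp), so a
 * /proc/kcore-style reader can call them unconditionally.
 *
 *	char tmp[64];
 *
 *	if (vread(tmp, kaddr, sizeof(tmp)) == sizeof(tmp))
 *		...tmp now holds a copy of the kernel data...
 */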
/*
 * vmalloc - allocate virtually contiguous memory
 *
 * @size: allocation size
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vmalloc(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
}
EXPORT_SYMBOL(vmalloc);

/*
 * vzalloc - allocate virtually contiguous memory with zero fill
 *
 * @size: allocation size
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 * The memory allocated is set to zero.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vzalloc(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
			 PAGE_KERNEL);
}
EXPORT_SYMBOL(vzalloc);

/**
 * vmalloc_node - allocate memory on a specific node
 * @size: allocation size
 * @node: numa node
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vmalloc_node(unsigned long size, int node)
{
	return vmalloc(size);
}
EXPORT_SYMBOL(vmalloc_node);

/**
 * vzalloc_node - allocate memory on a specific node with zero fill
 * @size: allocation size
 * @node: numa node
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 * The memory allocated is set to zero.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vzalloc_node(unsigned long size, int node)
{
	return vzalloc(size);
}
EXPORT_SYMBOL(vzalloc_node);
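/*
 * Illustrative sketch (not part of the original file): since every
 * vmalloc() variant above bottoms out in kmalloc(), the memory it
 * returns is physically contiguous on !MMU and virt_to_phys() is
 * valid on it, unlike on MMU kernels.
 *
 *	void *p = vmalloc(8192);
 *
 *	if (p) {
 *		phys_addr_t phys = virt_to_phys(p);
 *		pr_debug("vmalloc() memory at phys %pa\n", &phys);
 *	}
 *	vfree(p);
 */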
/**
 * vmalloc_exec - allocate virtually contiguous, executable memory
 * @size: allocation size
 *
 * Kernel-internal function to allocate enough pages to cover @size
 * from the page level allocator and map them into contiguous and
 * executable kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vmalloc_exec(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
}

/**
 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
 * @size: allocation size
 *
 * Allocate enough 32bit PA addressable pages to cover @size from the
 * page level allocator and map them into contiguous kernel virtual space.
 */
void *vmalloc_32(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
}
EXPORT_SYMBOL(vmalloc_32);
/**
 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
 * @size: allocation size
 *
 * The resulting memory area is 32bit addressable and zeroed so it can be
 * mapped to userspace without leaking data.
 *
 * VM_USERMAP is set on the corresponding VMA so that subsequent calls to
 * remap_vmalloc_range() are permissible.
 */
void *vmalloc_32_user(unsigned long size)
{
	/*
	 * We'll have to sort out the ZONE_DMA bits for 64-bit,
	 * but for now this can simply use vmalloc_user() directly.
	 */
	return vmalloc_user(size);
}
EXPORT_SYMBOL(vmalloc_32_user);

void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
{
	BUG();
	return NULL;
}
EXPORT_SYMBOL(vmap);

void vunmap(const void *addr)
{
	BUG();
}
EXPORT_SYMBOL(vunmap);

void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
{
	BUG();
	return NULL;
}
EXPORT_SYMBOL(vm_map_ram);

void vm_unmap_ram(const void *mem, unsigned int count)
{
	BUG();
}
EXPORT_SYMBOL(vm_unmap_ram);

void vm_unmap_aliases(void)
{
}
EXPORT_SYMBOL_GPL(vm_unmap_aliases);

/*
 * Implement a stub for vmalloc_sync_all() if the architecture chose not to
 * have one.
 */
void __weak vmalloc_sync_all(void)
{
}

struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
{
	BUG();
	return NULL;
}
EXPORT_SYMBOL_GPL(alloc_vm_area);

void free_vm_area(struct vm_struct *area)
{
	BUG();
}
EXPORT_SYMBOL_GPL(free_vm_area);

int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
		   struct page *page)
{
	return -EINVAL;
}
EXPORT_SYMBOL(vm_insert_page);

int vm_map_pages(struct vm_area_struct *vma, struct page **pages,
		 unsigned long num)
{
	return -EINVAL;
}
EXPORT_SYMBOL(vm_map_pages);

int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages,
		      unsigned long num)
{
	return -EINVAL;
}
EXPORT_SYMBOL(vm_map_pages_zero);
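/*
 * Illustrative sketch (not part of the original file): because vmap()
 * and vm_map_ram() BUG() here, portable code gates page remapping on
 * CONFIG_MMU and falls back to a physically contiguous buffer instead
 * of relying on a runtime error:
 *
 *	#ifdef CONFIG_MMU
 *		buf = vmap(pages, n, VM_MAP, PAGE_KERNEL);
 *	#else
 *		buf = page_address(pages[0]);
 *	#endif
 */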
/*
 * sys_brk() for the most part doesn't need the global kernel
 * lock, except when an application is doing something nasty
 * like trying to un-brk an area that has already been mapped
 * to a regular file.  In this case, the unmapping will need
 * to invoke file system routines that need the global lock.
 */
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	struct mm_struct *mm = current->mm;

	if (brk < mm->start_brk || brk > mm->context.end_brk)
		return mm->brk;

	if (mm->brk == brk)
		return mm->brk;

	/*
	 * Always allow shrinking brk
	 */
	if (brk <= mm->brk) {
		mm->brk = brk;
		return brk;
	}

	/*
	 * Ok, looks good - let it rip.
	 */
	flush_icache_range(mm->brk, brk);
	return mm->brk = brk;
}

/*
 * initialise the percpu counter for VM and region record slabs
 */
void __init mmap_init(void)
{
	int ret;

	ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
	VM_BUG_ON(ret);
	vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT);
}

/*
 * validate the region tree
 * - the caller must hold the region lock
 */
#ifdef CONFIG_DEBUG_NOMMU_REGIONS
static noinline void validate_nommu_regions(void)
{
	struct vm_region *region, *last;
	struct rb_node *p, *lastp;

	lastp = rb_first(&nommu_region_tree);
	if (!lastp)
		return;

	last = rb_entry(lastp, struct vm_region, vm_rb);
	BUG_ON(last->vm_end <= last->vm_start);
	BUG_ON(last->vm_top < last->vm_end);

	while ((p = rb_next(lastp))) {
		region = rb_entry(p, struct vm_region, vm_rb);
		last = rb_entry(lastp, struct vm_region, vm_rb);

		BUG_ON(region->vm_end <= region->vm_start);
		BUG_ON(region->vm_top < region->vm_end);
		BUG_ON(region->vm_start < last->vm_top);

		lastp = p;
	}
}
#else
static void validate_nommu_regions(void)
{
}
#endif
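/*
 * Illustrative sketch (not part of the original file): from userspace
 * on a nommu target, a heap that cannot grow past end_brk shows up as
 * brk() failing while the break stays where it was, matching the
 * clamping done in sys_brk() above.
 *
 *	void *old = sbrk(0);
 *
 *	if (brk((char *)old + 65536) != 0)
 *		fprintf(stderr, "brk cannot grow past end_brk\n");
 */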
/*
 * add a region into the global tree
 */
static void add_nommu_region(struct vm_region *region)
{
	struct vm_region *pregion;
	struct rb_node **p, *parent;

	validate_nommu_regions();

	parent = NULL;
	p = &nommu_region_tree.rb_node;
	while (*p) {
		parent = *p;
		pregion = rb_entry(parent, struct vm_region, vm_rb);
		if (region->vm_start < pregion->vm_start)
			p = &(*p)->rb_left;
		else if (region->vm_start > pregion->vm_start)
			p = &(*p)->rb_right;
		else if (pregion == region)
			return;
		else
			BUG();
	}

	rb_link_node(&region->vm_rb, parent, p);
	rb_insert_color(&region->vm_rb, &nommu_region_tree);

	validate_nommu_regions();
}

/*
 * delete a region from the global tree
 */
static void delete_nommu_region(struct vm_region *region)
{
	BUG_ON(!nommu_region_tree.rb_node);

	validate_nommu_regions();
	rb_erase(&region->vm_rb, &nommu_region_tree);
	validate_nommu_regions();
}

/*
 * free a contiguous series of pages
 */
static void free_page_series(unsigned long from, unsigned long to)
{
	for (; from < to; from += PAGE_SIZE) {
		struct page *page = virt_to_page(from);

		atomic_long_dec(&mmap_pages_allocated);
		put_page(page);
	}
}
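/*
 * Illustrative sketch (not part of the original file): a lookup by
 * start address walks the same rbtree ordering that add_nommu_region()
 * uses for insertion.
 *
 *	struct rb_node *n = nommu_region_tree.rb_node;
 *
 *	while (n) {
 *		struct vm_region *r = rb_entry(n, struct vm_region, vm_rb);
 *
 *		if (start < r->vm_start)
 *			n = n->rb_left;
 *		else if (start > r->vm_start)
 *			n = n->rb_right;
 *		else
 *			return r;
 *	}
 *	return NULL;
 */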
/*
 * release a reference to a region
 * - the caller must hold the region semaphore for writing, which this releases
 * - the region may not have been added to the tree yet, in which case vm_top
 *   will equal vm_start
 */
static void __put_nommu_region(struct vm_region *region)
	__releases(nommu_region_sem)
{
	BUG_ON(!nommu_region_tree.rb_node);

	if (--region->vm_usage == 0) {
		if (region->vm_top > region->vm_start)
			delete_nommu_region(region);
		up_write(&nommu_region_sem);

		if (region->vm_file)
			fput(region->vm_file);

		/* IO memory and memory shared directly out of the pagecache
		 * from ramfs/tmpfs mustn't be released here */
		if (region->vm_flags & VM_MAPPED_COPY)
			free_page_series(region->vm_start, region->vm_top);
		kmem_cache_free(vm_region_jar, region);
	} else {
		up_write(&nommu_region_sem);
	}
}

/*
 * release a reference to a region
 */
static void put_nommu_region(struct vm_region *region)
{
	down_write(&nommu_region_sem);
	__put_nommu_region(region);
}
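/*
 * Illustrative sketch (not part of the original file): region sharing
 * pairs one vm_usage increment per additional VMA with one
 * put_nommu_region() when that VMA dies; the region is only torn down
 * once the count hits zero in __put_nommu_region() above.
 *
 *	down_write(&nommu_region_sem);
 *	region->vm_usage++;
 *	up_write(&nommu_region_sem);
 *	...
 *	put_nommu_region(region);
 */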
/*
 * add a VMA into a process's mm_struct in the appropriate place in the list
 * and tree and add to the address space's page tree also if not an anonymous
 * page
 * - should be called with mm->mmap_sem held writelocked
 */
static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *pvma, *prev;
	struct address_space *mapping;
	struct rb_node **p, *parent, *rb_prev;

	BUG_ON(!vma->vm_region);

	mm->map_count++;
	vma->vm_mm = mm;

	/* add the VMA to the mapping */
	if (vma->vm_file) {
		mapping = vma->vm_file->f_mapping;

		i_mmap_lock_write(mapping);
		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_insert(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
		i_mmap_unlock_write(mapping);
	}

	/* add the VMA to the tree */
	parent = rb_prev = NULL;
	p = &mm->mm_rb.rb_node;
	while (*p) {
		parent = *p;
		pvma = rb_entry(parent, struct vm_area_struct, vm_rb);

		/* sort by: start addr, end addr, VMA struct addr in that order
		 * (the latter is necessary as we may get identical VMAs) */
		if (vma->vm_start < pvma->vm_start)
			p = &(*p)->rb_left;
		else if (vma->vm_start > pvma->vm_start) {
			rb_prev = parent;
			p = &(*p)->rb_right;
		} else if (vma->vm_end < pvma->vm_end)
			p = &(*p)->rb_left;
		else if (vma->vm_end > pvma->vm_end) {
			rb_prev = parent;
			p = &(*p)->rb_right;
		} else if (vma < pvma)
			p = &(*p)->rb_left;
		else if (vma > pvma) {
			rb_prev = parent;
			p = &(*p)->rb_right;
		} else
			BUG();
	}

	rb_link_node(&vma->vm_rb, parent, p);
	rb_insert_color(&vma->vm_rb, &mm->mm_rb);

	/* add VMA to the VMA list also */
	prev = NULL;
	if (rb_prev)
		prev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);

	__vma_link_list(mm, vma, prev, parent);
}

/*
 * delete a VMA from its owning mm_struct and address space
 */
static void delete_vma_from_mm(struct vm_area_struct *vma)
{
	int i;
	struct address_space *mapping;
	struct mm_struct *mm = vma->vm_mm;
	struct task_struct *curr = current;

	mm->map_count--;
	for (i = 0; i < VMACACHE_SIZE; i++) {
		/* if the vma is cached, invalidate the entire cache */
		if (curr->vmacache.vmas[i] == vma) {
			vmacache_invalidate(mm);
			break;
		}
	}

	/* remove the VMA from the mapping */
	if (vma->vm_file) {
		mapping = vma->vm_file->f_mapping;

		i_mmap_lock_write(mapping);
		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_remove(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
		i_mmap_unlock_write(mapping);
	}

	/* remove from the MM's tree and list */
	rb_erase(&vma->vm_rb, &mm->mm_rb);

	if (vma->vm_prev)
		vma->vm_prev->vm_next = vma->vm_next;
	else
		mm->mmap = vma->vm_next;

	if (vma->vm_next)
		vma->vm_next->vm_prev = vma->vm_prev;
}

/*
 * destroy a VMA record
 */
static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
	if (vma->vm_ops && vma->vm_ops->close)
		vma->vm_ops->close(vma);
	if (vma->vm_file)
		fput(vma->vm_file);
	put_nommu_region(vma->vm_region);
	vm_area_free(vma);
}
/*
 * look up the first VMA in which addr resides, NULL if none
 * - should be called with mm->mmap_sem at least held readlocked
 */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma;

	/* check the cache first */
	vma = vmacache_find(mm, addr);
	if (likely(vma))
		return vma;

	/* trawl the list (there may be multiple mappings in which addr
	 * resides) */
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (vma->vm_start > addr)
			return NULL;
		if (vma->vm_end > addr) {
			vmacache_update(addr, vma);
			return vma;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(find_vma);

/*
 * find a VMA
 * - we don't extend stack VMAs under NOMMU conditions
 */
struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
{
	return find_vma(mm, addr);
}

/*
 * expand a stack to a given address
 * - not supported under NOMMU conditions
 */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	return -ENOMEM;
}

/*
 * look up the first VMA that exactly matches addr
 * - should be called with mm->mmap_sem at least held readlocked
 */
static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
					     unsigned long addr,
					     unsigned long len)
{
	struct vm_area_struct *vma;
	unsigned long end = addr + len;

	/* check the cache first */
	vma = vmacache_find_exact(mm, addr, end);
	if (vma)
		return vma;

	/* trawl the list (there may be multiple mappings in which addr
	 * resides) */
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (vma->vm_start < addr)
			continue;
		if (vma->vm_start > addr)
			return NULL;
		if (vma->vm_end == end) {
			vmacache_update(addr, vma);
			return vma;
		}
	}

	return NULL;
}
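/*
 * Illustrative sketch (not part of the original file): the standard
 * calling convention for find_vma() under a read-held mmap_sem.
 *
 *	struct vm_area_struct *vma;
 *
 *	down_read(&mm->mmap_sem);
 *	vma = find_vma(mm, addr);
 *	if (vma)
 *		...addr lies inside [vma->vm_start, vma->vm_end)...
 *	up_read(&mm->mmap_sem);
 */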
/*
 * determine whether a mapping should be permitted and, if so, what sort of
 * mapping we're capable of supporting
 */
static int validate_mmap_request(struct file *file,
				 unsigned long addr,
				 unsigned long len,
				 unsigned long prot,
				 unsigned long flags,
				 unsigned long pgoff,
				 unsigned long *_capabilities)
{
	unsigned long capabilities, rlen;
	int ret;

	/* do the simple checks first */
	if (flags & MAP_FIXED)
		return -EINVAL;

	if ((flags & MAP_TYPE) != MAP_PRIVATE &&
	    (flags & MAP_TYPE) != MAP_SHARED)
		return -EINVAL;

	if (!len)
		return -EINVAL;

	/* Careful about overflows.. */
	rlen = PAGE_ALIGN(len);
	if (!rlen || rlen > TASK_SIZE)
		return -ENOMEM;

	/* offset overflow? */
	if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff)
		return -EOVERFLOW;

	if (file) {
		/* files must support mmap */
		if (!file->f_op->mmap)
			return -ENODEV;

		/* work out if what we've got could possibly be shared
		 * - we support chardevs that provide their own "memory"
		 * - we support files/blockdevs that are memory backed
		 */
		if (file->f_op->mmap_capabilities) {
			capabilities = file->f_op->mmap_capabilities(file);
		} else {
			/* no explicit capabilities set, so assume some
			 * defaults */
			switch (file_inode(file)->i_mode & S_IFMT) {
			case S_IFREG:
			case S_IFBLK:
				capabilities = NOMMU_MAP_COPY;
				break;

			case S_IFCHR:
				capabilities =
					NOMMU_MAP_DIRECT |
					NOMMU_MAP_READ |
					NOMMU_MAP_WRITE;
				break;

			default:
				return -EINVAL;
			}
		}

		/* eliminate any capabilities that we can't support on this
		 * device */
		if (!file->f_op->get_unmapped_area)
			capabilities &= ~NOMMU_MAP_DIRECT;
		if (!(file->f_mode & FMODE_CAN_READ))
			capabilities &= ~NOMMU_MAP_COPY;
		/* The file shall have been opened with read permission. */
		if (!(file->f_mode & FMODE_READ))
			return -EACCES;

		if (flags & MAP_SHARED) {
			/* do checks for writing, appending and locking */
			if ((prot & PROT_WRITE) &&
			    !(file->f_mode & FMODE_WRITE))
				return -EACCES;

			if (IS_APPEND(file_inode(file)) &&
			    (file->f_mode & FMODE_WRITE))
				return -EACCES;

			if (locks_verify_locked(file))
				return -EAGAIN;

			if (!(capabilities & NOMMU_MAP_DIRECT))
				return -ENODEV;

			/* we mustn't privatise shared mappings */
			capabilities &= ~NOMMU_MAP_COPY;
		} else {
			/* we're going to read the file into private memory we
			 * allocate */
			if (!(capabilities & NOMMU_MAP_COPY))
				return -ENODEV;

			/* we don't permit a private writable mapping to be
			 * shared with the backing device */
			if (prot & PROT_WRITE)
				capabilities &= ~NOMMU_MAP_DIRECT;
		}

		if (capabilities & NOMMU_MAP_DIRECT) {
			if (((prot & PROT_READ)  && !(capabilities & NOMMU_MAP_READ))  ||
			    ((prot & PROT_WRITE) && !(capabilities & NOMMU_MAP_WRITE)) ||
			    ((prot & PROT_EXEC)  && !(capabilities & NOMMU_MAP_EXEC))
			    ) {
				capabilities &= ~NOMMU_MAP_DIRECT;
				if (flags & MAP_SHARED) {
					pr_warn("MAP_SHARED not completely supported on !MMU\n");
					return -EINVAL;
				}
			}
		}
		/* handle executable mappings and implied executable
		 * mappings */
		if (path_noexec(&file->f_path)) {
			if (prot & PROT_EXEC)
				return -EPERM;
		} else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) {
			/* handle implication of PROT_EXEC by PROT_READ */
			if (current->personality & READ_IMPLIES_EXEC) {
				if (capabilities & NOMMU_MAP_EXEC)
					prot |= PROT_EXEC;
			}
		} else if ((prot & PROT_READ) &&
			   (prot & PROT_EXEC) &&
			   !(capabilities & NOMMU_MAP_EXEC)
			   ) {
			/* backing file is not executable, try to copy */
			capabilities &= ~NOMMU_MAP_DIRECT;
		}
	} else {
		/* anonymous mappings are always memory backed and can be
		 * privately mapped
		 */
		capabilities = NOMMU_MAP_COPY;

		/* handle PROT_EXEC implication by PROT_READ */
		if ((prot & PROT_READ) &&
		    (current->personality & READ_IMPLIES_EXEC))
			prot |= PROT_EXEC;
	}

	/* allow the security API to have its say */
	ret = security_mmap_addr(addr);
	if (ret < 0)
		return ret;

	/* looks okay */
	*_capabilities = capabilities;
	return 0;
}

/*
 * we've determined that we can make the mapping, now translate what we
 * now know into VMA flags
 */
static unsigned long determine_vm_flags(struct file *file,
					unsigned long prot,
					unsigned long flags,
					unsigned long capabilities)
{
	unsigned long vm_flags;

	vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(flags);
	/* vm_flags |= mm->def_flags; */

	if (!(capabilities & NOMMU_MAP_DIRECT)) {
		/* attempt to share read-only copies of mapped file chunks */
		vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
		if (file && !(prot & PROT_WRITE))
			vm_flags |= VM_MAYSHARE;
	} else {
		/* overlay a shareable mapping on the backing device or inode
		 * if possible - used for chardevs, ramfs/tmpfs/shmfs and
		 * romfs/cramfs */
		vm_flags |= VM_MAYSHARE | (capabilities & NOMMU_VMFLAGS);
		if (flags & MAP_SHARED)
			vm_flags |= VM_SHARED;
	}
	/* refuse to let anyone share private mappings with this process if
	 * it's being traced - otherwise breakpoints set in it may interfere
	 * with another untraced process
	 */
	if ((flags & MAP_PRIVATE) && current->ptrace)
		vm_flags &= ~VM_MAYSHARE;

	return vm_flags;
}

/*
 * set up a shared mapping on a file (the driver or filesystem provides and
 * pins the storage)
 */
static int do_mmap_shared_file(struct vm_area_struct *vma)
{
	int ret;

	ret = call_mmap(vma->vm_file, vma);
	if (ret == 0) {
		vma->vm_region->vm_top = vma->vm_region->vm_end;
		return 0;
	}
	if (ret != -ENOSYS)
		return ret;

	/* getting -ENOSYS indicates that direct mmap isn't possible (as
	 * opposed to tried but failed) so we can only give a suitable error as
	 * it's not possible to make a private copy if MAP_SHARED was given */
	return -ENODEV;
}
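/*
 * Illustrative sketch (not part of the original file): a backing
 * driver's ->mmap() on !MMU either accepts the mapping as-is
 * (returning 0, with the driver pinning the storage) or returns
 * -ENOSYS; for a MAP_SHARED request do_mmap_shared_file() above must
 * then fail with -ENODEV, since a private copy would break sharing.
 *
 *	static int mydev_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		if (vma->vm_pgoff)
 *			return -ENOSYS;
 *		return 0;
 *	}
 */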
/*
 * set up a private mapping or an anonymous shared mapping
 */
static int do_mmap_private(struct vm_area_struct *vma,
			   struct vm_region *region,
			   unsigned long len,
			   unsigned long capabilities)
{
	unsigned long total, point;
	void *base;
	int ret, order;

	/* invoke the file's mapping function so that it can keep track of
	 * shared mappings on devices or memory
	 * - VM_MAYSHARE will be set if it may attempt to share
	 */
	if (capabilities & NOMMU_MAP_DIRECT) {
		ret = call_mmap(vma->vm_file, vma);
		if (ret == 0) {
			/* shouldn't return success if we're not sharing */
			BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
			vma->vm_region->vm_top = vma->vm_region->vm_end;
			return 0;
		}
		if (ret != -ENOSYS)
			return ret;

		/* getting an ENOSYS error indicates that direct mmap isn't
		 * possible (as opposed to tried but failed) so we'll try to
		 * make a private copy of the data and map that instead */
	}

	/* allocate some memory to hold the mapping
	 * - note that this may not return a page-aligned address if the object
	 *   we're allocating is smaller than a page
	 */
	order = get_order(len);
	total = 1 << order;
	point = len >> PAGE_SHIFT;

	/* we don't want to allocate a power-of-2 sized page set */
	if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages)
		total = point;

	base = alloc_pages_exact(total << PAGE_SHIFT, GFP_KERNEL);
	if (!base)
		goto enomem;

	atomic_long_add(total, &mmap_pages_allocated);

	region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY;
	region->vm_start = (unsigned long) base;
	region->vm_end   = region->vm_start + len;
	region->vm_top   = region->vm_start + (total << PAGE_SHIFT);

	vma->vm_start = region->vm_start;
	vma->vm_end   = region->vm_start + len;

	if (vma->vm_file) {
		/* read the contents of a file into the copy */
		loff_t fpos;

		fpos = vma->vm_pgoff;
		fpos <<= PAGE_SHIFT;

		ret = kernel_read(vma->vm_file, base, len, &fpos);
		if (ret < 0)
			goto error_free;

		/* clear the last little bit */
		if (ret < len)
			memset(base + ret, 0, len - ret);

Shutemov vma_set_anonymous(vma); 11601da177e4SLinus Torvalds } 11611da177e4SLinus Torvalds 11621da177e4SLinus Torvalds return 0; 11631da177e4SLinus Torvalds 11641da177e4SLinus Torvalds error_free: 11657223bb4aSNamhyung Kim free_page_series(region->vm_start, region->vm_top); 11668feae131SDavid Howells region->vm_start = vma->vm_start = 0; 11678feae131SDavid Howells region->vm_end = vma->vm_end = 0; 1168dd8632a1SPaul Mundt region->vm_top = 0; 11691da177e4SLinus Torvalds return ret; 11701da177e4SLinus Torvalds 11711da177e4SLinus Torvalds enomem: 1172b1de0d13SMitchel Humpherys pr_err("Allocation of length %lu from process %d (%s) failed\n", 117305ae6fa3SGreg Ungerer len, current->pid, current->comm); 11749af744d7SMichal Hocko show_free_areas(0, NULL); 11751da177e4SLinus Torvalds return -ENOMEM; 11761da177e4SLinus Torvalds } 11771da177e4SLinus Torvalds 11781da177e4SLinus Torvalds /* 11791da177e4SLinus Torvalds * handle mapping creation for uClinux 11801da177e4SLinus Torvalds */ 11811fcfd8dbSOleg Nesterov unsigned long do_mmap(struct file *file, 11821da177e4SLinus Torvalds unsigned long addr, 11831da177e4SLinus Torvalds unsigned long len, 11841da177e4SLinus Torvalds unsigned long prot, 11851da177e4SLinus Torvalds unsigned long flags, 11861fcfd8dbSOleg Nesterov vm_flags_t vm_flags, 1187bebeb3d6SMichel Lespinasse unsigned long pgoff, 1188897ab3e0SMike Rapoport unsigned long *populate, 1189897ab3e0SMike Rapoport struct list_head *uf) 11901da177e4SLinus Torvalds { 11918feae131SDavid Howells struct vm_area_struct *vma; 11928feae131SDavid Howells struct vm_region *region; 11931da177e4SLinus Torvalds struct rb_node *rb; 11941fcfd8dbSOleg Nesterov unsigned long capabilities, result; 11951da177e4SLinus Torvalds int ret; 11961da177e4SLinus Torvalds 119741badc15SMichel Lespinasse *populate = 0; 1198bebeb3d6SMichel Lespinasse 11991da177e4SLinus Torvalds /* decide whether we should attempt the mapping, and if so what sort of 12001da177e4SLinus Torvalds * mapping */ 12011da177e4SLinus Torvalds ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, 12021da177e4SLinus Torvalds &capabilities); 120322cc877bSLeon Romanovsky if (ret < 0) 12041da177e4SLinus Torvalds return ret; 12051da177e4SLinus Torvalds 120606aab5a3SDavid Howells /* we ignore the address hint */ 120706aab5a3SDavid Howells addr = 0; 1208f67d9b15SBob Liu len = PAGE_ALIGN(len); 120906aab5a3SDavid Howells 12101da177e4SLinus Torvalds /* we've determined that we can make the mapping, now translate what we 12111da177e4SLinus Torvalds * now know into VMA flags */ 12121fcfd8dbSOleg Nesterov vm_flags |= determine_vm_flags(file, prot, flags, capabilities); 12131da177e4SLinus Torvalds 12148feae131SDavid Howells /* we're going to need to record the mapping */ 12158feae131SDavid Howells region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL); 12168feae131SDavid Howells if (!region) 12178feae131SDavid Howells goto error_getting_region; 12181da177e4SLinus Torvalds 1219490fc053SLinus Torvalds vma = vm_area_alloc(current->mm); 12208feae131SDavid Howells if (!vma) 12218feae131SDavid Howells goto error_getting_vma; 12221da177e4SLinus Torvalds 12231e2ae599SDavid Howells region->vm_usage = 1; 12248feae131SDavid Howells region->vm_flags = vm_flags; 12258feae131SDavid Howells region->vm_pgoff = pgoff; 12268feae131SDavid Howells 12278feae131SDavid Howells vma->vm_flags = vm_flags; 12288feae131SDavid Howells vma->vm_pgoff = pgoff; 12298feae131SDavid Howells 12308feae131SDavid Howells if (file) { 1231cb0942b8SAl Viro region->vm_file = get_file(file); 1232cb0942b8SAl 
Viro vma->vm_file = get_file(file); 12338feae131SDavid Howells } 12348feae131SDavid Howells 12358feae131SDavid Howells down_write(&nommu_region_sem); 12368feae131SDavid Howells 12378feae131SDavid Howells /* if we want to share, we need to check for regions created by other 12381da177e4SLinus Torvalds * mmap() calls that overlap with our proposed mapping 12398feae131SDavid Howells * - we can only share with a superset match on most regular files 12401da177e4SLinus Torvalds * - shared mappings on character devices and memory backed files are 12411da177e4SLinus Torvalds * permitted to overlap inexactly as far as we are concerned for in 12421da177e4SLinus Torvalds * these cases, sharing is handled in the driver or filesystem rather 12431da177e4SLinus Torvalds * than here 12441da177e4SLinus Torvalds */ 12451da177e4SLinus Torvalds if (vm_flags & VM_MAYSHARE) { 12468feae131SDavid Howells struct vm_region *pregion; 12478feae131SDavid Howells unsigned long pglen, rpglen, pgend, rpgend, start; 12481da177e4SLinus Torvalds 12498feae131SDavid Howells pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; 12508feae131SDavid Howells pgend = pgoff + pglen; 1251165b2392SDavid Howells 12528feae131SDavid Howells for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) { 12538feae131SDavid Howells pregion = rb_entry(rb, struct vm_region, vm_rb); 12541da177e4SLinus Torvalds 12558feae131SDavid Howells if (!(pregion->vm_flags & VM_MAYSHARE)) 12561da177e4SLinus Torvalds continue; 12571da177e4SLinus Torvalds 12581da177e4SLinus Torvalds /* search for overlapping mappings on the same file */ 1259496ad9aaSAl Viro if (file_inode(pregion->vm_file) != 1260496ad9aaSAl Viro file_inode(file)) 12611da177e4SLinus Torvalds continue; 12621da177e4SLinus Torvalds 12638feae131SDavid Howells if (pregion->vm_pgoff >= pgend) 12641da177e4SLinus Torvalds continue; 12651da177e4SLinus Torvalds 12668feae131SDavid Howells rpglen = pregion->vm_end - pregion->vm_start; 12678feae131SDavid Howells rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT; 12688feae131SDavid Howells rpgend = pregion->vm_pgoff + rpglen; 12698feae131SDavid Howells if (pgoff >= rpgend) 12701da177e4SLinus Torvalds continue; 12711da177e4SLinus Torvalds 12728feae131SDavid Howells /* handle inexactly overlapping matches between 12738feae131SDavid Howells * mappings */ 12748feae131SDavid Howells if ((pregion->vm_pgoff != pgoff || rpglen != pglen) && 12758feae131SDavid Howells !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) { 12768feae131SDavid Howells /* new mapping is not a subset of the region */ 1277b4caecd4SChristoph Hellwig if (!(capabilities & NOMMU_MAP_DIRECT)) 12781da177e4SLinus Torvalds goto sharing_violation; 12791da177e4SLinus Torvalds continue; 12801da177e4SLinus Torvalds } 12811da177e4SLinus Torvalds 12828feae131SDavid Howells /* we've found a region we can share */ 12831e2ae599SDavid Howells pregion->vm_usage++; 12848feae131SDavid Howells vma->vm_region = pregion; 12858feae131SDavid Howells start = pregion->vm_start; 12868feae131SDavid Howells start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; 12878feae131SDavid Howells vma->vm_start = start; 12888feae131SDavid Howells vma->vm_end = start + len; 12891da177e4SLinus Torvalds 129022cc877bSLeon Romanovsky if (pregion->vm_flags & VM_MAPPED_COPY) 12918feae131SDavid Howells vma->vm_flags |= VM_MAPPED_COPY; 129222cc877bSLeon Romanovsky else { 12938feae131SDavid Howells ret = do_mmap_shared_file(vma); 12948feae131SDavid Howells if (ret < 0) { 12958feae131SDavid Howells vma->vm_region = NULL; 12968feae131SDavid Howells 
vma->vm_start = 0; 12978feae131SDavid Howells vma->vm_end = 0; 12981e2ae599SDavid Howells pregion->vm_usage--; 12998feae131SDavid Howells pregion = NULL; 13008feae131SDavid Howells goto error_just_free; 13011da177e4SLinus Torvalds } 13028feae131SDavid Howells } 13038feae131SDavid Howells fput(region->vm_file); 13048feae131SDavid Howells kmem_cache_free(vm_region_jar, region); 13058feae131SDavid Howells region = pregion; 13068feae131SDavid Howells result = start; 13078feae131SDavid Howells goto share; 13088feae131SDavid Howells } 13091da177e4SLinus Torvalds 13101da177e4SLinus Torvalds /* obtain the address at which to make a shared mapping 13111da177e4SLinus Torvalds * - this is the hook for quasi-memory character devices to 13121da177e4SLinus Torvalds * tell us the location of a shared mapping 13131da177e4SLinus Torvalds */ 1314b4caecd4SChristoph Hellwig if (capabilities & NOMMU_MAP_DIRECT) { 13151da177e4SLinus Torvalds addr = file->f_op->get_unmapped_area(file, addr, len, 13161da177e4SLinus Torvalds pgoff, flags); 1317bb005a59SNamhyung Kim if (IS_ERR_VALUE(addr)) { 13181da177e4SLinus Torvalds ret = addr; 1319bb005a59SNamhyung Kim if (ret != -ENOSYS) 13208feae131SDavid Howells goto error_just_free; 13211da177e4SLinus Torvalds 13221da177e4SLinus Torvalds /* the driver refused to tell us where to site 13231da177e4SLinus Torvalds * the mapping so we'll have to attempt to copy 13241da177e4SLinus Torvalds * it */ 1325bb005a59SNamhyung Kim ret = -ENODEV; 1326b4caecd4SChristoph Hellwig if (!(capabilities & NOMMU_MAP_COPY)) 13278feae131SDavid Howells goto error_just_free; 13281da177e4SLinus Torvalds 1329b4caecd4SChristoph Hellwig capabilities &= ~NOMMU_MAP_DIRECT; 13308feae131SDavid Howells } else { 13318feae131SDavid Howells vma->vm_start = region->vm_start = addr; 13328feae131SDavid Howells vma->vm_end = region->vm_end = addr + len; 13331da177e4SLinus Torvalds } 13341da177e4SLinus Torvalds } 13351da177e4SLinus Torvalds } 13361da177e4SLinus Torvalds 13378feae131SDavid Howells vma->vm_region = region; 13381da177e4SLinus Torvalds 1339645d83c5SDavid Howells /* set up the mapping 1340b4caecd4SChristoph Hellwig * - the region is filled in if NOMMU_MAP_DIRECT is still set 1341645d83c5SDavid Howells */ 13421da177e4SLinus Torvalds if (file && vma->vm_flags & VM_SHARED) 13438feae131SDavid Howells ret = do_mmap_shared_file(vma); 13441da177e4SLinus Torvalds else 1345645d83c5SDavid Howells ret = do_mmap_private(vma, region, len, capabilities); 13461da177e4SLinus Torvalds if (ret < 0) 1347645d83c5SDavid Howells goto error_just_free; 1348645d83c5SDavid Howells add_nommu_region(region); 13498feae131SDavid Howells 1350ea637639SJie Zhang /* clear anonymous mappings that don't ask for uninitialized data */ 1351ea637639SJie Zhang if (!vma->vm_file && !(flags & MAP_UNINITIALIZED)) 1352ea637639SJie Zhang memset((void *)region->vm_start, 0, 1353ea637639SJie Zhang region->vm_end - region->vm_start); 1354ea637639SJie Zhang 13551da177e4SLinus Torvalds /* okay... 
we have a mapping; now we have to register it */ 13568feae131SDavid Howells result = vma->vm_start; 13571da177e4SLinus Torvalds 13581da177e4SLinus Torvalds current->mm->total_vm += len >> PAGE_SHIFT; 13591da177e4SLinus Torvalds 13608feae131SDavid Howells share: 13618feae131SDavid Howells add_vma_to_mm(current->mm, vma); 13621da177e4SLinus Torvalds 1363cfe79c00SMike Frysinger /* we flush the region from the icache only when the first executable 1364cfe79c00SMike Frysinger * mapping of it is made */ 1365cfe79c00SMike Frysinger if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) { 1366cfe79c00SMike Frysinger flush_icache_range(region->vm_start, region->vm_end); 1367cfe79c00SMike Frysinger region->vm_icache_flushed = true; 1368cfe79c00SMike Frysinger } 13691da177e4SLinus Torvalds 1370cfe79c00SMike Frysinger up_write(&nommu_region_sem); 13711da177e4SLinus Torvalds 13728feae131SDavid Howells return result; 13731da177e4SLinus Torvalds 13748feae131SDavid Howells error_just_free: 13758feae131SDavid Howells up_write(&nommu_region_sem); 13768feae131SDavid Howells error: 137789a86402SDavid Howells if (region->vm_file) 13788feae131SDavid Howells fput(region->vm_file); 13798feae131SDavid Howells kmem_cache_free(vm_region_jar, region); 138089a86402SDavid Howells if (vma->vm_file) 13818feae131SDavid Howells fput(vma->vm_file); 13823928d4f5SLinus Torvalds vm_area_free(vma); 13831da177e4SLinus Torvalds return ret; 13841da177e4SLinus Torvalds 13851da177e4SLinus Torvalds sharing_violation: 13868feae131SDavid Howells up_write(&nommu_region_sem); 138722cc877bSLeon Romanovsky pr_warn("Attempt to share mismatched mappings\n"); 13888feae131SDavid Howells ret = -EINVAL; 13898feae131SDavid Howells goto error; 13901da177e4SLinus Torvalds 13911da177e4SLinus Torvalds error_getting_vma: 13928feae131SDavid Howells kmem_cache_free(vm_region_jar, region); 139322cc877bSLeon Romanovsky pr_warn("Allocation of vma for %lu byte allocation from process %d failed\n", 13941da177e4SLinus Torvalds len, current->pid); 13959af744d7SMichal Hocko show_free_areas(0, NULL); 13961da177e4SLinus Torvalds return -ENOMEM; 13971da177e4SLinus Torvalds 13988feae131SDavid Howells error_getting_region: 139922cc877bSLeon Romanovsky pr_warn("Allocation of vm region for %lu byte allocation from process %d failed\n", 14001da177e4SLinus Torvalds len, current->pid); 14019af744d7SMichal Hocko show_free_areas(0, NULL); 14021da177e4SLinus Torvalds return -ENOMEM; 14031da177e4SLinus Torvalds } 14046be5ceb0SLinus Torvalds 1405a90f590aSDominik Brodowski unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, 1406a90f590aSDominik Brodowski unsigned long prot, unsigned long flags, 1407a90f590aSDominik Brodowski unsigned long fd, unsigned long pgoff) 140866f0dc48SHugh Dickins { 140966f0dc48SHugh Dickins struct file *file = NULL; 141066f0dc48SHugh Dickins unsigned long retval = -EBADF; 141166f0dc48SHugh Dickins 1412120a795dSAl Viro audit_mmap_fd(fd, flags); 141366f0dc48SHugh Dickins if (!(flags & MAP_ANONYMOUS)) { 141466f0dc48SHugh Dickins file = fget(fd); 141566f0dc48SHugh Dickins if (!file) 141666f0dc48SHugh Dickins goto out; 141766f0dc48SHugh Dickins } 141866f0dc48SHugh Dickins 141966f0dc48SHugh Dickins flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); 142066f0dc48SHugh Dickins 1421ad1ed293SGreg Ungerer retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); 142266f0dc48SHugh Dickins 142366f0dc48SHugh Dickins if (file) 142466f0dc48SHugh Dickins fput(file); 142566f0dc48SHugh Dickins out: 142666f0dc48SHugh Dickins return retval; 
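/* retval is either the address of the new mapping or a negative errno:
 * -EBADF from a failed fget() above, or an error propagated from
 * do_mmap() by way of vm_mmap_pgoff() */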
142766f0dc48SHugh Dickins } 142866f0dc48SHugh Dickins 1429a90f590aSDominik Brodowski SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, 1430a90f590aSDominik Brodowski unsigned long, prot, unsigned long, flags, 1431a90f590aSDominik Brodowski unsigned long, fd, unsigned long, pgoff) 1432a90f590aSDominik Brodowski { 1433a90f590aSDominik Brodowski return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); 1434a90f590aSDominik Brodowski } 1435a90f590aSDominik Brodowski 1436a4679373SChristoph Hellwig #ifdef __ARCH_WANT_SYS_OLD_MMAP 1437a4679373SChristoph Hellwig struct mmap_arg_struct { 1438a4679373SChristoph Hellwig unsigned long addr; 1439a4679373SChristoph Hellwig unsigned long len; 1440a4679373SChristoph Hellwig unsigned long prot; 1441a4679373SChristoph Hellwig unsigned long flags; 1442a4679373SChristoph Hellwig unsigned long fd; 1443a4679373SChristoph Hellwig unsigned long offset; 1444a4679373SChristoph Hellwig }; 1445a4679373SChristoph Hellwig 1446a4679373SChristoph Hellwig SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) 1447a4679373SChristoph Hellwig { 1448a4679373SChristoph Hellwig struct mmap_arg_struct a; 1449a4679373SChristoph Hellwig 1450a4679373SChristoph Hellwig if (copy_from_user(&a, arg, sizeof(a))) 1451a4679373SChristoph Hellwig return -EFAULT; 14521824cb75SAlexander Kuleshov if (offset_in_page(a.offset)) 1453a4679373SChristoph Hellwig return -EINVAL; 1454a4679373SChristoph Hellwig 1455a90f590aSDominik Brodowski return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, 1456a4679373SChristoph Hellwig a.offset >> PAGE_SHIFT); 1457a4679373SChristoph Hellwig } 1458a4679373SChristoph Hellwig #endif /* __ARCH_WANT_SYS_OLD_MMAP */ 1459a4679373SChristoph Hellwig 14601da177e4SLinus Torvalds /* 14618feae131SDavid Howells * split a vma into two pieces at address 'addr'; a new vma is allocated either 14628feae131SDavid Howells * for the first part or the tail.
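 * - do_munmap() relies on this when asked to remove a chunk from the
 * middle of an anonymous VMA: it splits once and then trims one of the
 * pieces with shrink_vma()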
14631da177e4SLinus Torvalds */ 14648feae131SDavid Howells int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, 14658feae131SDavid Howells unsigned long addr, int new_below) 14661da177e4SLinus Torvalds { 14678feae131SDavid Howells struct vm_area_struct *new; 14688feae131SDavid Howells struct vm_region *region; 14698feae131SDavid Howells unsigned long npages; 14701da177e4SLinus Torvalds 1471779c1023SDavid Howells /* we're only permitted to split anonymous regions (these should have 1472779c1023SDavid Howells * only a single usage on the region) */ 1473779c1023SDavid Howells if (vma->vm_file) 14748feae131SDavid Howells return -ENOMEM; 14751da177e4SLinus Torvalds 14768feae131SDavid Howells if (mm->map_count >= sysctl_max_map_count) 14778feae131SDavid Howells return -ENOMEM; 14781da177e4SLinus Torvalds 14798feae131SDavid Howells region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL); 14808feae131SDavid Howells if (!region) 14818feae131SDavid Howells return -ENOMEM; 14828feae131SDavid Howells 14833928d4f5SLinus Torvalds new = vm_area_dup(vma); 14848feae131SDavid Howells if (!new) { 14858feae131SDavid Howells kmem_cache_free(vm_region_jar, region); 14868feae131SDavid Howells return -ENOMEM; 14871da177e4SLinus Torvalds } 14881da177e4SLinus Torvalds 14898feae131SDavid Howells /* most fields are the same, copy all, and then fixup */ 14908feae131SDavid Howells *region = *vma->vm_region; 14918feae131SDavid Howells new->vm_region = region; 14928feae131SDavid Howells 14938feae131SDavid Howells npages = (addr - vma->vm_start) >> PAGE_SHIFT; 14948feae131SDavid Howells 14958feae131SDavid Howells if (new_below) { 1496dd8632a1SPaul Mundt region->vm_top = region->vm_end = new->vm_end = addr; 14978feae131SDavid Howells } else { 14988feae131SDavid Howells region->vm_start = new->vm_start = addr; 14998feae131SDavid Howells region->vm_pgoff = new->vm_pgoff += npages; 15001da177e4SLinus Torvalds } 15018feae131SDavid Howells 15028feae131SDavid Howells if (new->vm_ops && new->vm_ops->open) 15038feae131SDavid Howells new->vm_ops->open(new); 15048feae131SDavid Howells 15058feae131SDavid Howells delete_vma_from_mm(vma); 15068feae131SDavid Howells down_write(&nommu_region_sem); 15078feae131SDavid Howells delete_nommu_region(vma->vm_region); 15088feae131SDavid Howells if (new_below) { 15098feae131SDavid Howells vma->vm_region->vm_start = vma->vm_start = addr; 15108feae131SDavid Howells vma->vm_region->vm_pgoff = vma->vm_pgoff += npages; 15118feae131SDavid Howells } else { 15128feae131SDavid Howells vma->vm_region->vm_end = vma->vm_end = addr; 1513dd8632a1SPaul Mundt vma->vm_region->vm_top = addr; 15148feae131SDavid Howells } 15158feae131SDavid Howells add_nommu_region(vma->vm_region); 15168feae131SDavid Howells add_nommu_region(new->vm_region); 15178feae131SDavid Howells up_write(&nommu_region_sem); 15188feae131SDavid Howells add_vma_to_mm(mm, vma); 15198feae131SDavid Howells add_vma_to_mm(mm, new); 15208feae131SDavid Howells return 0; 15218feae131SDavid Howells } 15228feae131SDavid Howells 15238feae131SDavid Howells /* 15248feae131SDavid Howells * shrink a VMA by removing the specified chunk from either the beginning or 15258feae131SDavid Howells * the end 15268feae131SDavid Howells */ 15278feae131SDavid Howells static int shrink_vma(struct mm_struct *mm, 15288feae131SDavid Howells struct vm_area_struct *vma, 15298feae131SDavid Howells unsigned long from, unsigned long to) 15308feae131SDavid Howells { 15318feae131SDavid Howells struct vm_region *region; 15328feae131SDavid Howells 15338feae131SDavid Howells /* 
adjust the VMA's pointers, which may reposition it in the MM's tree 15348feae131SDavid Howells * and list */ 15358feae131SDavid Howells delete_vma_from_mm(vma); 15368feae131SDavid Howells if (from > vma->vm_start) 15378feae131SDavid Howells vma->vm_end = from; 15388feae131SDavid Howells else 15398feae131SDavid Howells vma->vm_start = to; 15408feae131SDavid Howells add_vma_to_mm(mm, vma); 15418feae131SDavid Howells 15428feae131SDavid Howells /* cut the backing region down to size */ 15438feae131SDavid Howells region = vma->vm_region; 15441e2ae599SDavid Howells BUG_ON(region->vm_usage != 1); 15458feae131SDavid Howells 15468feae131SDavid Howells down_write(&nommu_region_sem); 15478feae131SDavid Howells delete_nommu_region(region); 1548dd8632a1SPaul Mundt if (from > region->vm_start) { 1549dd8632a1SPaul Mundt to = region->vm_top; 1550dd8632a1SPaul Mundt region->vm_top = region->vm_end = from; 1551dd8632a1SPaul Mundt } else { 15528feae131SDavid Howells region->vm_start = to; 1553dd8632a1SPaul Mundt } 15548feae131SDavid Howells add_nommu_region(region); 15558feae131SDavid Howells up_write(&nommu_region_sem); 15568feae131SDavid Howells 15578feae131SDavid Howells free_page_series(from, to); 15588feae131SDavid Howells return 0; 15591da177e4SLinus Torvalds } 15601da177e4SLinus Torvalds 15613034097aSDavid Howells /* 15623034097aSDavid Howells * release a mapping 15638feae131SDavid Howells * - under NOMMU conditions the chunk to be unmapped must be backed by a single 15648feae131SDavid Howells * VMA, though it need not cover the whole VMA 15653034097aSDavid Howells */ 1566897ab3e0SMike Rapoport int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf) 15671da177e4SLinus Torvalds { 15688feae131SDavid Howells struct vm_area_struct *vma; 1569f67d9b15SBob Liu unsigned long end; 15708feae131SDavid Howells int ret; 15711da177e4SLinus Torvalds 1572f67d9b15SBob Liu len = PAGE_ALIGN(len); 15738feae131SDavid Howells if (len == 0) 15741da177e4SLinus Torvalds return -EINVAL; 15751da177e4SLinus Torvalds 1576f67d9b15SBob Liu end = start + len; 1577f67d9b15SBob Liu 15788feae131SDavid Howells /* find the first potentially overlapping VMA */ 15798feae131SDavid Howells vma = find_vma(mm, start); 15808feae131SDavid Howells if (!vma) { 1581ac714904SChoi Gi-yong static int limit; 158233e5d769SDavid Howells if (limit < 5) { 158322cc877bSLeon Romanovsky pr_warn("munmap of memory not mmapped by process %d (%s): 0x%lx-0x%lx\n", 158433e5d769SDavid Howells current->pid, current->comm, 158533e5d769SDavid Howells start, start + len - 1); 158633e5d769SDavid Howells limit++; 158733e5d769SDavid Howells } 15888feae131SDavid Howells return -EINVAL; 15898feae131SDavid Howells } 15901da177e4SLinus Torvalds 15918feae131SDavid Howells /* we're allowed to split an anonymous VMA but not a file-backed one */ 15928feae131SDavid Howells if (vma->vm_file) { 15938feae131SDavid Howells do { 159422cc877bSLeon Romanovsky if (start > vma->vm_start) 15958feae131SDavid Howells return -EINVAL; 15968feae131SDavid Howells if (end == vma->vm_end) 15978feae131SDavid Howells goto erase_whole_vma; 1598d75a310cSNamhyung Kim vma = vma->vm_next; 1599d75a310cSNamhyung Kim } while (vma); 16008feae131SDavid Howells return -EINVAL; 16018feae131SDavid Howells } else { 16028feae131SDavid Howells /* the chunk must be a subset of the VMA found */ 16038feae131SDavid Howells if (start == vma->vm_start && end == vma->vm_end) 16048feae131SDavid Howells goto erase_whole_vma; 160522cc877bSLeon Romanovsky if (start < vma->vm_start || end > 
vma->vm_end) 16068feae131SDavid Howells return -EINVAL; 16071824cb75SAlexander Kuleshov if (offset_in_page(start)) 16088feae131SDavid Howells return -EINVAL; 16091824cb75SAlexander Kuleshov if (end != vma->vm_end && offset_in_page(end)) 16108feae131SDavid Howells return -EINVAL; 16118feae131SDavid Howells if (start != vma->vm_start && end != vma->vm_end) { 16128feae131SDavid Howells ret = split_vma(mm, vma, start, 1); 161322cc877bSLeon Romanovsky if (ret < 0) 16148feae131SDavid Howells return ret; 16158feae131SDavid Howells } 16168feae131SDavid Howells return shrink_vma(mm, vma, start, end); 16178feae131SDavid Howells } 16181da177e4SLinus Torvalds 16198feae131SDavid Howells erase_whole_vma: 16208feae131SDavid Howells delete_vma_from_mm(vma); 16218feae131SDavid Howells delete_vma(mm, vma); 16221da177e4SLinus Torvalds return 0; 16231da177e4SLinus Torvalds } 1624b5073173SPaul Mundt EXPORT_SYMBOL(do_munmap); 16251da177e4SLinus Torvalds 1626bfce281cSAl Viro int vm_munmap(unsigned long addr, size_t len) 16273034097aSDavid Howells { 1628bfce281cSAl Viro struct mm_struct *mm = current->mm; 16293034097aSDavid Howells int ret; 16303034097aSDavid Howells 16313034097aSDavid Howells down_write(&mm->mmap_sem); 1632897ab3e0SMike Rapoport ret = do_munmap(mm, addr, len, NULL); 16333034097aSDavid Howells up_write(&mm->mmap_sem); 16343034097aSDavid Howells return ret; 16353034097aSDavid Howells } 1636a46ef99dSLinus Torvalds EXPORT_SYMBOL(vm_munmap); 1637a46ef99dSLinus Torvalds 1638a46ef99dSLinus Torvalds SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) 1639a46ef99dSLinus Torvalds { 1640bfce281cSAl Viro return vm_munmap(addr, len); 1641a46ef99dSLinus Torvalds } 16423034097aSDavid Howells 16433034097aSDavid Howells /* 16448feae131SDavid Howells * release all the mappings made in a process's VM space 16453034097aSDavid Howells */ 16461da177e4SLinus Torvalds void exit_mmap(struct mm_struct *mm) 16471da177e4SLinus Torvalds { 16488feae131SDavid Howells struct vm_area_struct *vma; 16491da177e4SLinus Torvalds 16508feae131SDavid Howells if (!mm) 16518feae131SDavid Howells return; 16528feae131SDavid Howells 16531da177e4SLinus Torvalds mm->total_vm = 0; 16541da177e4SLinus Torvalds 16558feae131SDavid Howells while ((vma = mm->mmap)) { 16568feae131SDavid Howells mm->mmap = vma->vm_next; 16578feae131SDavid Howells delete_vma_from_mm(vma); 16588feae131SDavid Howells delete_vma(mm, vma); 165904c34961SSteven J. 
Magnani cond_resched(); 16601da177e4SLinus Torvalds } 16611da177e4SLinus Torvalds } 16621da177e4SLinus Torvalds 16635d22fc25SLinus Torvalds int vm_brk(unsigned long addr, unsigned long len) 16641da177e4SLinus Torvalds { 16651da177e4SLinus Torvalds return -ENOMEM; 16661da177e4SLinus Torvalds } 16671da177e4SLinus Torvalds 16681da177e4SLinus Torvalds /* 16696fa5f80bSDavid Howells * expand (or shrink) an existing mapping, potentially moving it at the same 16706fa5f80bSDavid Howells * time (controlled by the MREMAP_MAYMOVE flag and available VM space) 16711da177e4SLinus Torvalds * 16726fa5f80bSDavid Howells * under NOMMU conditions, we only permit changing a mapping's size, and only 16738feae131SDavid Howells * as long as it stays within the region allocated by do_mmap_private() and the 16748feae131SDavid Howells * block is not shareable 16751da177e4SLinus Torvalds * 16766fa5f80bSDavid Howells * MREMAP_FIXED is not supported under NOMMU conditions 16771da177e4SLinus Torvalds */ 16784b377babSAl Viro static unsigned long do_mremap(unsigned long addr, 16791da177e4SLinus Torvalds unsigned long old_len, unsigned long new_len, 16801da177e4SLinus Torvalds unsigned long flags, unsigned long new_addr) 16811da177e4SLinus Torvalds { 16826fa5f80bSDavid Howells struct vm_area_struct *vma; 16831da177e4SLinus Torvalds 16841da177e4SLinus Torvalds /* insanity checks first */ 1685f67d9b15SBob Liu old_len = PAGE_ALIGN(old_len); 1686f67d9b15SBob Liu new_len = PAGE_ALIGN(new_len); 16878feae131SDavid Howells if (old_len == 0 || new_len == 0) 16881da177e4SLinus Torvalds return (unsigned long) -EINVAL; 16891da177e4SLinus Torvalds 16901824cb75SAlexander Kuleshov if (offset_in_page(addr)) 16918feae131SDavid Howells return -EINVAL; 16928feae131SDavid Howells 16931da177e4SLinus Torvalds if (flags & MREMAP_FIXED && new_addr != addr) 16941da177e4SLinus Torvalds return (unsigned long) -EINVAL; 16951da177e4SLinus Torvalds 16968feae131SDavid Howells vma = find_vma_exact(current->mm, addr, old_len); 16976fa5f80bSDavid Howells if (!vma) 16981da177e4SLinus Torvalds return (unsigned long) -EINVAL; 16991da177e4SLinus Torvalds 17006fa5f80bSDavid Howells if (vma->vm_end != vma->vm_start + old_len) 17011da177e4SLinus Torvalds return (unsigned long) -EFAULT; 17021da177e4SLinus Torvalds 17036fa5f80bSDavid Howells if (vma->vm_flags & VM_MAYSHARE) 17041da177e4SLinus Torvalds return (unsigned long) -EPERM; 17051da177e4SLinus Torvalds 17068feae131SDavid Howells if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start) 17071da177e4SLinus Torvalds return (unsigned long) -ENOMEM; 17081da177e4SLinus Torvalds 17091da177e4SLinus Torvalds /* all checks complete - do it */ 17106fa5f80bSDavid Howells vma->vm_end = vma->vm_start + new_len; 17116fa5f80bSDavid Howells return vma->vm_start; 17126fa5f80bSDavid Howells } 17136fa5f80bSDavid Howells 17146a6160a7SHeiko Carstens SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, 17156a6160a7SHeiko Carstens unsigned long, new_len, unsigned long, flags, 17166a6160a7SHeiko Carstens unsigned long, new_addr) 17176fa5f80bSDavid Howells { 17186fa5f80bSDavid Howells unsigned long ret; 17196fa5f80bSDavid Howells 17206fa5f80bSDavid Howells down_write(¤t->mm->mmap_sem); 17216fa5f80bSDavid Howells ret = do_mremap(addr, old_len, new_len, flags, new_addr); 17226fa5f80bSDavid Howells up_write(¤t->mm->mmap_sem); 17236fa5f80bSDavid Howells return ret; 17241da177e4SLinus Torvalds } 17251da177e4SLinus Torvalds 1726df06b37fSKeith Busch struct page *follow_page(struct vm_area_struct *vma, unsigned long 
address, 1727df06b37fSKeith Busch unsigned int foll_flags) 17281da177e4SLinus Torvalds { 17291da177e4SLinus Torvalds return NULL; 17301da177e4SLinus Torvalds } 17311da177e4SLinus Torvalds 17328f3b1327SBob Liu int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, 17338f3b1327SBob Liu unsigned long pfn, unsigned long size, pgprot_t prot) 17341da177e4SLinus Torvalds { 17358f3b1327SBob Liu if (addr != (pfn << PAGE_SHIFT)) 17368f3b1327SBob Liu return -EINVAL; 17378f3b1327SBob Liu 1738314e51b9SKonstantin Khlebnikov vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; 173966aa2b4bSGreg Ungerer return 0; 17401da177e4SLinus Torvalds } 174122c4af40SLuke Yang EXPORT_SYMBOL(remap_pfn_range); 17421da177e4SLinus Torvalds 17433c0b9de6SLinus Torvalds int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len) 17443c0b9de6SLinus Torvalds { 17453c0b9de6SLinus Torvalds unsigned long pfn = start >> PAGE_SHIFT; 17463c0b9de6SLinus Torvalds unsigned long vm_len = vma->vm_end - vma->vm_start; 17473c0b9de6SLinus Torvalds 17483c0b9de6SLinus Torvalds pfn += vma->vm_pgoff; 17493c0b9de6SLinus Torvalds return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); 17503c0b9de6SLinus Torvalds } 17513c0b9de6SLinus Torvalds EXPORT_SYMBOL(vm_iomap_memory); 17523c0b9de6SLinus Torvalds 1753f905bc44SPaul Mundt int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, 1754f905bc44SPaul Mundt unsigned long pgoff) 1755f905bc44SPaul Mundt { 1756f905bc44SPaul Mundt unsigned int size = vma->vm_end - vma->vm_start; 1757f905bc44SPaul Mundt 1758f905bc44SPaul Mundt if (!(vma->vm_flags & VM_USERMAP)) 1759f905bc44SPaul Mundt return -EINVAL; 1760f905bc44SPaul Mundt 1761f905bc44SPaul Mundt vma->vm_start = (unsigned long)(addr + (pgoff << PAGE_SHIFT)); 1762f905bc44SPaul Mundt vma->vm_end = vma->vm_start + size; 1763f905bc44SPaul Mundt 1764f905bc44SPaul Mundt return 0; 1765f905bc44SPaul Mundt } 1766f905bc44SPaul Mundt EXPORT_SYMBOL(remap_vmalloc_range); 1767f905bc44SPaul Mundt 17681da177e4SLinus Torvalds unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, 17691da177e4SLinus Torvalds unsigned long len, unsigned long pgoff, unsigned long flags) 17701da177e4SLinus Torvalds { 17711da177e4SLinus Torvalds return -ENOMEM; 17721da177e4SLinus Torvalds } 17731da177e4SLinus Torvalds 17742bcd6454SSouptick Joarder vm_fault_t filemap_fault(struct vm_fault *vmf) 1775b0e15190SDavid Howells { 1776b0e15190SDavid Howells BUG(); 1777d0217ac0SNick Piggin return 0; 1778b0e15190SDavid Howells } 1779b5073173SPaul Mundt EXPORT_SYMBOL(filemap_fault); 17800ec76a11SDavid Howells 178182b0f8c3SJan Kara void filemap_map_pages(struct vm_fault *vmf, 1782bae473a4SKirill A. Shutemov pgoff_t start_pgoff, pgoff_t end_pgoff) 1783f1820361SKirill A. Shutemov { 1784f1820361SKirill A. Shutemov BUG(); 1785f1820361SKirill A. Shutemov } 1786f1820361SKirill A. Shutemov EXPORT_SYMBOL(filemap_map_pages); 1787f1820361SKirill A. Shutemov 178884d77d3fSEric W. 
Biederman int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, 1789442486ecSLorenzo Stoakes unsigned long addr, void *buf, int len, unsigned int gup_flags) 17900ec76a11SDavid Howells { 17910ec76a11SDavid Howells struct vm_area_struct *vma; 1792442486ecSLorenzo Stoakes int write = gup_flags & FOLL_WRITE; 17930ec76a11SDavid Howells 17940ec76a11SDavid Howells down_read(&mm->mmap_sem); 17950ec76a11SDavid Howells 17960ec76a11SDavid Howells /* the access must start within one of the target process's mappings */ 17970159b141SDavid Howells vma = find_vma(mm, addr); 17980159b141SDavid Howells if (vma) { 17990ec76a11SDavid Howells /* don't overrun this mapping */ 18000ec76a11SDavid Howells if (addr + len >= vma->vm_end) 18010ec76a11SDavid Howells len = vma->vm_end - addr; 18020ec76a11SDavid Howells 18030ec76a11SDavid Howells /* only read or write mappings where it is permitted */ 1804d00c7b99SDavid Howells if (write && vma->vm_flags & VM_MAYWRITE) 18057959722bSJie Zhang copy_to_user_page(vma, NULL, addr, 18067959722bSJie Zhang (void *) addr, buf, len); 1807d00c7b99SDavid Howells else if (!write && vma->vm_flags & VM_MAYREAD) 18087959722bSJie Zhang copy_from_user_page(vma, NULL, addr, 18097959722bSJie Zhang buf, (void *) addr, len); 18100ec76a11SDavid Howells else 18110ec76a11SDavid Howells len = 0; 18120ec76a11SDavid Howells } else { 18130ec76a11SDavid Howells len = 0; 18140ec76a11SDavid Howells } 18150ec76a11SDavid Howells 18160ec76a11SDavid Howells up_read(&mm->mmap_sem); 1817f55f199bSMike Frysinger 1818f55f199bSMike Frysinger return len; 1819f55f199bSMike Frysinger } 1820f55f199bSMike Frysinger 1821f55f199bSMike Frysinger /** 1822b7701a5fSMike Rapoport * access_remote_vm - access another process' address space 1823f55f199bSMike Frysinger * @mm: the mm_struct of the target address space 1824f55f199bSMike Frysinger * @addr: start address to access 1825f55f199bSMike Frysinger * @buf: source or destination buffer 1826f55f199bSMike Frysinger * @len: number of bytes to transfer 18276347e8d5SLorenzo Stoakes * @gup_flags: flags modifying lookup behaviour 1828f55f199bSMike Frysinger * 1829f55f199bSMike Frysinger * The caller must hold a reference on @mm. 1830f55f199bSMike Frysinger */ 1831f55f199bSMike Frysinger int access_remote_vm(struct mm_struct *mm, unsigned long addr, 18326347e8d5SLorenzo Stoakes void *buf, int len, unsigned int gup_flags) 1833f55f199bSMike Frysinger { 18346347e8d5SLorenzo Stoakes return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags); 1835f55f199bSMike Frysinger } 1836f55f199bSMike Frysinger 1837f55f199bSMike Frysinger /* 1838f55f199bSMike Frysinger * Access another process' address space. 
1839f55f199bSMike Frysinger * - source/target buffer must be kernel space 1840f55f199bSMike Frysinger */ 1841f307ab6dSLorenzo Stoakes int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, 1842f307ab6dSLorenzo Stoakes unsigned int gup_flags) 1843f55f199bSMike Frysinger { 1844f55f199bSMike Frysinger struct mm_struct *mm; 1845f55f199bSMike Frysinger 1846f55f199bSMike Frysinger if (addr + len < addr) 1847f55f199bSMike Frysinger return 0; 1848f55f199bSMike Frysinger 1849f55f199bSMike Frysinger mm = get_task_mm(tsk); 1850f55f199bSMike Frysinger if (!mm) 1851f55f199bSMike Frysinger return 0; 1852f55f199bSMike Frysinger 1853f307ab6dSLorenzo Stoakes len = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags); 1854f55f199bSMike Frysinger 18550ec76a11SDavid Howells mmput(mm); 18560ec76a11SDavid Howells return len; 18570ec76a11SDavid Howells } 1858fcd35857SCatalin Marinas EXPORT_SYMBOL_GPL(access_process_vm); 18597e660872SDavid Howells 18607e660872SDavid Howells /** 18617e660872SDavid Howells * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode 18627e660872SDavid Howells * @inode: The inode to check 18637e660872SDavid Howells * @size: The current filesize of the inode 18647e660872SDavid Howells * @newsize: The proposed filesize of the inode 18657e660872SDavid Howells * 18667e660872SDavid Howells * Check the shared mappings on an inode on behalf of a shrinking truncate to 18677e660872SDavid Howells * make sure that any outstanding VMAs aren't broken and then shrink the 18687e660872SDavid Howells * vm_regions that extend beyond the new size so that do_mmap_pgoff() doesn't 18697e660872SDavid Howells * automatically grant mappings that are too large. 18707e660872SDavid Howells */ 18717e660872SDavid Howells int nommu_shrink_inode_mappings(struct inode *inode, size_t size, 18727e660872SDavid Howells size_t newsize) 18737e660872SDavid Howells { 18747e660872SDavid Howells struct vm_area_struct *vma; 18757e660872SDavid Howells struct vm_region *region; 18767e660872SDavid Howells pgoff_t low, high; 18777e660872SDavid Howells size_t r_size, r_top; 18787e660872SDavid Howells 18797e660872SDavid Howells low = newsize >> PAGE_SHIFT; 18807e660872SDavid Howells high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; 18817e660872SDavid Howells 18827e660872SDavid Howells down_write(&nommu_region_sem); 18831acf2e04SDavidlohr Bueso i_mmap_lock_read(inode->i_mapping); 18847e660872SDavid Howells 18857e660872SDavid Howells /* search for VMAs that fall within the dead zone */ 18866b2dbba8SMichel Lespinasse vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, low, high) { 18877e660872SDavid Howells /* found one - only interested if it's shared out of the page 18887e660872SDavid Howells * cache */ 18897e660872SDavid Howells if (vma->vm_flags & VM_SHARED) { 18901acf2e04SDavidlohr Bueso i_mmap_unlock_read(inode->i_mapping); 18917e660872SDavid Howells up_write(&nommu_region_sem); 18927e660872SDavid Howells return -ETXTBSY; /* not quite true, but near enough */ 18937e660872SDavid Howells } 18947e660872SDavid Howells } 18957e660872SDavid Howells 18967e660872SDavid Howells /* reduce any regions that overlap the dead zone - if in existence, 18977e660872SDavid Howells * these will be pointed to by VMAs that don't overlap the dead zone 18987e660872SDavid Howells * 18997e660872SDavid Howells * we don't check for any regions that start beyond the EOF as there 19007e660872SDavid Howells * shouldn't be any 19017e660872SDavid Howells */ 19021acf2e04SDavidlohr Bueso vma_interval_tree_foreach(vma,
&inode->i_mapping->i_mmap, 0, ULONG_MAX) { 19037e660872SDavid Howells if (!(vma->vm_flags & VM_SHARED)) 19047e660872SDavid Howells continue; 19057e660872SDavid Howells 19067e660872SDavid Howells region = vma->vm_region; 19077e660872SDavid Howells r_size = region->vm_top - region->vm_start; 19087e660872SDavid Howells r_top = (region->vm_pgoff << PAGE_SHIFT) + r_size; 19097e660872SDavid Howells 19107e660872SDavid Howells if (r_top > newsize) { 19117e660872SDavid Howells region->vm_top -= r_top - newsize; 19127e660872SDavid Howells if (region->vm_end > region->vm_top) 19137e660872SDavid Howells region->vm_end = region->vm_top; 19147e660872SDavid Howells } 19157e660872SDavid Howells } 19167e660872SDavid Howells 19171acf2e04SDavidlohr Bueso i_mmap_unlock_read(inode->i_mapping); 19187e660872SDavid Howells up_write(&nommu_region_sem); 19197e660872SDavid Howells return 0; 19207e660872SDavid Howells } 1921c9b1d098SAndrew Shewmaker 1922c9b1d098SAndrew Shewmaker /* 1923c9b1d098SAndrew Shewmaker * Initialise sysctl_user_reserve_kbytes. 1924c9b1d098SAndrew Shewmaker * 1925c9b1d098SAndrew Shewmaker * This is intended to prevent a single memory hogging process from 1926c9b1d098SAndrew Shewmaker * leaving the user unable to recover (kill the hog) in OVERCOMMIT_NEVER 1927c9b1d098SAndrew Shewmaker * mode. 1928c9b1d098SAndrew Shewmaker * 1929c9b1d098SAndrew Shewmaker * The default value is min(3% of free memory, 128MB). 1930c9b1d098SAndrew Shewmaker * 128MB is enough to recover with sshd/login, bash, and top/kill. 1931c9b1d098SAndrew Shewmaker */ 1932c9b1d098SAndrew Shewmaker static int __meminit init_user_reserve(void) 1933c9b1d098SAndrew Shewmaker { 1934c9b1d098SAndrew Shewmaker unsigned long free_kbytes; 1935c9b1d098SAndrew Shewmaker 1936c41f012aSMichal Hocko free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); 1937c9b1d098SAndrew Shewmaker 1938c9b1d098SAndrew Shewmaker sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17); 1939c9b1d098SAndrew Shewmaker return 0; 1940c9b1d098SAndrew Shewmaker } 1941a4bc6fc7SPaul Gortmaker subsys_initcall(init_user_reserve); 19424eeab4f5SAndrew Shewmaker 19434eeab4f5SAndrew Shewmaker /* 19444eeab4f5SAndrew Shewmaker * Initialise sysctl_admin_reserve_kbytes. 19454eeab4f5SAndrew Shewmaker * 19464eeab4f5SAndrew Shewmaker * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin 19474eeab4f5SAndrew Shewmaker * to log in and kill a memory hogging process. 19484eeab4f5SAndrew Shewmaker * 19494eeab4f5SAndrew Shewmaker * Systems with more than 256MB will reserve 8MB, enough to recover 19504eeab4f5SAndrew Shewmaker * with sshd, bash, and top in OVERCOMMIT_GUESS. Smaller systems will 19514eeab4f5SAndrew Shewmaker * only reserve 3% of free pages by default. 19524eeab4f5SAndrew Shewmaker */ 19534eeab4f5SAndrew Shewmaker static int __meminit init_admin_reserve(void) 19544eeab4f5SAndrew Shewmaker { 19554eeab4f5SAndrew Shewmaker unsigned long free_kbytes; 19564eeab4f5SAndrew Shewmaker 1957c41f012aSMichal Hocko free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); 19584eeab4f5SAndrew Shewmaker 19594eeab4f5SAndrew Shewmaker sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13); 19604eeab4f5SAndrew Shewmaker return 0; 19614eeab4f5SAndrew Shewmaker } 1962a4bc6fc7SPaul Gortmaker subsys_initcall(init_admin_reserve); 1963
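/*
 * Illustrative sketch (hypothetical userspace fragment, not from this
 * file).  It shows the semantics the code above implements:
 * do_mmap_private() backs a MAP_PRIVATE mapping with a power-of-two
 * block of pages (excess pages may be trimmed under
 * sysctl_nr_trim_pages), and do_mremap() only ever resizes in place:
 *
 *	#define _GNU_SOURCE
 *	#include <sys/mman.h>
 *
 *	void *p = mmap(NULL, 2 * 4096, PROT_READ | PROT_WRITE,
 *		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	// len = 8192, get_order(len) = 1: exactly two pages are
 *	// allocated, so there is no excess to trim
 *
 *	void *q = mremap(p, 2 * 4096, 4096, 0);
 *	// shrinking in place succeeds and returns the same address
 *
 *	void *r = mremap(q, 4096, 16 * 4096, 0);
 *	// growing past the backing region fails with ENOMEM
 *
 *	void *s = mremap(q, 4096, 4096, MREMAP_FIXED | MREMAP_MAYMOVE,
 *			 (char *)q + 0x100000);
 *	// MREMAP_FIXED to a different address fails with EINVAL:
 *	// do_mremap() never moves a mapping
 */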