11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * linux/mm/nommu.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Replacement code for mm functions to support CPU's that don't 51da177e4SLinus Torvalds * have any form of memory management unit (thus no virtual memory). 61da177e4SLinus Torvalds * 71da177e4SLinus Torvalds * See Documentation/nommu-mmap.txt 81da177e4SLinus Torvalds * 91da177e4SLinus Torvalds * Copyright (c) 2004-2005 David Howells <dhowells@redhat.com> 101da177e4SLinus Torvalds * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com> 111da177e4SLinus Torvalds * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org> 121da177e4SLinus Torvalds * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com> 13f905bc44SPaul Mundt * Copyright (c) 2007 Paul Mundt <lethal@linux-sh.org> 141da177e4SLinus Torvalds */ 151da177e4SLinus Torvalds 16f2b8544fSDavid Howells #include <linux/module.h> 171da177e4SLinus Torvalds #include <linux/mm.h> 181da177e4SLinus Torvalds #include <linux/mman.h> 191da177e4SLinus Torvalds #include <linux/swap.h> 201da177e4SLinus Torvalds #include <linux/file.h> 211da177e4SLinus Torvalds #include <linux/highmem.h> 221da177e4SLinus Torvalds #include <linux/pagemap.h> 231da177e4SLinus Torvalds #include <linux/slab.h> 241da177e4SLinus Torvalds #include <linux/vmalloc.h> 251da177e4SLinus Torvalds #include <linux/ptrace.h> 261da177e4SLinus Torvalds #include <linux/blkdev.h> 271da177e4SLinus Torvalds #include <linux/backing-dev.h> 281da177e4SLinus Torvalds #include <linux/mount.h> 291da177e4SLinus Torvalds #include <linux/personality.h> 301da177e4SLinus Torvalds #include <linux/security.h> 311da177e4SLinus Torvalds #include <linux/syscalls.h> 321da177e4SLinus Torvalds 331da177e4SLinus Torvalds #include <asm/uaccess.h> 341da177e4SLinus Torvalds #include <asm/tlb.h> 351da177e4SLinus Torvalds #include <asm/tlbflush.h> 361da177e4SLinus Torvalds 371da177e4SLinus Torvalds void *high_memory; 381da177e4SLinus Torvalds struct page *mem_map; 
391da177e4SLinus Torvalds unsigned long max_mapnr; 401da177e4SLinus Torvalds unsigned long num_physpages; 411da177e4SLinus Torvalds unsigned long askedalloc, realalloc; 4280119ef5SAlan Cox atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); 431da177e4SLinus Torvalds int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ 441da177e4SLinus Torvalds int sysctl_overcommit_ratio = 50; /* default is 50% */ 451da177e4SLinus Torvalds int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; 461da177e4SLinus Torvalds int heap_stack_gap = 0; 471da177e4SLinus Torvalds 481da177e4SLinus Torvalds EXPORT_SYMBOL(mem_map); 496a04de6dSWu, Bryan EXPORT_SYMBOL(num_physpages); 501da177e4SLinus Torvalds 511da177e4SLinus Torvalds /* list of shareable VMAs */ 521da177e4SLinus Torvalds struct rb_root nommu_vma_tree = RB_ROOT; 531da177e4SLinus Torvalds DECLARE_RWSEM(nommu_vma_sem); 541da177e4SLinus Torvalds 551da177e4SLinus Torvalds struct vm_operations_struct generic_file_vm_ops = { 561da177e4SLinus Torvalds }; 571da177e4SLinus Torvalds 581da177e4SLinus Torvalds /* 591da177e4SLinus Torvalds * Handle all mappings that got truncated by a "truncate()" 601da177e4SLinus Torvalds * system call. 611da177e4SLinus Torvalds * 621da177e4SLinus Torvalds * NOTE! We have to be ready to update the memory sharing 631da177e4SLinus Torvalds * between the file and the memory map for a potential last 641da177e4SLinus Torvalds * incomplete page. Ugly, but necessary. 
651da177e4SLinus Torvalds */ 661da177e4SLinus Torvalds int vmtruncate(struct inode *inode, loff_t offset) 671da177e4SLinus Torvalds { 681da177e4SLinus Torvalds struct address_space *mapping = inode->i_mapping; 691da177e4SLinus Torvalds unsigned long limit; 701da177e4SLinus Torvalds 711da177e4SLinus Torvalds if (inode->i_size < offset) 721da177e4SLinus Torvalds goto do_expand; 731da177e4SLinus Torvalds i_size_write(inode, offset); 741da177e4SLinus Torvalds 751da177e4SLinus Torvalds truncate_inode_pages(mapping, offset); 761da177e4SLinus Torvalds goto out_truncate; 771da177e4SLinus Torvalds 781da177e4SLinus Torvalds do_expand: 791da177e4SLinus Torvalds limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; 801da177e4SLinus Torvalds if (limit != RLIM_INFINITY && offset > limit) 811da177e4SLinus Torvalds goto out_sig; 821da177e4SLinus Torvalds if (offset > inode->i_sb->s_maxbytes) 831da177e4SLinus Torvalds goto out; 841da177e4SLinus Torvalds i_size_write(inode, offset); 851da177e4SLinus Torvalds 861da177e4SLinus Torvalds out_truncate: 871da177e4SLinus Torvalds if (inode->i_op && inode->i_op->truncate) 881da177e4SLinus Torvalds inode->i_op->truncate(inode); 891da177e4SLinus Torvalds return 0; 901da177e4SLinus Torvalds out_sig: 911da177e4SLinus Torvalds send_sig(SIGXFSZ, current, 0); 921da177e4SLinus Torvalds out: 931da177e4SLinus Torvalds return -EFBIG; 941da177e4SLinus Torvalds } 951da177e4SLinus Torvalds 961da177e4SLinus Torvalds EXPORT_SYMBOL(vmtruncate); 971da177e4SLinus Torvalds 981da177e4SLinus Torvalds /* 991da177e4SLinus Torvalds * Return the total memory allocated for this pointer, not 1001da177e4SLinus Torvalds * just what the caller asked for. 1011da177e4SLinus Torvalds * 1021da177e4SLinus Torvalds * Doesn't have to be accurate, i.e. may have races. 
1031da177e4SLinus Torvalds */ 1041da177e4SLinus Torvalds unsigned int kobjsize(const void *objp) 1051da177e4SLinus Torvalds { 1061da177e4SLinus Torvalds struct page *page; 107*6cfd53fcSPaul Mundt int order = 0; 1081da177e4SLinus Torvalds 1094016a139SMichael Hennerich /* 1104016a139SMichael Hennerich * If the object we have should not have ksize performed on it, 1114016a139SMichael Hennerich * return size of 0 1124016a139SMichael Hennerich */ 113*6cfd53fcSPaul Mundt if (!objp) 1141da177e4SLinus Torvalds return 0; 1151da177e4SLinus Torvalds 116*6cfd53fcSPaul Mundt if ((unsigned long)objp >= memory_end) 117*6cfd53fcSPaul Mundt return 0; 118*6cfd53fcSPaul Mundt 119*6cfd53fcSPaul Mundt page = virt_to_head_page(objp); 120*6cfd53fcSPaul Mundt if (!page) 121*6cfd53fcSPaul Mundt return 0; 122*6cfd53fcSPaul Mundt 123*6cfd53fcSPaul Mundt /* 124*6cfd53fcSPaul Mundt * If the allocator sets PageSlab, we know the pointer came from 125*6cfd53fcSPaul Mundt * kmalloc(). 126*6cfd53fcSPaul Mundt */ 1271da177e4SLinus Torvalds if (PageSlab(page)) 1281da177e4SLinus Torvalds return ksize(objp); 1291da177e4SLinus Torvalds 130*6cfd53fcSPaul Mundt /* 131*6cfd53fcSPaul Mundt * The ksize() function is only guaranteed to work for pointers 132*6cfd53fcSPaul Mundt * returned by kmalloc(). So handle arbitrary pointers, that we expect 133*6cfd53fcSPaul Mundt * always to be compound pages, here. 134*6cfd53fcSPaul Mundt */ 135*6cfd53fcSPaul Mundt if (PageCompound(page)) 136*6cfd53fcSPaul Mundt order = compound_order(page); 1371da177e4SLinus Torvalds 138*6cfd53fcSPaul Mundt /* 139*6cfd53fcSPaul Mundt * Finally, handle arbitrary pointers that don't set PageSlab. 140*6cfd53fcSPaul Mundt * Default to 0-order in the case when we're unable to ksize() 141*6cfd53fcSPaul Mundt * the object. 
142*6cfd53fcSPaul Mundt */ 143*6cfd53fcSPaul Mundt return PAGE_SIZE << order; 1441da177e4SLinus Torvalds } 1451da177e4SLinus Torvalds 1461da177e4SLinus Torvalds /* 1477b4d5b8bSDavid Howells * get a list of pages in an address range belonging to the specified process 1487b4d5b8bSDavid Howells * and indicate the VMA that covers each page 1497b4d5b8bSDavid Howells * - this is potentially dodgy as we may end incrementing the page count of a 1507b4d5b8bSDavid Howells * slab page or a secondary page from a compound page 1517b4d5b8bSDavid Howells * - don't permit access to VMAs that don't support it, such as I/O mappings 1521da177e4SLinus Torvalds */ 1531da177e4SLinus Torvalds int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 1541da177e4SLinus Torvalds unsigned long start, int len, int write, int force, 1551da177e4SLinus Torvalds struct page **pages, struct vm_area_struct **vmas) 1561da177e4SLinus Torvalds { 157910e46daSSonic Zhang struct vm_area_struct *vma; 1587b4d5b8bSDavid Howells unsigned long vm_flags; 1597b4d5b8bSDavid Howells int i; 1607b4d5b8bSDavid Howells 1617b4d5b8bSDavid Howells /* calculate required read or write permissions. 1627b4d5b8bSDavid Howells * - if 'force' is set, we only require the "MAY" flags. 1637b4d5b8bSDavid Howells */ 1647b4d5b8bSDavid Howells vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); 1657b4d5b8bSDavid Howells vm_flags &= force ? 
(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); 1661da177e4SLinus Torvalds 1671da177e4SLinus Torvalds for (i = 0; i < len; i++) { 168910e46daSSonic Zhang vma = find_vma(mm, start); 169910e46daSSonic Zhang if (!vma) 1707b4d5b8bSDavid Howells goto finish_or_fault; 1717b4d5b8bSDavid Howells 1727b4d5b8bSDavid Howells /* protect what we can, including chardevs */ 1737b4d5b8bSDavid Howells if (vma->vm_flags & (VM_IO | VM_PFNMAP) || 1747b4d5b8bSDavid Howells !(vm_flags & vma->vm_flags)) 1757b4d5b8bSDavid Howells goto finish_or_fault; 176910e46daSSonic Zhang 1771da177e4SLinus Torvalds if (pages) { 1781da177e4SLinus Torvalds pages[i] = virt_to_page(start); 1791da177e4SLinus Torvalds if (pages[i]) 1801da177e4SLinus Torvalds page_cache_get(pages[i]); 1811da177e4SLinus Torvalds } 1821da177e4SLinus Torvalds if (vmas) 183910e46daSSonic Zhang vmas[i] = vma; 1841da177e4SLinus Torvalds start += PAGE_SIZE; 1851da177e4SLinus Torvalds } 1867b4d5b8bSDavid Howells 1877b4d5b8bSDavid Howells return i; 1887b4d5b8bSDavid Howells 1897b4d5b8bSDavid Howells finish_or_fault: 1907b4d5b8bSDavid Howells return i ? : -EFAULT; 1911da177e4SLinus Torvalds } 19266aa2b4bSGreg Ungerer EXPORT_SYMBOL(get_user_pages); 19366aa2b4bSGreg Ungerer 1941da177e4SLinus Torvalds DEFINE_RWLOCK(vmlist_lock); 1951da177e4SLinus Torvalds struct vm_struct *vmlist; 1961da177e4SLinus Torvalds 197b3bdda02SChristoph Lameter void vfree(const void *addr) 1981da177e4SLinus Torvalds { 1991da177e4SLinus Torvalds kfree(addr); 2001da177e4SLinus Torvalds } 201b5073173SPaul Mundt EXPORT_SYMBOL(vfree); 2021da177e4SLinus Torvalds 203dd0fc66fSAl Viro void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) 2041da177e4SLinus Torvalds { 2051da177e4SLinus Torvalds /* 2068518609dSRobert P. J. Day * You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc() 2078518609dSRobert P. J. Day * returns only a logical address. 
2081da177e4SLinus Torvalds */ 20984097518SNick Piggin return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM); 2101da177e4SLinus Torvalds } 211b5073173SPaul Mundt EXPORT_SYMBOL(__vmalloc); 2121da177e4SLinus Torvalds 213f905bc44SPaul Mundt void *vmalloc_user(unsigned long size) 214f905bc44SPaul Mundt { 215f905bc44SPaul Mundt void *ret; 216f905bc44SPaul Mundt 217f905bc44SPaul Mundt ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, 218f905bc44SPaul Mundt PAGE_KERNEL); 219f905bc44SPaul Mundt if (ret) { 220f905bc44SPaul Mundt struct vm_area_struct *vma; 221f905bc44SPaul Mundt 222f905bc44SPaul Mundt down_write(¤t->mm->mmap_sem); 223f905bc44SPaul Mundt vma = find_vma(current->mm, (unsigned long)ret); 224f905bc44SPaul Mundt if (vma) 225f905bc44SPaul Mundt vma->vm_flags |= VM_USERMAP; 226f905bc44SPaul Mundt up_write(¤t->mm->mmap_sem); 227f905bc44SPaul Mundt } 228f905bc44SPaul Mundt 229f905bc44SPaul Mundt return ret; 230f905bc44SPaul Mundt } 231f905bc44SPaul Mundt EXPORT_SYMBOL(vmalloc_user); 232f905bc44SPaul Mundt 233b3bdda02SChristoph Lameter struct page *vmalloc_to_page(const void *addr) 2341da177e4SLinus Torvalds { 2351da177e4SLinus Torvalds return virt_to_page(addr); 2361da177e4SLinus Torvalds } 237b5073173SPaul Mundt EXPORT_SYMBOL(vmalloc_to_page); 2381da177e4SLinus Torvalds 239b3bdda02SChristoph Lameter unsigned long vmalloc_to_pfn(const void *addr) 2401da177e4SLinus Torvalds { 2411da177e4SLinus Torvalds return page_to_pfn(virt_to_page(addr)); 2421da177e4SLinus Torvalds } 243b5073173SPaul Mundt EXPORT_SYMBOL(vmalloc_to_pfn); 2441da177e4SLinus Torvalds 2451da177e4SLinus Torvalds long vread(char *buf, char *addr, unsigned long count) 2461da177e4SLinus Torvalds { 2471da177e4SLinus Torvalds memcpy(buf, addr, count); 2481da177e4SLinus Torvalds return count; 2491da177e4SLinus Torvalds } 2501da177e4SLinus Torvalds 2511da177e4SLinus Torvalds long vwrite(char *buf, char *addr, unsigned long count) 2521da177e4SLinus Torvalds { 2531da177e4SLinus Torvalds /* 
Don't allow overflow */ 2541da177e4SLinus Torvalds if ((unsigned long) addr + count < count) 2551da177e4SLinus Torvalds count = -(unsigned long) addr; 2561da177e4SLinus Torvalds 2571da177e4SLinus Torvalds memcpy(addr, buf, count); 2581da177e4SLinus Torvalds return(count); 2591da177e4SLinus Torvalds } 2601da177e4SLinus Torvalds 2611da177e4SLinus Torvalds /* 2621da177e4SLinus Torvalds * vmalloc - allocate virtually continguos memory 2631da177e4SLinus Torvalds * 2641da177e4SLinus Torvalds * @size: allocation size 2651da177e4SLinus Torvalds * 2661da177e4SLinus Torvalds * Allocate enough pages to cover @size from the page level 2671da177e4SLinus Torvalds * allocator and map them into continguos kernel virtual space. 2681da177e4SLinus Torvalds * 269c1c8897fSMichael Opdenacker * For tight control over page level allocator and protection flags 2701da177e4SLinus Torvalds * use __vmalloc() instead. 2711da177e4SLinus Torvalds */ 2721da177e4SLinus Torvalds void *vmalloc(unsigned long size) 2731da177e4SLinus Torvalds { 2741da177e4SLinus Torvalds return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL); 2751da177e4SLinus Torvalds } 276f6138882SAndrew Morton EXPORT_SYMBOL(vmalloc); 277f6138882SAndrew Morton 278f6138882SAndrew Morton void *vmalloc_node(unsigned long size, int node) 279f6138882SAndrew Morton { 280f6138882SAndrew Morton return vmalloc(size); 281f6138882SAndrew Morton } 282f6138882SAndrew Morton EXPORT_SYMBOL(vmalloc_node); 2831da177e4SLinus Torvalds 284b5073173SPaul Mundt /** 285b5073173SPaul Mundt * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) 2861da177e4SLinus Torvalds * @size: allocation size 2871da177e4SLinus Torvalds * 2881da177e4SLinus Torvalds * Allocate enough 32bit PA addressable pages to cover @size from the 2891da177e4SLinus Torvalds * page level allocator and map them into continguos kernel virtual space. 
2901da177e4SLinus Torvalds */ 2911da177e4SLinus Torvalds void *vmalloc_32(unsigned long size) 2921da177e4SLinus Torvalds { 2931da177e4SLinus Torvalds return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL); 2941da177e4SLinus Torvalds } 295b5073173SPaul Mundt EXPORT_SYMBOL(vmalloc_32); 296b5073173SPaul Mundt 297b5073173SPaul Mundt /** 298b5073173SPaul Mundt * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory 299b5073173SPaul Mundt * @size: allocation size 300b5073173SPaul Mundt * 301b5073173SPaul Mundt * The resulting memory area is 32bit addressable and zeroed so it can be 302b5073173SPaul Mundt * mapped to userspace without leaking data. 303f905bc44SPaul Mundt * 304f905bc44SPaul Mundt * VM_USERMAP is set on the corresponding VMA so that subsequent calls to 305f905bc44SPaul Mundt * remap_vmalloc_range() are permissible. 306b5073173SPaul Mundt */ 307b5073173SPaul Mundt void *vmalloc_32_user(unsigned long size) 308b5073173SPaul Mundt { 309f905bc44SPaul Mundt /* 310f905bc44SPaul Mundt * We'll have to sort out the ZONE_DMA bits for 64-bit, 311f905bc44SPaul Mundt * but for now this can simply use vmalloc_user() directly. 
312f905bc44SPaul Mundt */ 313f905bc44SPaul Mundt return vmalloc_user(size); 314b5073173SPaul Mundt } 315b5073173SPaul Mundt EXPORT_SYMBOL(vmalloc_32_user); 3161da177e4SLinus Torvalds 3171da177e4SLinus Torvalds void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot) 3181da177e4SLinus Torvalds { 3191da177e4SLinus Torvalds BUG(); 3201da177e4SLinus Torvalds return NULL; 3211da177e4SLinus Torvalds } 322b5073173SPaul Mundt EXPORT_SYMBOL(vmap); 3231da177e4SLinus Torvalds 324b3bdda02SChristoph Lameter void vunmap(const void *addr) 3251da177e4SLinus Torvalds { 3261da177e4SLinus Torvalds BUG(); 3271da177e4SLinus Torvalds } 328b5073173SPaul Mundt EXPORT_SYMBOL(vunmap); 3291da177e4SLinus Torvalds 3301da177e4SLinus Torvalds /* 3311eeb66a1SChristoph Hellwig * Implement a stub for vmalloc_sync_all() if the architecture chose not to 3321eeb66a1SChristoph Hellwig * have one. 3331eeb66a1SChristoph Hellwig */ 3341eeb66a1SChristoph Hellwig void __attribute__((weak)) vmalloc_sync_all(void) 3351eeb66a1SChristoph Hellwig { 3361eeb66a1SChristoph Hellwig } 3371eeb66a1SChristoph Hellwig 338b5073173SPaul Mundt int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, 339b5073173SPaul Mundt struct page *page) 340b5073173SPaul Mundt { 341b5073173SPaul Mundt return -EINVAL; 342b5073173SPaul Mundt } 343b5073173SPaul Mundt EXPORT_SYMBOL(vm_insert_page); 344b5073173SPaul Mundt 3451eeb66a1SChristoph Hellwig /* 3461da177e4SLinus Torvalds * sys_brk() for the most part doesn't need the global kernel 3471da177e4SLinus Torvalds * lock, except when an application is doing something nasty 3481da177e4SLinus Torvalds * like trying to un-brk an area that has already been mapped 3491da177e4SLinus Torvalds * to a regular file. in this case, the unmapping will need 3501da177e4SLinus Torvalds * to invoke file system routines that need the global lock. 
3511da177e4SLinus Torvalds */ 3521da177e4SLinus Torvalds asmlinkage unsigned long sys_brk(unsigned long brk) 3531da177e4SLinus Torvalds { 3541da177e4SLinus Torvalds struct mm_struct *mm = current->mm; 3551da177e4SLinus Torvalds 3561da177e4SLinus Torvalds if (brk < mm->start_brk || brk > mm->context.end_brk) 3571da177e4SLinus Torvalds return mm->brk; 3581da177e4SLinus Torvalds 3591da177e4SLinus Torvalds if (mm->brk == brk) 3601da177e4SLinus Torvalds return mm->brk; 3611da177e4SLinus Torvalds 3621da177e4SLinus Torvalds /* 3631da177e4SLinus Torvalds * Always allow shrinking brk 3641da177e4SLinus Torvalds */ 3651da177e4SLinus Torvalds if (brk <= mm->brk) { 3661da177e4SLinus Torvalds mm->brk = brk; 3671da177e4SLinus Torvalds return brk; 3681da177e4SLinus Torvalds } 3691da177e4SLinus Torvalds 3701da177e4SLinus Torvalds /* 3711da177e4SLinus Torvalds * Ok, looks good - let it rip. 3721da177e4SLinus Torvalds */ 3731da177e4SLinus Torvalds return mm->brk = brk; 3741da177e4SLinus Torvalds } 3751da177e4SLinus Torvalds 3761da177e4SLinus Torvalds #ifdef DEBUG 3771da177e4SLinus Torvalds static void show_process_blocks(void) 3781da177e4SLinus Torvalds { 3791da177e4SLinus Torvalds struct vm_list_struct *vml; 3801da177e4SLinus Torvalds 3811da177e4SLinus Torvalds printk("Process blocks %d:", current->pid); 3821da177e4SLinus Torvalds 3831da177e4SLinus Torvalds for (vml = ¤t->mm->context.vmlist; vml; vml = vml->next) { 3841da177e4SLinus Torvalds printk(" %p: %p", vml, vml->vma); 3851da177e4SLinus Torvalds if (vml->vma) 3861da177e4SLinus Torvalds printk(" (%d @%lx #%d)", 3871da177e4SLinus Torvalds kobjsize((void *) vml->vma->vm_start), 3881da177e4SLinus Torvalds vml->vma->vm_start, 3891da177e4SLinus Torvalds atomic_read(&vml->vma->vm_usage)); 3901da177e4SLinus Torvalds printk(vml->next ? 
" ->" : ".\n"); 3911da177e4SLinus Torvalds } 3921da177e4SLinus Torvalds } 3931da177e4SLinus Torvalds #endif /* DEBUG */ 3941da177e4SLinus Torvalds 3953034097aSDavid Howells /* 3963034097aSDavid Howells * add a VMA into a process's mm_struct in the appropriate place in the list 3973034097aSDavid Howells * - should be called with mm->mmap_sem held writelocked 3983034097aSDavid Howells */ 3993034097aSDavid Howells static void add_vma_to_mm(struct mm_struct *mm, struct vm_list_struct *vml) 4003034097aSDavid Howells { 4013034097aSDavid Howells struct vm_list_struct **ppv; 4023034097aSDavid Howells 4033034097aSDavid Howells for (ppv = ¤t->mm->context.vmlist; *ppv; ppv = &(*ppv)->next) 4043034097aSDavid Howells if ((*ppv)->vma->vm_start > vml->vma->vm_start) 4053034097aSDavid Howells break; 4063034097aSDavid Howells 4073034097aSDavid Howells vml->next = *ppv; 4083034097aSDavid Howells *ppv = vml; 4093034097aSDavid Howells } 4103034097aSDavid Howells 4113034097aSDavid Howells /* 4123034097aSDavid Howells * look up the first VMA in which addr resides, NULL if none 4133034097aSDavid Howells * - should be called with mm->mmap_sem at least held readlocked 4143034097aSDavid Howells */ 4153034097aSDavid Howells struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) 4163034097aSDavid Howells { 4173034097aSDavid Howells struct vm_list_struct *loop, *vml; 4183034097aSDavid Howells 4193034097aSDavid Howells /* search the vm_start ordered list */ 4203034097aSDavid Howells vml = NULL; 4213034097aSDavid Howells for (loop = mm->context.vmlist; loop; loop = loop->next) { 4223034097aSDavid Howells if (loop->vma->vm_start > addr) 4233034097aSDavid Howells break; 4243034097aSDavid Howells vml = loop; 4253034097aSDavid Howells } 4263034097aSDavid Howells 4273034097aSDavid Howells if (vml && vml->vma->vm_end > addr) 4283034097aSDavid Howells return vml->vma; 4293034097aSDavid Howells 4303034097aSDavid Howells return NULL; 4313034097aSDavid Howells } 4323034097aSDavid 
Howells EXPORT_SYMBOL(find_vma); 4333034097aSDavid Howells 4343034097aSDavid Howells /* 435930e652aSDavid Howells * find a VMA 436930e652aSDavid Howells * - we don't extend stack VMAs under NOMMU conditions 437930e652aSDavid Howells */ 438930e652aSDavid Howells struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) 439930e652aSDavid Howells { 440930e652aSDavid Howells return find_vma(mm, addr); 441930e652aSDavid Howells } 442930e652aSDavid Howells 44357c8f63eSGreg Ungerer int expand_stack(struct vm_area_struct *vma, unsigned long address) 44457c8f63eSGreg Ungerer { 44557c8f63eSGreg Ungerer return -ENOMEM; 44657c8f63eSGreg Ungerer } 44757c8f63eSGreg Ungerer 448930e652aSDavid Howells /* 4496fa5f80bSDavid Howells * look up the first VMA exactly that exactly matches addr 4506fa5f80bSDavid Howells * - should be called with mm->mmap_sem at least held readlocked 4516fa5f80bSDavid Howells */ 4526fa5f80bSDavid Howells static inline struct vm_area_struct *find_vma_exact(struct mm_struct *mm, 4536fa5f80bSDavid Howells unsigned long addr) 4546fa5f80bSDavid Howells { 4556fa5f80bSDavid Howells struct vm_list_struct *vml; 4566fa5f80bSDavid Howells 4576fa5f80bSDavid Howells /* search the vm_start ordered list */ 4586fa5f80bSDavid Howells for (vml = mm->context.vmlist; vml; vml = vml->next) { 4596fa5f80bSDavid Howells if (vml->vma->vm_start == addr) 4606fa5f80bSDavid Howells return vml->vma; 4616fa5f80bSDavid Howells if (vml->vma->vm_start > addr) 4626fa5f80bSDavid Howells break; 4636fa5f80bSDavid Howells } 4646fa5f80bSDavid Howells 4656fa5f80bSDavid Howells return NULL; 4666fa5f80bSDavid Howells } 4676fa5f80bSDavid Howells 4686fa5f80bSDavid Howells /* 4693034097aSDavid Howells * find a VMA in the global tree 4703034097aSDavid Howells */ 4711da177e4SLinus Torvalds static inline struct vm_area_struct *find_nommu_vma(unsigned long start) 4721da177e4SLinus Torvalds { 4731da177e4SLinus Torvalds struct vm_area_struct *vma; 4741da177e4SLinus Torvalds struct 
rb_node *n = nommu_vma_tree.rb_node; 4751da177e4SLinus Torvalds 4761da177e4SLinus Torvalds while (n) { 4771da177e4SLinus Torvalds vma = rb_entry(n, struct vm_area_struct, vm_rb); 4781da177e4SLinus Torvalds 4791da177e4SLinus Torvalds if (start < vma->vm_start) 4801da177e4SLinus Torvalds n = n->rb_left; 4811da177e4SLinus Torvalds else if (start > vma->vm_start) 4821da177e4SLinus Torvalds n = n->rb_right; 4831da177e4SLinus Torvalds else 4841da177e4SLinus Torvalds return vma; 4851da177e4SLinus Torvalds } 4861da177e4SLinus Torvalds 4871da177e4SLinus Torvalds return NULL; 4881da177e4SLinus Torvalds } 4891da177e4SLinus Torvalds 4903034097aSDavid Howells /* 4913034097aSDavid Howells * add a VMA in the global tree 4923034097aSDavid Howells */ 4931da177e4SLinus Torvalds static void add_nommu_vma(struct vm_area_struct *vma) 4941da177e4SLinus Torvalds { 4951da177e4SLinus Torvalds struct vm_area_struct *pvma; 4961da177e4SLinus Torvalds struct address_space *mapping; 4971da177e4SLinus Torvalds struct rb_node **p = &nommu_vma_tree.rb_node; 4981da177e4SLinus Torvalds struct rb_node *parent = NULL; 4991da177e4SLinus Torvalds 5001da177e4SLinus Torvalds /* add the VMA to the mapping */ 5011da177e4SLinus Torvalds if (vma->vm_file) { 5021da177e4SLinus Torvalds mapping = vma->vm_file->f_mapping; 5031da177e4SLinus Torvalds 5041da177e4SLinus Torvalds flush_dcache_mmap_lock(mapping); 5051da177e4SLinus Torvalds vma_prio_tree_insert(vma, &mapping->i_mmap); 5061da177e4SLinus Torvalds flush_dcache_mmap_unlock(mapping); 5071da177e4SLinus Torvalds } 5081da177e4SLinus Torvalds 5091da177e4SLinus Torvalds /* add the VMA to the master list */ 5101da177e4SLinus Torvalds while (*p) { 5111da177e4SLinus Torvalds parent = *p; 5121da177e4SLinus Torvalds pvma = rb_entry(parent, struct vm_area_struct, vm_rb); 5131da177e4SLinus Torvalds 5141da177e4SLinus Torvalds if (vma->vm_start < pvma->vm_start) { 5151da177e4SLinus Torvalds p = &(*p)->rb_left; 5161da177e4SLinus Torvalds } 5171da177e4SLinus Torvalds else 
if (vma->vm_start > pvma->vm_start) { 5181da177e4SLinus Torvalds p = &(*p)->rb_right; 5191da177e4SLinus Torvalds } 5201da177e4SLinus Torvalds else { 5211da177e4SLinus Torvalds /* mappings are at the same address - this can only 5221da177e4SLinus Torvalds * happen for shared-mem chardevs and shared file 5231da177e4SLinus Torvalds * mappings backed by ramfs/tmpfs */ 5241da177e4SLinus Torvalds BUG_ON(!(pvma->vm_flags & VM_SHARED)); 5251da177e4SLinus Torvalds 5261da177e4SLinus Torvalds if (vma < pvma) 5271da177e4SLinus Torvalds p = &(*p)->rb_left; 5281da177e4SLinus Torvalds else if (vma > pvma) 5291da177e4SLinus Torvalds p = &(*p)->rb_right; 5301da177e4SLinus Torvalds else 5311da177e4SLinus Torvalds BUG(); 5321da177e4SLinus Torvalds } 5331da177e4SLinus Torvalds } 5341da177e4SLinus Torvalds 5351da177e4SLinus Torvalds rb_link_node(&vma->vm_rb, parent, p); 5361da177e4SLinus Torvalds rb_insert_color(&vma->vm_rb, &nommu_vma_tree); 5371da177e4SLinus Torvalds } 5381da177e4SLinus Torvalds 5393034097aSDavid Howells /* 5403034097aSDavid Howells * delete a VMA from the global list 5413034097aSDavid Howells */ 5421da177e4SLinus Torvalds static void delete_nommu_vma(struct vm_area_struct *vma) 5431da177e4SLinus Torvalds { 5441da177e4SLinus Torvalds struct address_space *mapping; 5451da177e4SLinus Torvalds 5461da177e4SLinus Torvalds /* remove the VMA from the mapping */ 5471da177e4SLinus Torvalds if (vma->vm_file) { 5481da177e4SLinus Torvalds mapping = vma->vm_file->f_mapping; 5491da177e4SLinus Torvalds 5501da177e4SLinus Torvalds flush_dcache_mmap_lock(mapping); 5511da177e4SLinus Torvalds vma_prio_tree_remove(vma, &mapping->i_mmap); 5521da177e4SLinus Torvalds flush_dcache_mmap_unlock(mapping); 5531da177e4SLinus Torvalds } 5541da177e4SLinus Torvalds 5551da177e4SLinus Torvalds /* remove from the master list */ 5561da177e4SLinus Torvalds rb_erase(&vma->vm_rb, &nommu_vma_tree); 5571da177e4SLinus Torvalds } 5581da177e4SLinus Torvalds 5591da177e4SLinus Torvalds /* 5601da177e4SLinus 
Torvalds * determine whether a mapping should be permitted and, if so, what sort of 5611da177e4SLinus Torvalds * mapping we're capable of supporting 5621da177e4SLinus Torvalds */ 5631da177e4SLinus Torvalds static int validate_mmap_request(struct file *file, 5641da177e4SLinus Torvalds unsigned long addr, 5651da177e4SLinus Torvalds unsigned long len, 5661da177e4SLinus Torvalds unsigned long prot, 5671da177e4SLinus Torvalds unsigned long flags, 5681da177e4SLinus Torvalds unsigned long pgoff, 5691da177e4SLinus Torvalds unsigned long *_capabilities) 5701da177e4SLinus Torvalds { 5711da177e4SLinus Torvalds unsigned long capabilities; 5721da177e4SLinus Torvalds unsigned long reqprot = prot; 5731da177e4SLinus Torvalds int ret; 5741da177e4SLinus Torvalds 5751da177e4SLinus Torvalds /* do the simple checks first */ 5761da177e4SLinus Torvalds if (flags & MAP_FIXED || addr) { 5771da177e4SLinus Torvalds printk(KERN_DEBUG 5781da177e4SLinus Torvalds "%d: Can't do fixed-address/overlay mmap of RAM\n", 5791da177e4SLinus Torvalds current->pid); 5801da177e4SLinus Torvalds return -EINVAL; 5811da177e4SLinus Torvalds } 5821da177e4SLinus Torvalds 5831da177e4SLinus Torvalds if ((flags & MAP_TYPE) != MAP_PRIVATE && 5841da177e4SLinus Torvalds (flags & MAP_TYPE) != MAP_SHARED) 5851da177e4SLinus Torvalds return -EINVAL; 5861da177e4SLinus Torvalds 587f81cff0dSMike Frysinger if (!len) 5881da177e4SLinus Torvalds return -EINVAL; 5891da177e4SLinus Torvalds 590f81cff0dSMike Frysinger /* Careful about overflows.. */ 591f81cff0dSMike Frysinger len = PAGE_ALIGN(len); 592f81cff0dSMike Frysinger if (!len || len > TASK_SIZE) 593f81cff0dSMike Frysinger return -ENOMEM; 594f81cff0dSMike Frysinger 5951da177e4SLinus Torvalds /* offset overflow? 
 */
	/* overflow check: the mapping's last page offset must not wrap */
	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
		return -EOVERFLOW;

	if (file) {
		/* validate file mapping requests */
		struct address_space *mapping;

		/* files must support mmap */
		if (!file->f_op || !file->f_op->mmap)
			return -ENODEV;

		/* work out if what we've got could possibly be shared
		 * - we support chardevs that provide their own "memory"
		 * - we support files/blockdevs that are memory backed
		 */
		mapping = file->f_mapping;
		if (!mapping)
			mapping = file->f_path.dentry->d_inode->i_mapping;

		/* capability bits come from the backing device, if any */
		capabilities = 0;
		if (mapping && mapping->backing_dev_info)
			capabilities = mapping->backing_dev_info->capabilities;

		if (!capabilities) {
			/* no explicit capabilities set, so assume some
			 * defaults based on the file type */
			switch (file->f_path.dentry->d_inode->i_mode & S_IFMT) {
			case S_IFREG:
			case S_IFBLK:
				/* regular files/blockdevs: copy into private memory */
				capabilities = BDI_CAP_MAP_COPY;
				break;

			case S_IFCHR:
				/* chardevs: may map their own memory directly */
				capabilities =
					BDI_CAP_MAP_DIRECT |
					BDI_CAP_READ_MAP |
					BDI_CAP_WRITE_MAP;
				break;

			default:
				return -EINVAL;
			}
		}

		/* eliminate any capabilities that we can't support on this
		 * device */
		if (!file->f_op->get_unmapped_area)
			capabilities &= ~BDI_CAP_MAP_DIRECT;
		if (!file->f_op->read)
			capabilities &= ~BDI_CAP_MAP_COPY;

		if (flags & MAP_SHARED) {
			/* do checks for writing, appending and locking */
			if ((prot & PROT_WRITE) &&
			    !(file->f_mode & FMODE_WRITE))
				return -EACCES;

			if (IS_APPEND(file->f_path.dentry->d_inode) &&
			    (file->f_mode & FMODE_WRITE))
				return -EACCES;

			if (locks_verify_locked(file->f_path.dentry->d_inode))
				return -EAGAIN;

			/* MAP_SHARED on NOMMU requires direct mapping support */
			if (!(capabilities & BDI_CAP_MAP_DIRECT))
				return -ENODEV;

			if (((prot & PROT_READ) && !(capabilities & BDI_CAP_READ_MAP)) ||
			    ((prot & PROT_WRITE) && !(capabilities & BDI_CAP_WRITE_MAP)) ||
			    ((prot & PROT_EXEC) && !(capabilities & BDI_CAP_EXEC_MAP))
			    ) {
				printk("MAP_SHARED not completely supported on !MMU\n");
				return -EINVAL;
			}

			/* we mustn't privatise shared mappings */
			capabilities &= ~BDI_CAP_MAP_COPY;
		}
		else {
			/* we're going to read the file into private memory we
			 * allocate */
			if (!(capabilities & BDI_CAP_MAP_COPY))
				return -ENODEV;

			/* we don't permit a private writable mapping to be
			 * shared with the backing device */
			if (prot & PROT_WRITE)
				capabilities &= ~BDI_CAP_MAP_DIRECT;
		}

		/* handle executable mappings and implied executable
		 * mappings */
		if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
			/* mount is noexec: refuse explicit PROT_EXEC */
			if (prot & PROT_EXEC)
				return -EPERM;
		}
		else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) {
			/* handle implication of PROT_EXEC by PROT_READ */
			if (current->personality & READ_IMPLIES_EXEC) {
				if (capabilities & BDI_CAP_EXEC_MAP)
					prot |= PROT_EXEC;
			}
		}
		else if ((prot & PROT_READ) &&
			 (prot & PROT_EXEC) &&
			 !(capabilities & BDI_CAP_EXEC_MAP)
			 ) {
			/* backing file is not executable, try to copy */
			capabilities &= ~BDI_CAP_MAP_DIRECT;
		}
	}
	else {
		/* anonymous mappings are always memory backed and can be
		 * privately mapped
		 */
		capabilities = BDI_CAP_MAP_COPY;

		/* handle PROT_EXEC implication by PROT_READ */
		if ((prot & PROT_READ) &&
		    (current->personality & READ_IMPLIES_EXEC))
			prot |= PROT_EXEC;
	}

	/* allow the security API to have its say */
	ret = security_file_mmap(file, reqprot, prot, flags, addr, 0);
	if (ret < 0)
		return ret;

	/* looks okay */
	*_capabilities = capabilities;
	return 0;
}

/*
 * we've determined that we can make the mapping, now translate what we
 * now know into VMA flags
 */
static unsigned long determine_vm_flags(struct file *file,
					unsigned long prot,
					unsigned long flags,
					unsigned long capabilities)
{
	unsigned long vm_flags;

	vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags);
	vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
	/* vm_flags |= mm->def_flags; */

	if (!(capabilities & BDI_CAP_MAP_DIRECT)) {
		/* attempt to share read-only copies of mapped file chunks */
		if (file && !(prot & PROT_WRITE))
			vm_flags |= VM_MAYSHARE;
	}
	else {
		/* overlay a shareable mapping on the backing device or inode
		 * if possible - used for chardevs, ramfs/tmpfs/shmfs and
		 * romfs/cramfs */
		if (flags & MAP_SHARED)
			vm_flags |= VM_MAYSHARE | VM_SHARED;
		else if ((((vm_flags & capabilities) ^ vm_flags) & BDI_CAP_VMFLAGS) == 0)
			/* device supports every requested access mode */
			vm_flags |= VM_MAYSHARE;
	}

	/* refuse to let anyone share private mappings with this process if
	 * it's being traced - otherwise breakpoints set in it may interfere
	 * with another untraced process
	 */
	if ((flags & MAP_PRIVATE) && (current->ptrace & PT_PTRACED))
		vm_flags &= ~VM_MAYSHARE;

	return vm_flags;
}

/*
 * set up a shared mapping on a file
 * - delegates entirely to the file's ->mmap(); returns its result, except
 *   that -ENOSYS is translated to -ENODEV (see comment below)
 */
static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len)
{
	int ret;

	ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
	if (ret != -ENOSYS)
		return ret;

	/* getting an ENOSYS error indicates that direct mmap isn't
	 * possible (as opposed to tried but failed) so we'll fall
	 * through to making a private copy of the data and mapping
	 * that if we can */
	return -ENODEV;
}

/*
 * set up a private mapping or an anonymous shared mapping
7891da177e4SLinus Torvalds */ 7901da177e4SLinus Torvalds static int do_mmap_private(struct vm_area_struct *vma, unsigned long len) 7911da177e4SLinus Torvalds { 7921da177e4SLinus Torvalds void *base; 7931da177e4SLinus Torvalds int ret; 7941da177e4SLinus Torvalds 7951da177e4SLinus Torvalds /* invoke the file's mapping function so that it can keep track of 7961da177e4SLinus Torvalds * shared mappings on devices or memory 7971da177e4SLinus Torvalds * - VM_MAYSHARE will be set if it may attempt to share 7981da177e4SLinus Torvalds */ 7991da177e4SLinus Torvalds if (vma->vm_file) { 8001da177e4SLinus Torvalds ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); 8011da177e4SLinus Torvalds if (ret != -ENOSYS) { 8021da177e4SLinus Torvalds /* shouldn't return success if we're not sharing */ 8031da177e4SLinus Torvalds BUG_ON(ret == 0 && !(vma->vm_flags & VM_MAYSHARE)); 8041da177e4SLinus Torvalds return ret; /* success or a real error */ 8051da177e4SLinus Torvalds } 8061da177e4SLinus Torvalds 8071da177e4SLinus Torvalds /* getting an ENOSYS error indicates that direct mmap isn't 8081da177e4SLinus Torvalds * possible (as opposed to tried but failed) so we'll try to 8091da177e4SLinus Torvalds * make a private copy of the data and map that instead */ 8101da177e4SLinus Torvalds } 8111da177e4SLinus Torvalds 8121da177e4SLinus Torvalds /* allocate some memory to hold the mapping 8131da177e4SLinus Torvalds * - note that this may not return a page-aligned address if the object 8141da177e4SLinus Torvalds * we're allocating is smaller than a page 8151da177e4SLinus Torvalds */ 81684097518SNick Piggin base = kmalloc(len, GFP_KERNEL|__GFP_COMP); 8171da177e4SLinus Torvalds if (!base) 8181da177e4SLinus Torvalds goto enomem; 8191da177e4SLinus Torvalds 8201da177e4SLinus Torvalds vma->vm_start = (unsigned long) base; 8211da177e4SLinus Torvalds vma->vm_end = vma->vm_start + len; 8221da177e4SLinus Torvalds vma->vm_flags |= VM_MAPPED_COPY; 8231da177e4SLinus Torvalds 8241da177e4SLinus Torvalds #ifdef 
WARN_ON_SLACK 8251da177e4SLinus Torvalds if (len + WARN_ON_SLACK <= kobjsize(result)) 8261da177e4SLinus Torvalds printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n", 8271da177e4SLinus Torvalds len, current->pid, kobjsize(result) - len); 8281da177e4SLinus Torvalds #endif 8291da177e4SLinus Torvalds 8301da177e4SLinus Torvalds if (vma->vm_file) { 8311da177e4SLinus Torvalds /* read the contents of a file into the copy */ 8321da177e4SLinus Torvalds mm_segment_t old_fs; 8331da177e4SLinus Torvalds loff_t fpos; 8341da177e4SLinus Torvalds 8351da177e4SLinus Torvalds fpos = vma->vm_pgoff; 8361da177e4SLinus Torvalds fpos <<= PAGE_SHIFT; 8371da177e4SLinus Torvalds 8381da177e4SLinus Torvalds old_fs = get_fs(); 8391da177e4SLinus Torvalds set_fs(KERNEL_DS); 8401da177e4SLinus Torvalds ret = vma->vm_file->f_op->read(vma->vm_file, base, len, &fpos); 8411da177e4SLinus Torvalds set_fs(old_fs); 8421da177e4SLinus Torvalds 8431da177e4SLinus Torvalds if (ret < 0) 8441da177e4SLinus Torvalds goto error_free; 8451da177e4SLinus Torvalds 8461da177e4SLinus Torvalds /* clear the last little bit */ 8471da177e4SLinus Torvalds if (ret < len) 8481da177e4SLinus Torvalds memset(base + ret, 0, len - ret); 8491da177e4SLinus Torvalds 8501da177e4SLinus Torvalds } else { 8511da177e4SLinus Torvalds /* if it's an anonymous mapping, then just clear it */ 8521da177e4SLinus Torvalds memset(base, 0, len); 8531da177e4SLinus Torvalds } 8541da177e4SLinus Torvalds 8551da177e4SLinus Torvalds return 0; 8561da177e4SLinus Torvalds 8571da177e4SLinus Torvalds error_free: 8581da177e4SLinus Torvalds kfree(base); 8591da177e4SLinus Torvalds vma->vm_start = 0; 8601da177e4SLinus Torvalds return ret; 8611da177e4SLinus Torvalds 8621da177e4SLinus Torvalds enomem: 8631da177e4SLinus Torvalds printk("Allocation of length %lu from process %d failed\n", 8641da177e4SLinus Torvalds len, current->pid); 8651da177e4SLinus Torvalds show_free_areas(); 8661da177e4SLinus Torvalds return -ENOMEM; 8671da177e4SLinus Torvalds } 

/*
 * handle mapping creation for uClinux
 * - validates the request, then either shares an existing VMA, maps the
 *   backing device directly, or makes a private copy of the data
 * - returns the mapped address on success, or a negative errno
 */
unsigned long do_mmap_pgoff(struct file *file,
			    unsigned long addr,
			    unsigned long len,
			    unsigned long prot,
			    unsigned long flags,
			    unsigned long pgoff)
{
	struct vm_list_struct *vml = NULL;
	struct vm_area_struct *vma = NULL;
	struct rb_node *rb;
	unsigned long capabilities, vm_flags;
	void *result;
	int ret;

	if (!(flags & MAP_FIXED))
		addr = round_hint_to_min(addr);

	/* decide whether we should attempt the mapping, and if so what sort of
	 * mapping */
	ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
				    &capabilities);
	if (ret < 0)
		return ret;

	/* we've determined that we can make the mapping, now translate what we
	 * now know into VMA flags */
	vm_flags = determine_vm_flags(file, prot, flags, capabilities);

	/* we're going to need to record the mapping if it works */
	vml = kzalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
	if (!vml)
		goto error_getting_vml;

	/* serialise against all other accesses to the global VMA tree */
	down_write(&nommu_vma_sem);

	/* if we want to share, we need to check for VMAs created by other
	 * mmap() calls that overlap with our proposed mapping
	 * - we can only share with an exact match on most regular files
	 * - shared mappings on character devices and memory backed files are
	 *   permitted to overlap inexactly as far as we are concerned for in
	 *   these cases, sharing is handled in the driver or filesystem rather
	 *   than here
	 */
	if (vm_flags & VM_MAYSHARE) {
		unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		unsigned long vmpglen;

		/* suppress VMA sharing for shared regions */
		if (vm_flags & VM_SHARED &&
		    capabilities & BDI_CAP_MAP_DIRECT)
			goto dont_share_VMAs;

		for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) {
			vma = rb_entry(rb, struct vm_area_struct, vm_rb);

			if (!(vma->vm_flags & VM_MAYSHARE))
				continue;

			/* search for overlapping mappings on the same file */
			if (vma->vm_file->f_path.dentry->d_inode != file->f_path.dentry->d_inode)
				continue;

			if (vma->vm_pgoff >= pgoff + pglen)
				continue;

			vmpglen = vma->vm_end - vma->vm_start + PAGE_SIZE - 1;
			vmpglen >>= PAGE_SHIFT;
			if (pgoff >= vma->vm_pgoff + vmpglen)
				continue;

			/* handle inexactly overlapping matches between mappings */
			if (vma->vm_pgoff != pgoff || vmpglen != pglen) {
				if (!(capabilities & BDI_CAP_MAP_DIRECT))
					goto sharing_violation;
				continue;
			}

			/* we've found a VMA we can share */
			atomic_inc(&vma->vm_usage);

			vml->vma = vma;
			result = (void *) vma->vm_start;
			goto shared;
		}

	dont_share_VMAs:
		vma = NULL;

		/* obtain the address at which to make a shared mapping
		 * - this is the hook for quasi-memory character devices to
		 *   tell us the location of a shared mapping
		 */
		if (file && file->f_op->get_unmapped_area) {
			addr = file->f_op->get_unmapped_area(file, addr, len,
							     pgoff, flags);
			if (IS_ERR((void *) addr)) {
				ret = addr;
				if (ret != (unsigned long) -ENOSYS)
					goto error;

				/* the driver refused to tell us where to site
				 * the mapping so we'll have to attempt to copy
				 * it */
				ret = (unsigned long) -ENODEV;
				if (!(capabilities & BDI_CAP_MAP_COPY))
					goto error;

				capabilities &= ~BDI_CAP_MAP_DIRECT;
			}
		}
	}

	/* we're going to need a VMA struct as well */
	vma = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
	if (!vma)
		goto error_getting_vma;

	INIT_LIST_HEAD(&vma->anon_vma_node);
	atomic_set(&vma->vm_usage, 1);
	if (file) {
		get_file(file);
		if (vm_flags & VM_EXECUTABLE) {
			added_exe_file_vma(current->mm);
			vma->vm_mm = current->mm;
		}
	}
	vma->vm_file = file;
	vma->vm_flags = vm_flags;
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_pgoff = pgoff;

	vml->vma = vma;

	/* set up the mapping */
	if (file && vma->vm_flags & VM_SHARED)
		ret = do_mmap_shared_file(vma, len);
	else
		ret = do_mmap_private(vma, len);
	if (ret < 0)
		goto error;

	/* okay... we have a mapping; now we have to register it */
	result = (void *) vma->vm_start;

	/* update the allocation accounting counters */
	if (vma->vm_flags & VM_MAPPED_COPY) {
		realalloc += kobjsize(result);
		askedalloc += len;
	}

	realalloc += kobjsize(vma);
	askedalloc += sizeof(*vma);

	current->mm->total_vm += len >> PAGE_SHIFT;

	add_nommu_vma(vma);

 shared:
	realalloc += kobjsize(vml);
	askedalloc += sizeof(*vml);

	add_vma_to_mm(current->mm, vml);

	up_write(&nommu_vma_sem);

	if (prot & PROT_EXEC)
		flush_icache_range((unsigned long) result,
				   (unsigned long) result + len);

#ifdef DEBUG
	printk("do_mmap:\n");
	show_process_blocks();
#endif

	return (unsigned long) result;

	/* error paths: drop the semaphore and free anything we allocated */
 error:
	up_write(&nommu_vma_sem);
	kfree(vml);
	if (vma) {
		if (vma->vm_file) {
			fput(vma->vm_file);
			if (vma->vm_flags & VM_EXECUTABLE)
				removed_exe_file_vma(vma->vm_mm);
		}
		kfree(vma);
	}
	return ret;

 sharing_violation:
	up_write(&nommu_vma_sem);
	printk("Attempt to share mismatched mappings\n");
	kfree(vml);
	return -EINVAL;

 error_getting_vma:
	up_write(&nommu_vma_sem);
	kfree(vml);
	printk("Allocation of vma for %lu byte allocation from process %d failed\n",
	       len, current->pid);
	show_free_areas();
	return -ENOMEM;

 error_getting_vml:
	printk("Allocation of vml for %lu byte allocation from process %d failed\n",
	       len, current->pid);
	show_free_areas();
	return -ENOMEM;
}
EXPORT_SYMBOL(do_mmap_pgoff);

/*
 * handle mapping disposal for uClinux
 * - drops one reference on the VMA; on the last reference the VMA and any
 *   private copy of the data are freed
 */
static void put_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
	if (vma) {
		down_write(&nommu_vma_sem);

		if (atomic_dec_and_test(&vma->vm_usage)) {
			delete_nommu_vma(vma);

			if (vma->vm_ops && vma->vm_ops->close)
				vma->vm_ops->close(vma);

			/* IO memory and memory shared directly out of the pagecache from
			 * ramfs/tmpfs
			 * mustn't be released here */
			if (vma->vm_flags & VM_MAPPED_COPY) {
				/* private copy: free the kmalloc'd buffer */
				realalloc -= kobjsize((void *) vma->vm_start);
				askedalloc -= vma->vm_end - vma->vm_start;
				kfree((void *) vma->vm_start);
			}

			realalloc -= kobjsize(vma);
			askedalloc -= sizeof(*vma);

			if (vma->vm_file) {
				fput(vma->vm_file);
				if (vma->vm_flags & VM_EXECUTABLE)
					removed_exe_file_vma(mm);
			}
			kfree(vma);
		}

		up_write(&nommu_vma_sem);
	}
}

/*
 * release a mapping
 * - under NOMMU conditions the parameters must match exactly to the mapping to
 *   be removed
 */
int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
{
	struct vm_list_struct *vml, **parent;
	unsigned long end = addr + len;

#ifdef DEBUG
	printk("do_munmap:\n");
#endif

	/* the per-mm VMA list is kept sorted by start address, so we can stop
	 * searching as soon as we pass addr */
	for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next) {
		if ((*parent)->vma->vm_start > addr)
			break;
		/* len == 0 matches any mapping starting at addr */
		if ((*parent)->vma->vm_start == addr &&
		    ((len == 0) || ((*parent)->vma->vm_end == end)))
			goto found;
	}

	printk("munmap of non-mmaped memory by process %d (%s): %p\n",
	       current->pid, current->comm, (void *) addr);
	return -EINVAL;

 found:
	vml = *parent;

	put_vma(mm, vml->vma);

	/* unlink and free the list node, then fix up the accounting */
	*parent = vml->next;
	realalloc -= kobjsize(vml);
	askedalloc -= sizeof(*vml);
	kfree(vml);

	update_hiwater_vm(mm);
	mm->total_vm -= len >> PAGE_SHIFT;

#ifdef DEBUG
	show_process_blocks();
#endif

	return 0;
}
EXPORT_SYMBOL(do_munmap);

/* munmap(2) entry point: takes mmap_sem and delegates to do_munmap() */
asmlinkage long sys_munmap(unsigned long addr, size_t len)
{
	int ret;
	struct mm_struct *mm = current->mm;

	down_write(&mm->mmap_sem);
	ret = do_munmap(mm, addr, len);
	up_write(&mm->mmap_sem);
	return ret;
}

/*
 * Release all mappings
 */
void exit_mmap(struct mm_struct * mm)
{
	struct vm_list_struct *tmp;

	if (mm) {
#ifdef DEBUG
		printk("Exit_mmap:\n");
#endif

		mm->total_vm = 0;

		/* walk the VMA list, dropping each mapping and freeing the
		 * list nodes as we go */
		while ((tmp = mm->context.vmlist)) {
			mm->context.vmlist = tmp->next;
			put_vma(mm, tmp->vma);

			realalloc -= kobjsize(tmp);
			askedalloc -= sizeof(*tmp);
			kfree(tmp);
		}

#ifdef DEBUG
		show_process_blocks();
#endif
	}
}

/* brk() expansion is not supported under NOMMU conditions */
unsigned long do_brk(unsigned long addr, unsigned long len)
{
	return -ENOMEM;
}

/*
 * expand (or shrink) an existing mapping, potentially moving it at the same
 * time (controlled by the MREMAP_MAYMOVE flag and available VM space)
 *
 * under NOMMU conditions, we only permit changing a mapping's size, and only
 * as long as it stays within the hole allocated by the kmalloc() call in
 * do_mmap_pgoff() and the block is not shareable
 *
 * MREMAP_FIXED is not supported under NOMMU conditions
 */
unsigned long do_mremap(unsigned long addr,
			unsigned long old_len, unsigned long new_len,
			unsigned long flags, unsigned long new_addr)
{
	struct vm_area_struct *vma;

	/* insanity checks first */
	if (new_len == 0)
		return (unsigned long) -EINVAL;

	if (flags & MREMAP_FIXED && new_addr != addr)
		return (unsigned long) -EINVAL;

	vma = find_vma_exact(current->mm, addr);
	if (!vma)
		return (unsigned long) -EINVAL;

	if (vma->vm_end != vma->vm_start + old_len)
		return (unsigned long) -EFAULT;

	/* shareable mappings may not be resized in place */
	if (vma->vm_flags & VM_MAYSHARE)
		return (unsigned long) -EPERM;

	/* the new size must fit within the underlying allocation */
	if (new_len > kobjsize((void *) addr))
		return (unsigned long) -ENOMEM;

	/* all checks complete - do it */
	vma->vm_end = vma->vm_start + new_len;

	askedalloc -= old_len;
	askedalloc += new_len;

	return vma->vm_start;
}
EXPORT_SYMBOL(do_mremap);

/* mremap(2) entry point: takes mmap_sem and delegates to do_mremap() */
asmlinkage unsigned long sys_mremap(unsigned long addr,
	unsigned long old_len, unsigned long new_len,
	unsigned long flags, unsigned long new_addr)
{
	unsigned long ret;

	down_write(&current->mm->mmap_sem);
ret = do_mremap(addr, old_len, new_len, flags, new_addr); 12666fa5f80bSDavid Howells up_write(¤t->mm->mmap_sem); 12676fa5f80bSDavid Howells return ret; 12681da177e4SLinus Torvalds } 12691da177e4SLinus Torvalds 12706aab341eSLinus Torvalds struct page *follow_page(struct vm_area_struct *vma, unsigned long address, 1271deceb6cdSHugh Dickins unsigned int foll_flags) 12721da177e4SLinus Torvalds { 12731da177e4SLinus Torvalds return NULL; 12741da177e4SLinus Torvalds } 12751da177e4SLinus Torvalds 12761da177e4SLinus Torvalds int remap_pfn_range(struct vm_area_struct *vma, unsigned long from, 12771da177e4SLinus Torvalds unsigned long to, unsigned long size, pgprot_t prot) 12781da177e4SLinus Torvalds { 127966aa2b4bSGreg Ungerer vma->vm_start = vma->vm_pgoff << PAGE_SHIFT; 128066aa2b4bSGreg Ungerer return 0; 12811da177e4SLinus Torvalds } 128222c4af40SLuke Yang EXPORT_SYMBOL(remap_pfn_range); 12831da177e4SLinus Torvalds 1284f905bc44SPaul Mundt int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, 1285f905bc44SPaul Mundt unsigned long pgoff) 1286f905bc44SPaul Mundt { 1287f905bc44SPaul Mundt unsigned int size = vma->vm_end - vma->vm_start; 1288f905bc44SPaul Mundt 1289f905bc44SPaul Mundt if (!(vma->vm_flags & VM_USERMAP)) 1290f905bc44SPaul Mundt return -EINVAL; 1291f905bc44SPaul Mundt 1292f905bc44SPaul Mundt vma->vm_start = (unsigned long)(addr + (pgoff << PAGE_SHIFT)); 1293f905bc44SPaul Mundt vma->vm_end = vma->vm_start + size; 1294f905bc44SPaul Mundt 1295f905bc44SPaul Mundt return 0; 1296f905bc44SPaul Mundt } 1297f905bc44SPaul Mundt EXPORT_SYMBOL(remap_vmalloc_range); 1298f905bc44SPaul Mundt 12991da177e4SLinus Torvalds void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) 13001da177e4SLinus Torvalds { 13011da177e4SLinus Torvalds } 13021da177e4SLinus Torvalds 13031da177e4SLinus Torvalds unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, 13041da177e4SLinus Torvalds unsigned long len, unsigned long pgoff, unsigned long flags) 
13051da177e4SLinus Torvalds { 13061da177e4SLinus Torvalds return -ENOMEM; 13071da177e4SLinus Torvalds } 13081da177e4SLinus Torvalds 13091363c3cdSWolfgang Wander void arch_unmap_area(struct mm_struct *mm, unsigned long addr) 13101da177e4SLinus Torvalds { 13111da177e4SLinus Torvalds } 13121da177e4SLinus Torvalds 13131da177e4SLinus Torvalds void unmap_mapping_range(struct address_space *mapping, 13141da177e4SLinus Torvalds loff_t const holebegin, loff_t const holelen, 13151da177e4SLinus Torvalds int even_cows) 13161da177e4SLinus Torvalds { 13171da177e4SLinus Torvalds } 131822c4af40SLuke Yang EXPORT_SYMBOL(unmap_mapping_range); 13191da177e4SLinus Torvalds 13201da177e4SLinus Torvalds /* 1321d56e03cdSDavid Howells * ask for an unmapped area at which to create a mapping on a file 1322d56e03cdSDavid Howells */ 1323d56e03cdSDavid Howells unsigned long get_unmapped_area(struct file *file, unsigned long addr, 1324d56e03cdSDavid Howells unsigned long len, unsigned long pgoff, 1325d56e03cdSDavid Howells unsigned long flags) 1326d56e03cdSDavid Howells { 1327d56e03cdSDavid Howells unsigned long (*get_area)(struct file *, unsigned long, unsigned long, 1328d56e03cdSDavid Howells unsigned long, unsigned long); 1329d56e03cdSDavid Howells 1330d56e03cdSDavid Howells get_area = current->mm->get_unmapped_area; 1331d56e03cdSDavid Howells if (file && file->f_op && file->f_op->get_unmapped_area) 1332d56e03cdSDavid Howells get_area = file->f_op->get_unmapped_area; 1333d56e03cdSDavid Howells 1334d56e03cdSDavid Howells if (!get_area) 1335d56e03cdSDavid Howells return -ENOSYS; 1336d56e03cdSDavid Howells 1337d56e03cdSDavid Howells return get_area(file, addr, len, pgoff, flags); 1338d56e03cdSDavid Howells } 1339d56e03cdSDavid Howells EXPORT_SYMBOL(get_unmapped_area); 1340d56e03cdSDavid Howells 1341d56e03cdSDavid Howells /* 13421da177e4SLinus Torvalds * Check that a process has enough memory to allocate a new virtual 13431da177e4SLinus Torvalds * mapping. 
0 means there is enough memory for the allocation to 13441da177e4SLinus Torvalds * succeed and -ENOMEM implies there is not. 13451da177e4SLinus Torvalds * 13461da177e4SLinus Torvalds * We currently support three overcommit policies, which are set via the 13471da177e4SLinus Torvalds * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting 13481da177e4SLinus Torvalds * 13491da177e4SLinus Torvalds * Strict overcommit modes added 2002 Feb 26 by Alan Cox. 13501da177e4SLinus Torvalds * Additional code 2002 Jul 20 by Robert Love. 13511da177e4SLinus Torvalds * 13521da177e4SLinus Torvalds * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise. 13531da177e4SLinus Torvalds * 13541da177e4SLinus Torvalds * Note this is a helper function intended to be used by LSMs which 13551da177e4SLinus Torvalds * wish to use this logic. 13561da177e4SLinus Torvalds */ 135734b4e4aaSAlan Cox int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) 13581da177e4SLinus Torvalds { 13591da177e4SLinus Torvalds unsigned long free, allowed; 13601da177e4SLinus Torvalds 13611da177e4SLinus Torvalds vm_acct_memory(pages); 13621da177e4SLinus Torvalds 13631da177e4SLinus Torvalds /* 13641da177e4SLinus Torvalds * Sometimes we want to use more memory than we have 13651da177e4SLinus Torvalds */ 13661da177e4SLinus Torvalds if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS) 13671da177e4SLinus Torvalds return 0; 13681da177e4SLinus Torvalds 13691da177e4SLinus Torvalds if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { 13701da177e4SLinus Torvalds unsigned long n; 13711da177e4SLinus Torvalds 1372347ce434SChristoph Lameter free = global_page_state(NR_FILE_PAGES); 13731da177e4SLinus Torvalds free += nr_swap_pages; 13741da177e4SLinus Torvalds 13751da177e4SLinus Torvalds /* 13761da177e4SLinus Torvalds * Any slabs which are created with the 13771da177e4SLinus Torvalds * SLAB_RECLAIM_ACCOUNT flag claim to have contents 13781da177e4SLinus Torvalds * which are reclaimable, 
under pressure. The dentry 13791da177e4SLinus Torvalds * cache and most inode caches should fall into this 13801da177e4SLinus Torvalds */ 1381972d1a7bSChristoph Lameter free += global_page_state(NR_SLAB_RECLAIMABLE); 13821da177e4SLinus Torvalds 13831da177e4SLinus Torvalds /* 13841da177e4SLinus Torvalds * Leave the last 3% for root 13851da177e4SLinus Torvalds */ 13861da177e4SLinus Torvalds if (!cap_sys_admin) 13871da177e4SLinus Torvalds free -= free / 32; 13881da177e4SLinus Torvalds 13891da177e4SLinus Torvalds if (free > pages) 13901da177e4SLinus Torvalds return 0; 13911da177e4SLinus Torvalds 13921da177e4SLinus Torvalds /* 13931da177e4SLinus Torvalds * nr_free_pages() is very expensive on large systems, 13941da177e4SLinus Torvalds * only call if we're about to fail. 13951da177e4SLinus Torvalds */ 13961da177e4SLinus Torvalds n = nr_free_pages(); 1397d5ddc79bSHideo AOKI 1398d5ddc79bSHideo AOKI /* 1399d5ddc79bSHideo AOKI * Leave reserved pages. The pages are not for anonymous pages. 1400d5ddc79bSHideo AOKI */ 1401d5ddc79bSHideo AOKI if (n <= totalreserve_pages) 1402d5ddc79bSHideo AOKI goto error; 1403d5ddc79bSHideo AOKI else 1404d5ddc79bSHideo AOKI n -= totalreserve_pages; 1405d5ddc79bSHideo AOKI 1406d5ddc79bSHideo AOKI /* 1407d5ddc79bSHideo AOKI * Leave the last 3% for root 1408d5ddc79bSHideo AOKI */ 14091da177e4SLinus Torvalds if (!cap_sys_admin) 14101da177e4SLinus Torvalds n -= n / 32; 14111da177e4SLinus Torvalds free += n; 14121da177e4SLinus Torvalds 14131da177e4SLinus Torvalds if (free > pages) 14141da177e4SLinus Torvalds return 0; 1415d5ddc79bSHideo AOKI 1416d5ddc79bSHideo AOKI goto error; 14171da177e4SLinus Torvalds } 14181da177e4SLinus Torvalds 14191da177e4SLinus Torvalds allowed = totalram_pages * sysctl_overcommit_ratio / 100; 14201da177e4SLinus Torvalds /* 14211da177e4SLinus Torvalds * Leave the last 3% for root 14221da177e4SLinus Torvalds */ 14231da177e4SLinus Torvalds if (!cap_sys_admin) 14241da177e4SLinus Torvalds allowed -= allowed / 32; 
14251da177e4SLinus Torvalds allowed += total_swap_pages; 14261da177e4SLinus Torvalds 14271da177e4SLinus Torvalds /* Don't let a single process grow too big: 14281da177e4SLinus Torvalds leave 3% of the size of this process for other processes */ 14291da177e4SLinus Torvalds allowed -= current->mm->total_vm / 32; 14301da177e4SLinus Torvalds 14312f60f8d3SSimon Derr /* 14322f60f8d3SSimon Derr * cast `allowed' as a signed long because vm_committed_space 14332f60f8d3SSimon Derr * sometimes has a negative value 14342f60f8d3SSimon Derr */ 143580119ef5SAlan Cox if (atomic_long_read(&vm_committed_space) < (long)allowed) 14361da177e4SLinus Torvalds return 0; 1437d5ddc79bSHideo AOKI error: 14381da177e4SLinus Torvalds vm_unacct_memory(pages); 14391da177e4SLinus Torvalds 14401da177e4SLinus Torvalds return -ENOMEM; 14411da177e4SLinus Torvalds } 14421da177e4SLinus Torvalds 14431da177e4SLinus Torvalds int in_gate_area_no_task(unsigned long addr) 14441da177e4SLinus Torvalds { 14451da177e4SLinus Torvalds return 0; 14461da177e4SLinus Torvalds } 1447b0e15190SDavid Howells 1448d0217ac0SNick Piggin int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1449b0e15190SDavid Howells { 1450b0e15190SDavid Howells BUG(); 1451d0217ac0SNick Piggin return 0; 1452b0e15190SDavid Howells } 1453b5073173SPaul Mundt EXPORT_SYMBOL(filemap_fault); 14540ec76a11SDavid Howells 14550ec76a11SDavid Howells /* 14560ec76a11SDavid Howells * Access another process' address space. 
14570ec76a11SDavid Howells * - source/target buffer must be kernel space 14580ec76a11SDavid Howells */ 14590ec76a11SDavid Howells int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) 14600ec76a11SDavid Howells { 14610ec76a11SDavid Howells struct vm_area_struct *vma; 14620ec76a11SDavid Howells struct mm_struct *mm; 14630ec76a11SDavid Howells 14640ec76a11SDavid Howells if (addr + len < addr) 14650ec76a11SDavid Howells return 0; 14660ec76a11SDavid Howells 14670ec76a11SDavid Howells mm = get_task_mm(tsk); 14680ec76a11SDavid Howells if (!mm) 14690ec76a11SDavid Howells return 0; 14700ec76a11SDavid Howells 14710ec76a11SDavid Howells down_read(&mm->mmap_sem); 14720ec76a11SDavid Howells 14730ec76a11SDavid Howells /* the access must start within one of the target process's mappings */ 14740159b141SDavid Howells vma = find_vma(mm, addr); 14750159b141SDavid Howells if (vma) { 14760ec76a11SDavid Howells /* don't overrun this mapping */ 14770ec76a11SDavid Howells if (addr + len >= vma->vm_end) 14780ec76a11SDavid Howells len = vma->vm_end - addr; 14790ec76a11SDavid Howells 14800ec76a11SDavid Howells /* only read or write mappings where it is permitted */ 1481d00c7b99SDavid Howells if (write && vma->vm_flags & VM_MAYWRITE) 14820ec76a11SDavid Howells len -= copy_to_user((void *) addr, buf, len); 1483d00c7b99SDavid Howells else if (!write && vma->vm_flags & VM_MAYREAD) 14840ec76a11SDavid Howells len -= copy_from_user(buf, (void *) addr, len); 14850ec76a11SDavid Howells else 14860ec76a11SDavid Howells len = 0; 14870ec76a11SDavid Howells } else { 14880ec76a11SDavid Howells len = 0; 14890ec76a11SDavid Howells } 14900ec76a11SDavid Howells 14910ec76a11SDavid Howells up_read(&mm->mmap_sem); 14920ec76a11SDavid Howells mmput(mm); 14930ec76a11SDavid Howells return len; 14940ec76a11SDavid Howells } 1495