11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * linux/mm/nommu.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Replacement code for mm functions to support CPU's that don't 51da177e4SLinus Torvalds * have any form of memory management unit (thus no virtual memory). 61da177e4SLinus Torvalds * 71da177e4SLinus Torvalds * See Documentation/nommu-mmap.txt 81da177e4SLinus Torvalds * 91da177e4SLinus Torvalds * Copyright (c) 2004-2005 David Howells <dhowells@redhat.com> 101da177e4SLinus Torvalds * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com> 111da177e4SLinus Torvalds * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org> 121da177e4SLinus Torvalds * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com> 13*f905bc44SPaul Mundt * Copyright (c) 2007 Paul Mundt <lethal@linux-sh.org> 141da177e4SLinus Torvalds */ 151da177e4SLinus Torvalds 16f2b8544fSDavid Howells #include <linux/module.h> 171da177e4SLinus Torvalds #include <linux/mm.h> 181da177e4SLinus Torvalds #include <linux/mman.h> 191da177e4SLinus Torvalds #include <linux/swap.h> 201da177e4SLinus Torvalds #include <linux/file.h> 211da177e4SLinus Torvalds #include <linux/highmem.h> 221da177e4SLinus Torvalds #include <linux/pagemap.h> 231da177e4SLinus Torvalds #include <linux/slab.h> 241da177e4SLinus Torvalds #include <linux/vmalloc.h> 251da177e4SLinus Torvalds #include <linux/ptrace.h> 261da177e4SLinus Torvalds #include <linux/blkdev.h> 271da177e4SLinus Torvalds #include <linux/backing-dev.h> 281da177e4SLinus Torvalds #include <linux/mount.h> 291da177e4SLinus Torvalds #include <linux/personality.h> 301da177e4SLinus Torvalds #include <linux/security.h> 311da177e4SLinus Torvalds #include <linux/syscalls.h> 321da177e4SLinus Torvalds 331da177e4SLinus Torvalds #include <asm/uaccess.h> 341da177e4SLinus Torvalds #include <asm/tlb.h> 351da177e4SLinus Torvalds #include <asm/tlbflush.h> 361da177e4SLinus Torvalds 371da177e4SLinus Torvalds void *high_memory; 381da177e4SLinus Torvalds struct page *mem_map; 
391da177e4SLinus Torvalds unsigned long max_mapnr; 401da177e4SLinus Torvalds unsigned long num_physpages; 411da177e4SLinus Torvalds unsigned long askedalloc, realalloc; 421da177e4SLinus Torvalds atomic_t vm_committed_space = ATOMIC_INIT(0); 431da177e4SLinus Torvalds int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ 441da177e4SLinus Torvalds int sysctl_overcommit_ratio = 50; /* default is 50% */ 451da177e4SLinus Torvalds int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; 461da177e4SLinus Torvalds int heap_stack_gap = 0; 471da177e4SLinus Torvalds 481da177e4SLinus Torvalds EXPORT_SYMBOL(mem_map); 496a04de6dSWu, Bryan EXPORT_SYMBOL(num_physpages); 501da177e4SLinus Torvalds 511da177e4SLinus Torvalds /* list of shareable VMAs */ 521da177e4SLinus Torvalds struct rb_root nommu_vma_tree = RB_ROOT; 531da177e4SLinus Torvalds DECLARE_RWSEM(nommu_vma_sem); 541da177e4SLinus Torvalds 551da177e4SLinus Torvalds struct vm_operations_struct generic_file_vm_ops = { 561da177e4SLinus Torvalds }; 571da177e4SLinus Torvalds 581da177e4SLinus Torvalds /* 591da177e4SLinus Torvalds * Handle all mappings that got truncated by a "truncate()" 601da177e4SLinus Torvalds * system call. 611da177e4SLinus Torvalds * 621da177e4SLinus Torvalds * NOTE! We have to be ready to update the memory sharing 631da177e4SLinus Torvalds * between the file and the memory map for a potential last 641da177e4SLinus Torvalds * incomplete page. Ugly, but necessary. 
651da177e4SLinus Torvalds */ 661da177e4SLinus Torvalds int vmtruncate(struct inode *inode, loff_t offset) 671da177e4SLinus Torvalds { 681da177e4SLinus Torvalds struct address_space *mapping = inode->i_mapping; 691da177e4SLinus Torvalds unsigned long limit; 701da177e4SLinus Torvalds 711da177e4SLinus Torvalds if (inode->i_size < offset) 721da177e4SLinus Torvalds goto do_expand; 731da177e4SLinus Torvalds i_size_write(inode, offset); 741da177e4SLinus Torvalds 751da177e4SLinus Torvalds truncate_inode_pages(mapping, offset); 761da177e4SLinus Torvalds goto out_truncate; 771da177e4SLinus Torvalds 781da177e4SLinus Torvalds do_expand: 791da177e4SLinus Torvalds limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; 801da177e4SLinus Torvalds if (limit != RLIM_INFINITY && offset > limit) 811da177e4SLinus Torvalds goto out_sig; 821da177e4SLinus Torvalds if (offset > inode->i_sb->s_maxbytes) 831da177e4SLinus Torvalds goto out; 841da177e4SLinus Torvalds i_size_write(inode, offset); 851da177e4SLinus Torvalds 861da177e4SLinus Torvalds out_truncate: 871da177e4SLinus Torvalds if (inode->i_op && inode->i_op->truncate) 881da177e4SLinus Torvalds inode->i_op->truncate(inode); 891da177e4SLinus Torvalds return 0; 901da177e4SLinus Torvalds out_sig: 911da177e4SLinus Torvalds send_sig(SIGXFSZ, current, 0); 921da177e4SLinus Torvalds out: 931da177e4SLinus Torvalds return -EFBIG; 941da177e4SLinus Torvalds } 951da177e4SLinus Torvalds 961da177e4SLinus Torvalds EXPORT_SYMBOL(vmtruncate); 971da177e4SLinus Torvalds 981da177e4SLinus Torvalds /* 991da177e4SLinus Torvalds * Return the total memory allocated for this pointer, not 1001da177e4SLinus Torvalds * just what the caller asked for. 1011da177e4SLinus Torvalds * 1021da177e4SLinus Torvalds * Doesn't have to be accurate, i.e. may have races. 
1031da177e4SLinus Torvalds */ 1041da177e4SLinus Torvalds unsigned int kobjsize(const void *objp) 1051da177e4SLinus Torvalds { 1061da177e4SLinus Torvalds struct page *page; 1071da177e4SLinus Torvalds 1081da177e4SLinus Torvalds if (!objp || !((page = virt_to_page(objp)))) 1091da177e4SLinus Torvalds return 0; 1101da177e4SLinus Torvalds 1111da177e4SLinus Torvalds if (PageSlab(page)) 1121da177e4SLinus Torvalds return ksize(objp); 1131da177e4SLinus Torvalds 1141da177e4SLinus Torvalds BUG_ON(page->index < 0); 1151da177e4SLinus Torvalds BUG_ON(page->index >= MAX_ORDER); 1161da177e4SLinus Torvalds 1171da177e4SLinus Torvalds return (PAGE_SIZE << page->index); 1181da177e4SLinus Torvalds } 1191da177e4SLinus Torvalds 1201da177e4SLinus Torvalds /* 1217b4d5b8bSDavid Howells * get a list of pages in an address range belonging to the specified process 1227b4d5b8bSDavid Howells * and indicate the VMA that covers each page 1237b4d5b8bSDavid Howells * - this is potentially dodgy as we may end incrementing the page count of a 1247b4d5b8bSDavid Howells * slab page or a secondary page from a compound page 1257b4d5b8bSDavid Howells * - don't permit access to VMAs that don't support it, such as I/O mappings 1261da177e4SLinus Torvalds */ 1271da177e4SLinus Torvalds int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 1281da177e4SLinus Torvalds unsigned long start, int len, int write, int force, 1291da177e4SLinus Torvalds struct page **pages, struct vm_area_struct **vmas) 1301da177e4SLinus Torvalds { 131910e46daSSonic Zhang struct vm_area_struct *vma; 1327b4d5b8bSDavid Howells unsigned long vm_flags; 1337b4d5b8bSDavid Howells int i; 1347b4d5b8bSDavid Howells 1357b4d5b8bSDavid Howells /* calculate required read or write permissions. 1367b4d5b8bSDavid Howells * - if 'force' is set, we only require the "MAY" flags. 1377b4d5b8bSDavid Howells */ 1387b4d5b8bSDavid Howells vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); 1397b4d5b8bSDavid Howells vm_flags &= force ? 
(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); 1401da177e4SLinus Torvalds 1411da177e4SLinus Torvalds for (i = 0; i < len; i++) { 142910e46daSSonic Zhang vma = find_vma(mm, start); 143910e46daSSonic Zhang if (!vma) 1447b4d5b8bSDavid Howells goto finish_or_fault; 1457b4d5b8bSDavid Howells 1467b4d5b8bSDavid Howells /* protect what we can, including chardevs */ 1477b4d5b8bSDavid Howells if (vma->vm_flags & (VM_IO | VM_PFNMAP) || 1487b4d5b8bSDavid Howells !(vm_flags & vma->vm_flags)) 1497b4d5b8bSDavid Howells goto finish_or_fault; 150910e46daSSonic Zhang 1511da177e4SLinus Torvalds if (pages) { 1521da177e4SLinus Torvalds pages[i] = virt_to_page(start); 1531da177e4SLinus Torvalds if (pages[i]) 1541da177e4SLinus Torvalds page_cache_get(pages[i]); 1551da177e4SLinus Torvalds } 1561da177e4SLinus Torvalds if (vmas) 157910e46daSSonic Zhang vmas[i] = vma; 1581da177e4SLinus Torvalds start += PAGE_SIZE; 1591da177e4SLinus Torvalds } 1607b4d5b8bSDavid Howells 1617b4d5b8bSDavid Howells return i; 1627b4d5b8bSDavid Howells 1637b4d5b8bSDavid Howells finish_or_fault: 1647b4d5b8bSDavid Howells return i ? : -EFAULT; 1651da177e4SLinus Torvalds } 16666aa2b4bSGreg Ungerer EXPORT_SYMBOL(get_user_pages); 16766aa2b4bSGreg Ungerer 1681da177e4SLinus Torvalds DEFINE_RWLOCK(vmlist_lock); 1691da177e4SLinus Torvalds struct vm_struct *vmlist; 1701da177e4SLinus Torvalds 171b3bdda02SChristoph Lameter void vfree(const void *addr) 1721da177e4SLinus Torvalds { 1731da177e4SLinus Torvalds kfree(addr); 1741da177e4SLinus Torvalds } 175b5073173SPaul Mundt EXPORT_SYMBOL(vfree); 1761da177e4SLinus Torvalds 177dd0fc66fSAl Viro void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) 1781da177e4SLinus Torvalds { 1791da177e4SLinus Torvalds /* 1808518609dSRobert P. J. Day * You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc() 1818518609dSRobert P. J. Day * returns only a logical address. 
1821da177e4SLinus Torvalds */ 18384097518SNick Piggin return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM); 1841da177e4SLinus Torvalds } 185b5073173SPaul Mundt EXPORT_SYMBOL(__vmalloc); 1861da177e4SLinus Torvalds 187*f905bc44SPaul Mundt void *vmalloc_user(unsigned long size) 188*f905bc44SPaul Mundt { 189*f905bc44SPaul Mundt void *ret; 190*f905bc44SPaul Mundt 191*f905bc44SPaul Mundt ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, 192*f905bc44SPaul Mundt PAGE_KERNEL); 193*f905bc44SPaul Mundt if (ret) { 194*f905bc44SPaul Mundt struct vm_area_struct *vma; 195*f905bc44SPaul Mundt 196*f905bc44SPaul Mundt down_write(¤t->mm->mmap_sem); 197*f905bc44SPaul Mundt vma = find_vma(current->mm, (unsigned long)ret); 198*f905bc44SPaul Mundt if (vma) 199*f905bc44SPaul Mundt vma->vm_flags |= VM_USERMAP; 200*f905bc44SPaul Mundt up_write(¤t->mm->mmap_sem); 201*f905bc44SPaul Mundt } 202*f905bc44SPaul Mundt 203*f905bc44SPaul Mundt return ret; 204*f905bc44SPaul Mundt } 205*f905bc44SPaul Mundt EXPORT_SYMBOL(vmalloc_user); 206*f905bc44SPaul Mundt 207b3bdda02SChristoph Lameter struct page *vmalloc_to_page(const void *addr) 2081da177e4SLinus Torvalds { 2091da177e4SLinus Torvalds return virt_to_page(addr); 2101da177e4SLinus Torvalds } 211b5073173SPaul Mundt EXPORT_SYMBOL(vmalloc_to_page); 2121da177e4SLinus Torvalds 213b3bdda02SChristoph Lameter unsigned long vmalloc_to_pfn(const void *addr) 2141da177e4SLinus Torvalds { 2151da177e4SLinus Torvalds return page_to_pfn(virt_to_page(addr)); 2161da177e4SLinus Torvalds } 217b5073173SPaul Mundt EXPORT_SYMBOL(vmalloc_to_pfn); 2181da177e4SLinus Torvalds 2191da177e4SLinus Torvalds long vread(char *buf, char *addr, unsigned long count) 2201da177e4SLinus Torvalds { 2211da177e4SLinus Torvalds memcpy(buf, addr, count); 2221da177e4SLinus Torvalds return count; 2231da177e4SLinus Torvalds } 2241da177e4SLinus Torvalds 2251da177e4SLinus Torvalds long vwrite(char *buf, char *addr, unsigned long count) 2261da177e4SLinus Torvalds { 
2271da177e4SLinus Torvalds /* Don't allow overflow */ 2281da177e4SLinus Torvalds if ((unsigned long) addr + count < count) 2291da177e4SLinus Torvalds count = -(unsigned long) addr; 2301da177e4SLinus Torvalds 2311da177e4SLinus Torvalds memcpy(addr, buf, count); 2321da177e4SLinus Torvalds return(count); 2331da177e4SLinus Torvalds } 2341da177e4SLinus Torvalds 2351da177e4SLinus Torvalds /* 2361da177e4SLinus Torvalds * vmalloc - allocate virtually continguos memory 2371da177e4SLinus Torvalds * 2381da177e4SLinus Torvalds * @size: allocation size 2391da177e4SLinus Torvalds * 2401da177e4SLinus Torvalds * Allocate enough pages to cover @size from the page level 2411da177e4SLinus Torvalds * allocator and map them into continguos kernel virtual space. 2421da177e4SLinus Torvalds * 243c1c8897fSMichael Opdenacker * For tight control over page level allocator and protection flags 2441da177e4SLinus Torvalds * use __vmalloc() instead. 2451da177e4SLinus Torvalds */ 2461da177e4SLinus Torvalds void *vmalloc(unsigned long size) 2471da177e4SLinus Torvalds { 2481da177e4SLinus Torvalds return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL); 2491da177e4SLinus Torvalds } 250f6138882SAndrew Morton EXPORT_SYMBOL(vmalloc); 251f6138882SAndrew Morton 252f6138882SAndrew Morton void *vmalloc_node(unsigned long size, int node) 253f6138882SAndrew Morton { 254f6138882SAndrew Morton return vmalloc(size); 255f6138882SAndrew Morton } 256f6138882SAndrew Morton EXPORT_SYMBOL(vmalloc_node); 2571da177e4SLinus Torvalds 258b5073173SPaul Mundt /** 259b5073173SPaul Mundt * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) 2601da177e4SLinus Torvalds * @size: allocation size 2611da177e4SLinus Torvalds * 2621da177e4SLinus Torvalds * Allocate enough 32bit PA addressable pages to cover @size from the 2631da177e4SLinus Torvalds * page level allocator and map them into continguos kernel virtual space. 
2641da177e4SLinus Torvalds */ 2651da177e4SLinus Torvalds void *vmalloc_32(unsigned long size) 2661da177e4SLinus Torvalds { 2671da177e4SLinus Torvalds return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL); 2681da177e4SLinus Torvalds } 269b5073173SPaul Mundt EXPORT_SYMBOL(vmalloc_32); 270b5073173SPaul Mundt 271b5073173SPaul Mundt /** 272b5073173SPaul Mundt * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory 273b5073173SPaul Mundt * @size: allocation size 274b5073173SPaul Mundt * 275b5073173SPaul Mundt * The resulting memory area is 32bit addressable and zeroed so it can be 276b5073173SPaul Mundt * mapped to userspace without leaking data. 277*f905bc44SPaul Mundt * 278*f905bc44SPaul Mundt * VM_USERMAP is set on the corresponding VMA so that subsequent calls to 279*f905bc44SPaul Mundt * remap_vmalloc_range() are permissible. 280b5073173SPaul Mundt */ 281b5073173SPaul Mundt void *vmalloc_32_user(unsigned long size) 282b5073173SPaul Mundt { 283*f905bc44SPaul Mundt /* 284*f905bc44SPaul Mundt * We'll have to sort out the ZONE_DMA bits for 64-bit, 285*f905bc44SPaul Mundt * but for now this can simply use vmalloc_user() directly. 
286*f905bc44SPaul Mundt */ 287*f905bc44SPaul Mundt return vmalloc_user(size); 288b5073173SPaul Mundt } 289b5073173SPaul Mundt EXPORT_SYMBOL(vmalloc_32_user); 2901da177e4SLinus Torvalds 2911da177e4SLinus Torvalds void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot) 2921da177e4SLinus Torvalds { 2931da177e4SLinus Torvalds BUG(); 2941da177e4SLinus Torvalds return NULL; 2951da177e4SLinus Torvalds } 296b5073173SPaul Mundt EXPORT_SYMBOL(vmap); 2971da177e4SLinus Torvalds 298b3bdda02SChristoph Lameter void vunmap(const void *addr) 2991da177e4SLinus Torvalds { 3001da177e4SLinus Torvalds BUG(); 3011da177e4SLinus Torvalds } 302b5073173SPaul Mundt EXPORT_SYMBOL(vunmap); 3031da177e4SLinus Torvalds 3041da177e4SLinus Torvalds /* 3051eeb66a1SChristoph Hellwig * Implement a stub for vmalloc_sync_all() if the architecture chose not to 3061eeb66a1SChristoph Hellwig * have one. 3071eeb66a1SChristoph Hellwig */ 3081eeb66a1SChristoph Hellwig void __attribute__((weak)) vmalloc_sync_all(void) 3091eeb66a1SChristoph Hellwig { 3101eeb66a1SChristoph Hellwig } 3111eeb66a1SChristoph Hellwig 312b5073173SPaul Mundt int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, 313b5073173SPaul Mundt struct page *page) 314b5073173SPaul Mundt { 315b5073173SPaul Mundt return -EINVAL; 316b5073173SPaul Mundt } 317b5073173SPaul Mundt EXPORT_SYMBOL(vm_insert_page); 318b5073173SPaul Mundt 3191eeb66a1SChristoph Hellwig /* 3201da177e4SLinus Torvalds * sys_brk() for the most part doesn't need the global kernel 3211da177e4SLinus Torvalds * lock, except when an application is doing something nasty 3221da177e4SLinus Torvalds * like trying to un-brk an area that has already been mapped 3231da177e4SLinus Torvalds * to a regular file. in this case, the unmapping will need 3241da177e4SLinus Torvalds * to invoke file system routines that need the global lock. 
3251da177e4SLinus Torvalds */ 3261da177e4SLinus Torvalds asmlinkage unsigned long sys_brk(unsigned long brk) 3271da177e4SLinus Torvalds { 3281da177e4SLinus Torvalds struct mm_struct *mm = current->mm; 3291da177e4SLinus Torvalds 3301da177e4SLinus Torvalds if (brk < mm->start_brk || brk > mm->context.end_brk) 3311da177e4SLinus Torvalds return mm->brk; 3321da177e4SLinus Torvalds 3331da177e4SLinus Torvalds if (mm->brk == brk) 3341da177e4SLinus Torvalds return mm->brk; 3351da177e4SLinus Torvalds 3361da177e4SLinus Torvalds /* 3371da177e4SLinus Torvalds * Always allow shrinking brk 3381da177e4SLinus Torvalds */ 3391da177e4SLinus Torvalds if (brk <= mm->brk) { 3401da177e4SLinus Torvalds mm->brk = brk; 3411da177e4SLinus Torvalds return brk; 3421da177e4SLinus Torvalds } 3431da177e4SLinus Torvalds 3441da177e4SLinus Torvalds /* 3451da177e4SLinus Torvalds * Ok, looks good - let it rip. 3461da177e4SLinus Torvalds */ 3471da177e4SLinus Torvalds return mm->brk = brk; 3481da177e4SLinus Torvalds } 3491da177e4SLinus Torvalds 3501da177e4SLinus Torvalds #ifdef DEBUG 3511da177e4SLinus Torvalds static void show_process_blocks(void) 3521da177e4SLinus Torvalds { 3531da177e4SLinus Torvalds struct vm_list_struct *vml; 3541da177e4SLinus Torvalds 3551da177e4SLinus Torvalds printk("Process blocks %d:", current->pid); 3561da177e4SLinus Torvalds 3571da177e4SLinus Torvalds for (vml = ¤t->mm->context.vmlist; vml; vml = vml->next) { 3581da177e4SLinus Torvalds printk(" %p: %p", vml, vml->vma); 3591da177e4SLinus Torvalds if (vml->vma) 3601da177e4SLinus Torvalds printk(" (%d @%lx #%d)", 3611da177e4SLinus Torvalds kobjsize((void *) vml->vma->vm_start), 3621da177e4SLinus Torvalds vml->vma->vm_start, 3631da177e4SLinus Torvalds atomic_read(&vml->vma->vm_usage)); 3641da177e4SLinus Torvalds printk(vml->next ? 
" ->" : ".\n"); 3651da177e4SLinus Torvalds } 3661da177e4SLinus Torvalds } 3671da177e4SLinus Torvalds #endif /* DEBUG */ 3681da177e4SLinus Torvalds 3693034097aSDavid Howells /* 3703034097aSDavid Howells * add a VMA into a process's mm_struct in the appropriate place in the list 3713034097aSDavid Howells * - should be called with mm->mmap_sem held writelocked 3723034097aSDavid Howells */ 3733034097aSDavid Howells static void add_vma_to_mm(struct mm_struct *mm, struct vm_list_struct *vml) 3743034097aSDavid Howells { 3753034097aSDavid Howells struct vm_list_struct **ppv; 3763034097aSDavid Howells 3773034097aSDavid Howells for (ppv = ¤t->mm->context.vmlist; *ppv; ppv = &(*ppv)->next) 3783034097aSDavid Howells if ((*ppv)->vma->vm_start > vml->vma->vm_start) 3793034097aSDavid Howells break; 3803034097aSDavid Howells 3813034097aSDavid Howells vml->next = *ppv; 3823034097aSDavid Howells *ppv = vml; 3833034097aSDavid Howells } 3843034097aSDavid Howells 3853034097aSDavid Howells /* 3863034097aSDavid Howells * look up the first VMA in which addr resides, NULL if none 3873034097aSDavid Howells * - should be called with mm->mmap_sem at least held readlocked 3883034097aSDavid Howells */ 3893034097aSDavid Howells struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) 3903034097aSDavid Howells { 3913034097aSDavid Howells struct vm_list_struct *loop, *vml; 3923034097aSDavid Howells 3933034097aSDavid Howells /* search the vm_start ordered list */ 3943034097aSDavid Howells vml = NULL; 3953034097aSDavid Howells for (loop = mm->context.vmlist; loop; loop = loop->next) { 3963034097aSDavid Howells if (loop->vma->vm_start > addr) 3973034097aSDavid Howells break; 3983034097aSDavid Howells vml = loop; 3993034097aSDavid Howells } 4003034097aSDavid Howells 4013034097aSDavid Howells if (vml && vml->vma->vm_end > addr) 4023034097aSDavid Howells return vml->vma; 4033034097aSDavid Howells 4043034097aSDavid Howells return NULL; 4053034097aSDavid Howells } 4063034097aSDavid 
Howells EXPORT_SYMBOL(find_vma); 4073034097aSDavid Howells 4083034097aSDavid Howells /* 409930e652aSDavid Howells * find a VMA 410930e652aSDavid Howells * - we don't extend stack VMAs under NOMMU conditions 411930e652aSDavid Howells */ 412930e652aSDavid Howells struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) 413930e652aSDavid Howells { 414930e652aSDavid Howells return find_vma(mm, addr); 415930e652aSDavid Howells } 416930e652aSDavid Howells 41757c8f63eSGreg Ungerer int expand_stack(struct vm_area_struct *vma, unsigned long address) 41857c8f63eSGreg Ungerer { 41957c8f63eSGreg Ungerer return -ENOMEM; 42057c8f63eSGreg Ungerer } 42157c8f63eSGreg Ungerer 422930e652aSDavid Howells /* 4236fa5f80bSDavid Howells * look up the first VMA exactly that exactly matches addr 4246fa5f80bSDavid Howells * - should be called with mm->mmap_sem at least held readlocked 4256fa5f80bSDavid Howells */ 4266fa5f80bSDavid Howells static inline struct vm_area_struct *find_vma_exact(struct mm_struct *mm, 4276fa5f80bSDavid Howells unsigned long addr) 4286fa5f80bSDavid Howells { 4296fa5f80bSDavid Howells struct vm_list_struct *vml; 4306fa5f80bSDavid Howells 4316fa5f80bSDavid Howells /* search the vm_start ordered list */ 4326fa5f80bSDavid Howells for (vml = mm->context.vmlist; vml; vml = vml->next) { 4336fa5f80bSDavid Howells if (vml->vma->vm_start == addr) 4346fa5f80bSDavid Howells return vml->vma; 4356fa5f80bSDavid Howells if (vml->vma->vm_start > addr) 4366fa5f80bSDavid Howells break; 4376fa5f80bSDavid Howells } 4386fa5f80bSDavid Howells 4396fa5f80bSDavid Howells return NULL; 4406fa5f80bSDavid Howells } 4416fa5f80bSDavid Howells 4426fa5f80bSDavid Howells /* 4433034097aSDavid Howells * find a VMA in the global tree 4443034097aSDavid Howells */ 4451da177e4SLinus Torvalds static inline struct vm_area_struct *find_nommu_vma(unsigned long start) 4461da177e4SLinus Torvalds { 4471da177e4SLinus Torvalds struct vm_area_struct *vma; 4481da177e4SLinus Torvalds struct 
rb_node *n = nommu_vma_tree.rb_node; 4491da177e4SLinus Torvalds 4501da177e4SLinus Torvalds while (n) { 4511da177e4SLinus Torvalds vma = rb_entry(n, struct vm_area_struct, vm_rb); 4521da177e4SLinus Torvalds 4531da177e4SLinus Torvalds if (start < vma->vm_start) 4541da177e4SLinus Torvalds n = n->rb_left; 4551da177e4SLinus Torvalds else if (start > vma->vm_start) 4561da177e4SLinus Torvalds n = n->rb_right; 4571da177e4SLinus Torvalds else 4581da177e4SLinus Torvalds return vma; 4591da177e4SLinus Torvalds } 4601da177e4SLinus Torvalds 4611da177e4SLinus Torvalds return NULL; 4621da177e4SLinus Torvalds } 4631da177e4SLinus Torvalds 4643034097aSDavid Howells /* 4653034097aSDavid Howells * add a VMA in the global tree 4663034097aSDavid Howells */ 4671da177e4SLinus Torvalds static void add_nommu_vma(struct vm_area_struct *vma) 4681da177e4SLinus Torvalds { 4691da177e4SLinus Torvalds struct vm_area_struct *pvma; 4701da177e4SLinus Torvalds struct address_space *mapping; 4711da177e4SLinus Torvalds struct rb_node **p = &nommu_vma_tree.rb_node; 4721da177e4SLinus Torvalds struct rb_node *parent = NULL; 4731da177e4SLinus Torvalds 4741da177e4SLinus Torvalds /* add the VMA to the mapping */ 4751da177e4SLinus Torvalds if (vma->vm_file) { 4761da177e4SLinus Torvalds mapping = vma->vm_file->f_mapping; 4771da177e4SLinus Torvalds 4781da177e4SLinus Torvalds flush_dcache_mmap_lock(mapping); 4791da177e4SLinus Torvalds vma_prio_tree_insert(vma, &mapping->i_mmap); 4801da177e4SLinus Torvalds flush_dcache_mmap_unlock(mapping); 4811da177e4SLinus Torvalds } 4821da177e4SLinus Torvalds 4831da177e4SLinus Torvalds /* add the VMA to the master list */ 4841da177e4SLinus Torvalds while (*p) { 4851da177e4SLinus Torvalds parent = *p; 4861da177e4SLinus Torvalds pvma = rb_entry(parent, struct vm_area_struct, vm_rb); 4871da177e4SLinus Torvalds 4881da177e4SLinus Torvalds if (vma->vm_start < pvma->vm_start) { 4891da177e4SLinus Torvalds p = &(*p)->rb_left; 4901da177e4SLinus Torvalds } 4911da177e4SLinus Torvalds else 
if (vma->vm_start > pvma->vm_start) { 4921da177e4SLinus Torvalds p = &(*p)->rb_right; 4931da177e4SLinus Torvalds } 4941da177e4SLinus Torvalds else { 4951da177e4SLinus Torvalds /* mappings are at the same address - this can only 4961da177e4SLinus Torvalds * happen for shared-mem chardevs and shared file 4971da177e4SLinus Torvalds * mappings backed by ramfs/tmpfs */ 4981da177e4SLinus Torvalds BUG_ON(!(pvma->vm_flags & VM_SHARED)); 4991da177e4SLinus Torvalds 5001da177e4SLinus Torvalds if (vma < pvma) 5011da177e4SLinus Torvalds p = &(*p)->rb_left; 5021da177e4SLinus Torvalds else if (vma > pvma) 5031da177e4SLinus Torvalds p = &(*p)->rb_right; 5041da177e4SLinus Torvalds else 5051da177e4SLinus Torvalds BUG(); 5061da177e4SLinus Torvalds } 5071da177e4SLinus Torvalds } 5081da177e4SLinus Torvalds 5091da177e4SLinus Torvalds rb_link_node(&vma->vm_rb, parent, p); 5101da177e4SLinus Torvalds rb_insert_color(&vma->vm_rb, &nommu_vma_tree); 5111da177e4SLinus Torvalds } 5121da177e4SLinus Torvalds 5133034097aSDavid Howells /* 5143034097aSDavid Howells * delete a VMA from the global list 5153034097aSDavid Howells */ 5161da177e4SLinus Torvalds static void delete_nommu_vma(struct vm_area_struct *vma) 5171da177e4SLinus Torvalds { 5181da177e4SLinus Torvalds struct address_space *mapping; 5191da177e4SLinus Torvalds 5201da177e4SLinus Torvalds /* remove the VMA from the mapping */ 5211da177e4SLinus Torvalds if (vma->vm_file) { 5221da177e4SLinus Torvalds mapping = vma->vm_file->f_mapping; 5231da177e4SLinus Torvalds 5241da177e4SLinus Torvalds flush_dcache_mmap_lock(mapping); 5251da177e4SLinus Torvalds vma_prio_tree_remove(vma, &mapping->i_mmap); 5261da177e4SLinus Torvalds flush_dcache_mmap_unlock(mapping); 5271da177e4SLinus Torvalds } 5281da177e4SLinus Torvalds 5291da177e4SLinus Torvalds /* remove from the master list */ 5301da177e4SLinus Torvalds rb_erase(&vma->vm_rb, &nommu_vma_tree); 5311da177e4SLinus Torvalds } 5321da177e4SLinus Torvalds 5331da177e4SLinus Torvalds /* 5341da177e4SLinus 
Torvalds * determine whether a mapping should be permitted and, if so, what sort of 5351da177e4SLinus Torvalds * mapping we're capable of supporting 5361da177e4SLinus Torvalds */ 5371da177e4SLinus Torvalds static int validate_mmap_request(struct file *file, 5381da177e4SLinus Torvalds unsigned long addr, 5391da177e4SLinus Torvalds unsigned long len, 5401da177e4SLinus Torvalds unsigned long prot, 5411da177e4SLinus Torvalds unsigned long flags, 5421da177e4SLinus Torvalds unsigned long pgoff, 5431da177e4SLinus Torvalds unsigned long *_capabilities) 5441da177e4SLinus Torvalds { 5451da177e4SLinus Torvalds unsigned long capabilities; 5461da177e4SLinus Torvalds unsigned long reqprot = prot; 5471da177e4SLinus Torvalds int ret; 5481da177e4SLinus Torvalds 5491da177e4SLinus Torvalds /* do the simple checks first */ 5501da177e4SLinus Torvalds if (flags & MAP_FIXED || addr) { 5511da177e4SLinus Torvalds printk(KERN_DEBUG 5521da177e4SLinus Torvalds "%d: Can't do fixed-address/overlay mmap of RAM\n", 5531da177e4SLinus Torvalds current->pid); 5541da177e4SLinus Torvalds return -EINVAL; 5551da177e4SLinus Torvalds } 5561da177e4SLinus Torvalds 5571da177e4SLinus Torvalds if ((flags & MAP_TYPE) != MAP_PRIVATE && 5581da177e4SLinus Torvalds (flags & MAP_TYPE) != MAP_SHARED) 5591da177e4SLinus Torvalds return -EINVAL; 5601da177e4SLinus Torvalds 561f81cff0dSMike Frysinger if (!len) 5621da177e4SLinus Torvalds return -EINVAL; 5631da177e4SLinus Torvalds 564f81cff0dSMike Frysinger /* Careful about overflows.. */ 565f81cff0dSMike Frysinger len = PAGE_ALIGN(len); 566f81cff0dSMike Frysinger if (!len || len > TASK_SIZE) 567f81cff0dSMike Frysinger return -ENOMEM; 568f81cff0dSMike Frysinger 5691da177e4SLinus Torvalds /* offset overflow? 
*/ 5701da177e4SLinus Torvalds if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) 571f81cff0dSMike Frysinger return -EOVERFLOW; 5721da177e4SLinus Torvalds 5731da177e4SLinus Torvalds if (file) { 5741da177e4SLinus Torvalds /* validate file mapping requests */ 5751da177e4SLinus Torvalds struct address_space *mapping; 5761da177e4SLinus Torvalds 5771da177e4SLinus Torvalds /* files must support mmap */ 5781da177e4SLinus Torvalds if (!file->f_op || !file->f_op->mmap) 5791da177e4SLinus Torvalds return -ENODEV; 5801da177e4SLinus Torvalds 5811da177e4SLinus Torvalds /* work out if what we've got could possibly be shared 5821da177e4SLinus Torvalds * - we support chardevs that provide their own "memory" 5831da177e4SLinus Torvalds * - we support files/blockdevs that are memory backed 5841da177e4SLinus Torvalds */ 5851da177e4SLinus Torvalds mapping = file->f_mapping; 5861da177e4SLinus Torvalds if (!mapping) 587e9536ae7SJosef Sipek mapping = file->f_path.dentry->d_inode->i_mapping; 5881da177e4SLinus Torvalds 5891da177e4SLinus Torvalds capabilities = 0; 5901da177e4SLinus Torvalds if (mapping && mapping->backing_dev_info) 5911da177e4SLinus Torvalds capabilities = mapping->backing_dev_info->capabilities; 5921da177e4SLinus Torvalds 5931da177e4SLinus Torvalds if (!capabilities) { 5941da177e4SLinus Torvalds /* no explicit capabilities set, so assume some 5951da177e4SLinus Torvalds * defaults */ 596e9536ae7SJosef Sipek switch (file->f_path.dentry->d_inode->i_mode & S_IFMT) { 5971da177e4SLinus Torvalds case S_IFREG: 5981da177e4SLinus Torvalds case S_IFBLK: 5991da177e4SLinus Torvalds capabilities = BDI_CAP_MAP_COPY; 6001da177e4SLinus Torvalds break; 6011da177e4SLinus Torvalds 6021da177e4SLinus Torvalds case S_IFCHR: 6031da177e4SLinus Torvalds capabilities = 6041da177e4SLinus Torvalds BDI_CAP_MAP_DIRECT | 6051da177e4SLinus Torvalds BDI_CAP_READ_MAP | 6061da177e4SLinus Torvalds BDI_CAP_WRITE_MAP; 6071da177e4SLinus Torvalds break; 6081da177e4SLinus Torvalds 6091da177e4SLinus Torvalds default: 
6101da177e4SLinus Torvalds return -EINVAL; 6111da177e4SLinus Torvalds } 6121da177e4SLinus Torvalds } 6131da177e4SLinus Torvalds 6141da177e4SLinus Torvalds /* eliminate any capabilities that we can't support on this 6151da177e4SLinus Torvalds * device */ 6161da177e4SLinus Torvalds if (!file->f_op->get_unmapped_area) 6171da177e4SLinus Torvalds capabilities &= ~BDI_CAP_MAP_DIRECT; 6181da177e4SLinus Torvalds if (!file->f_op->read) 6191da177e4SLinus Torvalds capabilities &= ~BDI_CAP_MAP_COPY; 6201da177e4SLinus Torvalds 6211da177e4SLinus Torvalds if (flags & MAP_SHARED) { 6221da177e4SLinus Torvalds /* do checks for writing, appending and locking */ 6231da177e4SLinus Torvalds if ((prot & PROT_WRITE) && 6241da177e4SLinus Torvalds !(file->f_mode & FMODE_WRITE)) 6251da177e4SLinus Torvalds return -EACCES; 6261da177e4SLinus Torvalds 627e9536ae7SJosef Sipek if (IS_APPEND(file->f_path.dentry->d_inode) && 6281da177e4SLinus Torvalds (file->f_mode & FMODE_WRITE)) 6291da177e4SLinus Torvalds return -EACCES; 6301da177e4SLinus Torvalds 631e9536ae7SJosef Sipek if (locks_verify_locked(file->f_path.dentry->d_inode)) 6321da177e4SLinus Torvalds return -EAGAIN; 6331da177e4SLinus Torvalds 6341da177e4SLinus Torvalds if (!(capabilities & BDI_CAP_MAP_DIRECT)) 6351da177e4SLinus Torvalds return -ENODEV; 6361da177e4SLinus Torvalds 6371da177e4SLinus Torvalds if (((prot & PROT_READ) && !(capabilities & BDI_CAP_READ_MAP)) || 6381da177e4SLinus Torvalds ((prot & PROT_WRITE) && !(capabilities & BDI_CAP_WRITE_MAP)) || 6391da177e4SLinus Torvalds ((prot & PROT_EXEC) && !(capabilities & BDI_CAP_EXEC_MAP)) 6401da177e4SLinus Torvalds ) { 6411da177e4SLinus Torvalds printk("MAP_SHARED not completely supported on !MMU\n"); 6421da177e4SLinus Torvalds return -EINVAL; 6431da177e4SLinus Torvalds } 6441da177e4SLinus Torvalds 6451da177e4SLinus Torvalds /* we mustn't privatise shared mappings */ 6461da177e4SLinus Torvalds capabilities &= ~BDI_CAP_MAP_COPY; 6471da177e4SLinus Torvalds } 6481da177e4SLinus Torvalds else { 
6491da177e4SLinus Torvalds /* we're going to read the file into private memory we 6501da177e4SLinus Torvalds * allocate */ 6511da177e4SLinus Torvalds if (!(capabilities & BDI_CAP_MAP_COPY)) 6521da177e4SLinus Torvalds return -ENODEV; 6531da177e4SLinus Torvalds 6541da177e4SLinus Torvalds /* we don't permit a private writable mapping to be 6551da177e4SLinus Torvalds * shared with the backing device */ 6561da177e4SLinus Torvalds if (prot & PROT_WRITE) 6571da177e4SLinus Torvalds capabilities &= ~BDI_CAP_MAP_DIRECT; 6581da177e4SLinus Torvalds } 6591da177e4SLinus Torvalds 6601da177e4SLinus Torvalds /* handle executable mappings and implied executable 6611da177e4SLinus Torvalds * mappings */ 662e9536ae7SJosef Sipek if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) { 6631da177e4SLinus Torvalds if (prot & PROT_EXEC) 6641da177e4SLinus Torvalds return -EPERM; 6651da177e4SLinus Torvalds } 6661da177e4SLinus Torvalds else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) { 6671da177e4SLinus Torvalds /* handle implication of PROT_EXEC by PROT_READ */ 6681da177e4SLinus Torvalds if (current->personality & READ_IMPLIES_EXEC) { 6691da177e4SLinus Torvalds if (capabilities & BDI_CAP_EXEC_MAP) 6701da177e4SLinus Torvalds prot |= PROT_EXEC; 6711da177e4SLinus Torvalds } 6721da177e4SLinus Torvalds } 6731da177e4SLinus Torvalds else if ((prot & PROT_READ) && 6741da177e4SLinus Torvalds (prot & PROT_EXEC) && 6751da177e4SLinus Torvalds !(capabilities & BDI_CAP_EXEC_MAP) 6761da177e4SLinus Torvalds ) { 6771da177e4SLinus Torvalds /* backing file is not executable, try to copy */ 6781da177e4SLinus Torvalds capabilities &= ~BDI_CAP_MAP_DIRECT; 6791da177e4SLinus Torvalds } 6801da177e4SLinus Torvalds } 6811da177e4SLinus Torvalds else { 6821da177e4SLinus Torvalds /* anonymous mappings are always memory backed and can be 6831da177e4SLinus Torvalds * privately mapped 6841da177e4SLinus Torvalds */ 6851da177e4SLinus Torvalds capabilities = BDI_CAP_MAP_COPY; 6861da177e4SLinus Torvalds 6871da177e4SLinus Torvalds /* 
handle PROT_EXEC implication by PROT_READ */ 6881da177e4SLinus Torvalds if ((prot & PROT_READ) && 6891da177e4SLinus Torvalds (current->personality & READ_IMPLIES_EXEC)) 6901da177e4SLinus Torvalds prot |= PROT_EXEC; 6911da177e4SLinus Torvalds } 6921da177e4SLinus Torvalds 6931da177e4SLinus Torvalds /* allow the security API to have its say */ 694ed032189SEric Paris ret = security_file_mmap(file, reqprot, prot, flags, addr, 0); 6951da177e4SLinus Torvalds if (ret < 0) 6961da177e4SLinus Torvalds return ret; 6971da177e4SLinus Torvalds 6981da177e4SLinus Torvalds /* looks okay */ 6991da177e4SLinus Torvalds *_capabilities = capabilities; 7001da177e4SLinus Torvalds return 0; 7011da177e4SLinus Torvalds } 7021da177e4SLinus Torvalds 7031da177e4SLinus Torvalds /* 7041da177e4SLinus Torvalds * we've determined that we can make the mapping, now translate what we 7051da177e4SLinus Torvalds * now know into VMA flags 7061da177e4SLinus Torvalds */ 7071da177e4SLinus Torvalds static unsigned long determine_vm_flags(struct file *file, 7081da177e4SLinus Torvalds unsigned long prot, 7091da177e4SLinus Torvalds unsigned long flags, 7101da177e4SLinus Torvalds unsigned long capabilities) 7111da177e4SLinus Torvalds { 7121da177e4SLinus Torvalds unsigned long vm_flags; 7131da177e4SLinus Torvalds 7141da177e4SLinus Torvalds vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags); 7151da177e4SLinus Torvalds vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; 7161da177e4SLinus Torvalds /* vm_flags |= mm->def_flags; */ 7171da177e4SLinus Torvalds 7181da177e4SLinus Torvalds if (!(capabilities & BDI_CAP_MAP_DIRECT)) { 7191da177e4SLinus Torvalds /* attempt to share read-only copies of mapped file chunks */ 7201da177e4SLinus Torvalds if (file && !(prot & PROT_WRITE)) 7211da177e4SLinus Torvalds vm_flags |= VM_MAYSHARE; 7221da177e4SLinus Torvalds } 7231da177e4SLinus Torvalds else { 7241da177e4SLinus Torvalds /* overlay a shareable mapping on the backing device or inode 7251da177e4SLinus Torvalds * if 
possible - used for chardevs, ramfs/tmpfs/shmfs and 7261da177e4SLinus Torvalds * romfs/cramfs */ 7271da177e4SLinus Torvalds if (flags & MAP_SHARED) 7281da177e4SLinus Torvalds vm_flags |= VM_MAYSHARE | VM_SHARED; 7291da177e4SLinus Torvalds else if ((((vm_flags & capabilities) ^ vm_flags) & BDI_CAP_VMFLAGS) == 0) 7301da177e4SLinus Torvalds vm_flags |= VM_MAYSHARE; 7311da177e4SLinus Torvalds } 7321da177e4SLinus Torvalds 7331da177e4SLinus Torvalds /* refuse to let anyone share private mappings with this process if 7341da177e4SLinus Torvalds * it's being traced - otherwise breakpoints set in it may interfere 7351da177e4SLinus Torvalds * with another untraced process 7361da177e4SLinus Torvalds */ 7371da177e4SLinus Torvalds if ((flags & MAP_PRIVATE) && (current->ptrace & PT_PTRACED)) 7381da177e4SLinus Torvalds vm_flags &= ~VM_MAYSHARE; 7391da177e4SLinus Torvalds 7401da177e4SLinus Torvalds return vm_flags; 7411da177e4SLinus Torvalds } 7421da177e4SLinus Torvalds 7431da177e4SLinus Torvalds /* 7441da177e4SLinus Torvalds * set up a shared mapping on a file 7451da177e4SLinus Torvalds */ 7461da177e4SLinus Torvalds static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len) 7471da177e4SLinus Torvalds { 7481da177e4SLinus Torvalds int ret; 7491da177e4SLinus Torvalds 7501da177e4SLinus Torvalds ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); 7511da177e4SLinus Torvalds if (ret != -ENOSYS) 7521da177e4SLinus Torvalds return ret; 7531da177e4SLinus Torvalds 7541da177e4SLinus Torvalds /* getting an ENOSYS error indicates that direct mmap isn't 7551da177e4SLinus Torvalds * possible (as opposed to tried but failed) so we'll fall 7561da177e4SLinus Torvalds * through to making a private copy of the data and mapping 7571da177e4SLinus Torvalds * that if we can */ 7581da177e4SLinus Torvalds return -ENODEV; 7591da177e4SLinus Torvalds } 7601da177e4SLinus Torvalds 7611da177e4SLinus Torvalds /* 7621da177e4SLinus Torvalds * set up a private mapping or an anonymous shared mapping 
7631da177e4SLinus Torvalds */ 7641da177e4SLinus Torvalds static int do_mmap_private(struct vm_area_struct *vma, unsigned long len) 7651da177e4SLinus Torvalds { 7661da177e4SLinus Torvalds void *base; 7671da177e4SLinus Torvalds int ret; 7681da177e4SLinus Torvalds 7691da177e4SLinus Torvalds /* invoke the file's mapping function so that it can keep track of 7701da177e4SLinus Torvalds * shared mappings on devices or memory 7711da177e4SLinus Torvalds * - VM_MAYSHARE will be set if it may attempt to share 7721da177e4SLinus Torvalds */ 7731da177e4SLinus Torvalds if (vma->vm_file) { 7741da177e4SLinus Torvalds ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); 7751da177e4SLinus Torvalds if (ret != -ENOSYS) { 7761da177e4SLinus Torvalds /* shouldn't return success if we're not sharing */ 7771da177e4SLinus Torvalds BUG_ON(ret == 0 && !(vma->vm_flags & VM_MAYSHARE)); 7781da177e4SLinus Torvalds return ret; /* success or a real error */ 7791da177e4SLinus Torvalds } 7801da177e4SLinus Torvalds 7811da177e4SLinus Torvalds /* getting an ENOSYS error indicates that direct mmap isn't 7821da177e4SLinus Torvalds * possible (as opposed to tried but failed) so we'll try to 7831da177e4SLinus Torvalds * make a private copy of the data and map that instead */ 7841da177e4SLinus Torvalds } 7851da177e4SLinus Torvalds 7861da177e4SLinus Torvalds /* allocate some memory to hold the mapping 7871da177e4SLinus Torvalds * - note that this may not return a page-aligned address if the object 7881da177e4SLinus Torvalds * we're allocating is smaller than a page 7891da177e4SLinus Torvalds */ 79084097518SNick Piggin base = kmalloc(len, GFP_KERNEL|__GFP_COMP); 7911da177e4SLinus Torvalds if (!base) 7921da177e4SLinus Torvalds goto enomem; 7931da177e4SLinus Torvalds 7941da177e4SLinus Torvalds vma->vm_start = (unsigned long) base; 7951da177e4SLinus Torvalds vma->vm_end = vma->vm_start + len; 7961da177e4SLinus Torvalds vma->vm_flags |= VM_MAPPED_COPY; 7971da177e4SLinus Torvalds 7981da177e4SLinus Torvalds #ifdef 
WARN_ON_SLACK 7991da177e4SLinus Torvalds if (len + WARN_ON_SLACK <= kobjsize(result)) 8001da177e4SLinus Torvalds printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n", 8011da177e4SLinus Torvalds len, current->pid, kobjsize(result) - len); 8021da177e4SLinus Torvalds #endif 8031da177e4SLinus Torvalds 8041da177e4SLinus Torvalds if (vma->vm_file) { 8051da177e4SLinus Torvalds /* read the contents of a file into the copy */ 8061da177e4SLinus Torvalds mm_segment_t old_fs; 8071da177e4SLinus Torvalds loff_t fpos; 8081da177e4SLinus Torvalds 8091da177e4SLinus Torvalds fpos = vma->vm_pgoff; 8101da177e4SLinus Torvalds fpos <<= PAGE_SHIFT; 8111da177e4SLinus Torvalds 8121da177e4SLinus Torvalds old_fs = get_fs(); 8131da177e4SLinus Torvalds set_fs(KERNEL_DS); 8141da177e4SLinus Torvalds ret = vma->vm_file->f_op->read(vma->vm_file, base, len, &fpos); 8151da177e4SLinus Torvalds set_fs(old_fs); 8161da177e4SLinus Torvalds 8171da177e4SLinus Torvalds if (ret < 0) 8181da177e4SLinus Torvalds goto error_free; 8191da177e4SLinus Torvalds 8201da177e4SLinus Torvalds /* clear the last little bit */ 8211da177e4SLinus Torvalds if (ret < len) 8221da177e4SLinus Torvalds memset(base + ret, 0, len - ret); 8231da177e4SLinus Torvalds 8241da177e4SLinus Torvalds } else { 8251da177e4SLinus Torvalds /* if it's an anonymous mapping, then just clear it */ 8261da177e4SLinus Torvalds memset(base, 0, len); 8271da177e4SLinus Torvalds } 8281da177e4SLinus Torvalds 8291da177e4SLinus Torvalds return 0; 8301da177e4SLinus Torvalds 8311da177e4SLinus Torvalds error_free: 8321da177e4SLinus Torvalds kfree(base); 8331da177e4SLinus Torvalds vma->vm_start = 0; 8341da177e4SLinus Torvalds return ret; 8351da177e4SLinus Torvalds 8361da177e4SLinus Torvalds enomem: 8371da177e4SLinus Torvalds printk("Allocation of length %lu from process %d failed\n", 8381da177e4SLinus Torvalds len, current->pid); 8391da177e4SLinus Torvalds show_free_areas(); 8401da177e4SLinus Torvalds return -ENOMEM; 8411da177e4SLinus Torvalds } 
8421da177e4SLinus Torvalds 8431da177e4SLinus Torvalds /* 8441da177e4SLinus Torvalds * handle mapping creation for uClinux 8451da177e4SLinus Torvalds */ 8461da177e4SLinus Torvalds unsigned long do_mmap_pgoff(struct file *file, 8471da177e4SLinus Torvalds unsigned long addr, 8481da177e4SLinus Torvalds unsigned long len, 8491da177e4SLinus Torvalds unsigned long prot, 8501da177e4SLinus Torvalds unsigned long flags, 8511da177e4SLinus Torvalds unsigned long pgoff) 8521da177e4SLinus Torvalds { 8531da177e4SLinus Torvalds struct vm_list_struct *vml = NULL; 8541da177e4SLinus Torvalds struct vm_area_struct *vma = NULL; 8551da177e4SLinus Torvalds struct rb_node *rb; 8561da177e4SLinus Torvalds unsigned long capabilities, vm_flags; 8571da177e4SLinus Torvalds void *result; 8581da177e4SLinus Torvalds int ret; 8591da177e4SLinus Torvalds 8607cd94146SEric Paris if (!(flags & MAP_FIXED)) 8617cd94146SEric Paris addr = round_hint_to_min(addr); 8627cd94146SEric Paris 8631da177e4SLinus Torvalds /* decide whether we should attempt the mapping, and if so what sort of 8641da177e4SLinus Torvalds * mapping */ 8651da177e4SLinus Torvalds ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, 8661da177e4SLinus Torvalds &capabilities); 8671da177e4SLinus Torvalds if (ret < 0) 8681da177e4SLinus Torvalds return ret; 8691da177e4SLinus Torvalds 8701da177e4SLinus Torvalds /* we've determined that we can make the mapping, now translate what we 8711da177e4SLinus Torvalds * now know into VMA flags */ 8721da177e4SLinus Torvalds vm_flags = determine_vm_flags(file, prot, flags, capabilities); 8731da177e4SLinus Torvalds 8741da177e4SLinus Torvalds /* we're going to need to record the mapping if it works */ 8754668edc3SBurman Yan vml = kzalloc(sizeof(struct vm_list_struct), GFP_KERNEL); 8761da177e4SLinus Torvalds if (!vml) 8771da177e4SLinus Torvalds goto error_getting_vml; 8781da177e4SLinus Torvalds 8791da177e4SLinus Torvalds down_write(&nommu_vma_sem); 8801da177e4SLinus Torvalds 8811da177e4SLinus 
Torvalds /* if we want to share, we need to check for VMAs created by other 8821da177e4SLinus Torvalds * mmap() calls that overlap with our proposed mapping 8831da177e4SLinus Torvalds * - we can only share with an exact match on most regular files 8841da177e4SLinus Torvalds * - shared mappings on character devices and memory backed files are 8851da177e4SLinus Torvalds * permitted to overlap inexactly as far as we are concerned for in 8861da177e4SLinus Torvalds * these cases, sharing is handled in the driver or filesystem rather 8871da177e4SLinus Torvalds * than here 8881da177e4SLinus Torvalds */ 8891da177e4SLinus Torvalds if (vm_flags & VM_MAYSHARE) { 8901da177e4SLinus Torvalds unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; 8911da177e4SLinus Torvalds unsigned long vmpglen; 8921da177e4SLinus Torvalds 893165b2392SDavid Howells /* suppress VMA sharing for shared regions */ 894165b2392SDavid Howells if (vm_flags & VM_SHARED && 895165b2392SDavid Howells capabilities & BDI_CAP_MAP_DIRECT) 896165b2392SDavid Howells goto dont_share_VMAs; 897165b2392SDavid Howells 8981da177e4SLinus Torvalds for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) { 8991da177e4SLinus Torvalds vma = rb_entry(rb, struct vm_area_struct, vm_rb); 9001da177e4SLinus Torvalds 9011da177e4SLinus Torvalds if (!(vma->vm_flags & VM_MAYSHARE)) 9021da177e4SLinus Torvalds continue; 9031da177e4SLinus Torvalds 9041da177e4SLinus Torvalds /* search for overlapping mappings on the same file */ 905e9536ae7SJosef Sipek if (vma->vm_file->f_path.dentry->d_inode != file->f_path.dentry->d_inode) 9061da177e4SLinus Torvalds continue; 9071da177e4SLinus Torvalds 9081da177e4SLinus Torvalds if (vma->vm_pgoff >= pgoff + pglen) 9091da177e4SLinus Torvalds continue; 9101da177e4SLinus Torvalds 9111da177e4SLinus Torvalds vmpglen = vma->vm_end - vma->vm_start + PAGE_SIZE - 1; 9121da177e4SLinus Torvalds vmpglen >>= PAGE_SHIFT; 9131da177e4SLinus Torvalds if (pgoff >= vma->vm_pgoff + vmpglen) 9141da177e4SLinus Torvalds 
continue; 9151da177e4SLinus Torvalds 9161da177e4SLinus Torvalds /* handle inexactly overlapping matches between mappings */ 9171da177e4SLinus Torvalds if (vma->vm_pgoff != pgoff || vmpglen != pglen) { 9181da177e4SLinus Torvalds if (!(capabilities & BDI_CAP_MAP_DIRECT)) 9191da177e4SLinus Torvalds goto sharing_violation; 9201da177e4SLinus Torvalds continue; 9211da177e4SLinus Torvalds } 9221da177e4SLinus Torvalds 9231da177e4SLinus Torvalds /* we've found a VMA we can share */ 9241da177e4SLinus Torvalds atomic_inc(&vma->vm_usage); 9251da177e4SLinus Torvalds 9261da177e4SLinus Torvalds vml->vma = vma; 9271da177e4SLinus Torvalds result = (void *) vma->vm_start; 9281da177e4SLinus Torvalds goto shared; 9291da177e4SLinus Torvalds } 9301da177e4SLinus Torvalds 931165b2392SDavid Howells dont_share_VMAs: 9321da177e4SLinus Torvalds vma = NULL; 9331da177e4SLinus Torvalds 9341da177e4SLinus Torvalds /* obtain the address at which to make a shared mapping 9351da177e4SLinus Torvalds * - this is the hook for quasi-memory character devices to 9361da177e4SLinus Torvalds * tell us the location of a shared mapping 9371da177e4SLinus Torvalds */ 9381da177e4SLinus Torvalds if (file && file->f_op->get_unmapped_area) { 9391da177e4SLinus Torvalds addr = file->f_op->get_unmapped_area(file, addr, len, 9401da177e4SLinus Torvalds pgoff, flags); 9411da177e4SLinus Torvalds if (IS_ERR((void *) addr)) { 9421da177e4SLinus Torvalds ret = addr; 9431da177e4SLinus Torvalds if (ret != (unsigned long) -ENOSYS) 9441da177e4SLinus Torvalds goto error; 9451da177e4SLinus Torvalds 9461da177e4SLinus Torvalds /* the driver refused to tell us where to site 9471da177e4SLinus Torvalds * the mapping so we'll have to attempt to copy 9481da177e4SLinus Torvalds * it */ 9491da177e4SLinus Torvalds ret = (unsigned long) -ENODEV; 9501da177e4SLinus Torvalds if (!(capabilities & BDI_CAP_MAP_COPY)) 9511da177e4SLinus Torvalds goto error; 9521da177e4SLinus Torvalds 9531da177e4SLinus Torvalds capabilities &= ~BDI_CAP_MAP_DIRECT; 
9541da177e4SLinus Torvalds } 9551da177e4SLinus Torvalds } 9561da177e4SLinus Torvalds } 9571da177e4SLinus Torvalds 9581da177e4SLinus Torvalds /* we're going to need a VMA struct as well */ 9594668edc3SBurman Yan vma = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL); 9601da177e4SLinus Torvalds if (!vma) 9611da177e4SLinus Torvalds goto error_getting_vma; 9621da177e4SLinus Torvalds 9631da177e4SLinus Torvalds INIT_LIST_HEAD(&vma->anon_vma_node); 9641da177e4SLinus Torvalds atomic_set(&vma->vm_usage, 1); 9651da177e4SLinus Torvalds if (file) 9661da177e4SLinus Torvalds get_file(file); 9671da177e4SLinus Torvalds vma->vm_file = file; 9681da177e4SLinus Torvalds vma->vm_flags = vm_flags; 9691da177e4SLinus Torvalds vma->vm_start = addr; 9701da177e4SLinus Torvalds vma->vm_end = addr + len; 9711da177e4SLinus Torvalds vma->vm_pgoff = pgoff; 9721da177e4SLinus Torvalds 9731da177e4SLinus Torvalds vml->vma = vma; 9741da177e4SLinus Torvalds 9751da177e4SLinus Torvalds /* set up the mapping */ 9761da177e4SLinus Torvalds if (file && vma->vm_flags & VM_SHARED) 9771da177e4SLinus Torvalds ret = do_mmap_shared_file(vma, len); 9781da177e4SLinus Torvalds else 9791da177e4SLinus Torvalds ret = do_mmap_private(vma, len); 9801da177e4SLinus Torvalds if (ret < 0) 9811da177e4SLinus Torvalds goto error; 9821da177e4SLinus Torvalds 9831da177e4SLinus Torvalds /* okay... 
we have a mapping; now we have to register it */ 9841da177e4SLinus Torvalds result = (void *) vma->vm_start; 9851da177e4SLinus Torvalds 9861da177e4SLinus Torvalds if (vma->vm_flags & VM_MAPPED_COPY) { 9871da177e4SLinus Torvalds realalloc += kobjsize(result); 9881da177e4SLinus Torvalds askedalloc += len; 9891da177e4SLinus Torvalds } 9901da177e4SLinus Torvalds 9911da177e4SLinus Torvalds realalloc += kobjsize(vma); 9921da177e4SLinus Torvalds askedalloc += sizeof(*vma); 9931da177e4SLinus Torvalds 9941da177e4SLinus Torvalds current->mm->total_vm += len >> PAGE_SHIFT; 9951da177e4SLinus Torvalds 9961da177e4SLinus Torvalds add_nommu_vma(vma); 9971da177e4SLinus Torvalds 9981da177e4SLinus Torvalds shared: 9991da177e4SLinus Torvalds realalloc += kobjsize(vml); 10001da177e4SLinus Torvalds askedalloc += sizeof(*vml); 10011da177e4SLinus Torvalds 10023034097aSDavid Howells add_vma_to_mm(current->mm, vml); 10031da177e4SLinus Torvalds 10041da177e4SLinus Torvalds up_write(&nommu_vma_sem); 10051da177e4SLinus Torvalds 10061da177e4SLinus Torvalds if (prot & PROT_EXEC) 10071da177e4SLinus Torvalds flush_icache_range((unsigned long) result, 10081da177e4SLinus Torvalds (unsigned long) result + len); 10091da177e4SLinus Torvalds 10101da177e4SLinus Torvalds #ifdef DEBUG 10111da177e4SLinus Torvalds printk("do_mmap:\n"); 10121da177e4SLinus Torvalds show_process_blocks(); 10131da177e4SLinus Torvalds #endif 10141da177e4SLinus Torvalds 10151da177e4SLinus Torvalds return (unsigned long) result; 10161da177e4SLinus Torvalds 10171da177e4SLinus Torvalds error: 10181da177e4SLinus Torvalds up_write(&nommu_vma_sem); 10191da177e4SLinus Torvalds kfree(vml); 10201da177e4SLinus Torvalds if (vma) { 10213fcd03e0SGavin Lambert if (vma->vm_file) 10221da177e4SLinus Torvalds fput(vma->vm_file); 10231da177e4SLinus Torvalds kfree(vma); 10241da177e4SLinus Torvalds } 10251da177e4SLinus Torvalds return ret; 10261da177e4SLinus Torvalds 10271da177e4SLinus Torvalds sharing_violation: 10281da177e4SLinus Torvalds 
up_write(&nommu_vma_sem); 10291da177e4SLinus Torvalds printk("Attempt to share mismatched mappings\n"); 10301da177e4SLinus Torvalds kfree(vml); 10311da177e4SLinus Torvalds return -EINVAL; 10321da177e4SLinus Torvalds 10331da177e4SLinus Torvalds error_getting_vma: 10341da177e4SLinus Torvalds up_write(&nommu_vma_sem); 10351da177e4SLinus Torvalds kfree(vml); 103666aa2b4bSGreg Ungerer printk("Allocation of vma for %lu byte allocation from process %d failed\n", 10371da177e4SLinus Torvalds len, current->pid); 10381da177e4SLinus Torvalds show_free_areas(); 10391da177e4SLinus Torvalds return -ENOMEM; 10401da177e4SLinus Torvalds 10411da177e4SLinus Torvalds error_getting_vml: 10421da177e4SLinus Torvalds printk("Allocation of vml for %lu byte allocation from process %d failed\n", 10431da177e4SLinus Torvalds len, current->pid); 10441da177e4SLinus Torvalds show_free_areas(); 10451da177e4SLinus Torvalds return -ENOMEM; 10461da177e4SLinus Torvalds } 1047b5073173SPaul Mundt EXPORT_SYMBOL(do_mmap_pgoff); 10481da177e4SLinus Torvalds 10491da177e4SLinus Torvalds /* 10501da177e4SLinus Torvalds * handle mapping disposal for uClinux 10511da177e4SLinus Torvalds */ 10521da177e4SLinus Torvalds static void put_vma(struct vm_area_struct *vma) 10531da177e4SLinus Torvalds { 10541da177e4SLinus Torvalds if (vma) { 10551da177e4SLinus Torvalds down_write(&nommu_vma_sem); 10561da177e4SLinus Torvalds 10571da177e4SLinus Torvalds if (atomic_dec_and_test(&vma->vm_usage)) { 10581da177e4SLinus Torvalds delete_nommu_vma(vma); 10591da177e4SLinus Torvalds 10601da177e4SLinus Torvalds if (vma->vm_ops && vma->vm_ops->close) 10611da177e4SLinus Torvalds vma->vm_ops->close(vma); 10621da177e4SLinus Torvalds 10631da177e4SLinus Torvalds /* IO memory and memory shared directly out of the pagecache from 10641da177e4SLinus Torvalds * ramfs/tmpfs mustn't be released here */ 10651da177e4SLinus Torvalds if (vma->vm_flags & VM_MAPPED_COPY) { 10661da177e4SLinus Torvalds realalloc -= kobjsize((void *) vma->vm_start); 
10671da177e4SLinus Torvalds askedalloc -= vma->vm_end - vma->vm_start; 10681da177e4SLinus Torvalds kfree((void *) vma->vm_start); 10691da177e4SLinus Torvalds } 10701da177e4SLinus Torvalds 10711da177e4SLinus Torvalds realalloc -= kobjsize(vma); 10721da177e4SLinus Torvalds askedalloc -= sizeof(*vma); 10731da177e4SLinus Torvalds 10741da177e4SLinus Torvalds if (vma->vm_file) 10751da177e4SLinus Torvalds fput(vma->vm_file); 10761da177e4SLinus Torvalds kfree(vma); 10771da177e4SLinus Torvalds } 10781da177e4SLinus Torvalds 10791da177e4SLinus Torvalds up_write(&nommu_vma_sem); 10801da177e4SLinus Torvalds } 10811da177e4SLinus Torvalds } 10821da177e4SLinus Torvalds 10833034097aSDavid Howells /* 10843034097aSDavid Howells * release a mapping 10853034097aSDavid Howells * - under NOMMU conditions the parameters must match exactly to the mapping to 10863034097aSDavid Howells * be removed 10873034097aSDavid Howells */ 10881da177e4SLinus Torvalds int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) 10891da177e4SLinus Torvalds { 10901da177e4SLinus Torvalds struct vm_list_struct *vml, **parent; 10911da177e4SLinus Torvalds unsigned long end = addr + len; 10921da177e4SLinus Torvalds 10931da177e4SLinus Torvalds #ifdef DEBUG 10941da177e4SLinus Torvalds printk("do_munmap:\n"); 10951da177e4SLinus Torvalds #endif 10961da177e4SLinus Torvalds 10973034097aSDavid Howells for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next) { 10983034097aSDavid Howells if ((*parent)->vma->vm_start > addr) 10993034097aSDavid Howells break; 11001da177e4SLinus Torvalds if ((*parent)->vma->vm_start == addr && 110166aa2b4bSGreg Ungerer ((len == 0) || ((*parent)->vma->vm_end == end))) 11021da177e4SLinus Torvalds goto found; 11033034097aSDavid Howells } 11041da177e4SLinus Torvalds 11051da177e4SLinus Torvalds printk("munmap of non-mmaped memory by process %d (%s): %p\n", 11061da177e4SLinus Torvalds current->pid, current->comm, (void *) addr); 11071da177e4SLinus Torvalds return -EINVAL; 
11081da177e4SLinus Torvalds 11091da177e4SLinus Torvalds found: 11101da177e4SLinus Torvalds vml = *parent; 11111da177e4SLinus Torvalds 11121da177e4SLinus Torvalds put_vma(vml->vma); 11131da177e4SLinus Torvalds 11141da177e4SLinus Torvalds *parent = vml->next; 11151da177e4SLinus Torvalds realalloc -= kobjsize(vml); 11161da177e4SLinus Torvalds askedalloc -= sizeof(*vml); 11171da177e4SLinus Torvalds kfree(vml); 1118365e9c87SHugh Dickins 1119365e9c87SHugh Dickins update_hiwater_vm(mm); 11201da177e4SLinus Torvalds mm->total_vm -= len >> PAGE_SHIFT; 11211da177e4SLinus Torvalds 11221da177e4SLinus Torvalds #ifdef DEBUG 11231da177e4SLinus Torvalds show_process_blocks(); 11241da177e4SLinus Torvalds #endif 11251da177e4SLinus Torvalds 11261da177e4SLinus Torvalds return 0; 11271da177e4SLinus Torvalds } 1128b5073173SPaul Mundt EXPORT_SYMBOL(do_munmap); 11291da177e4SLinus Torvalds 11303034097aSDavid Howells asmlinkage long sys_munmap(unsigned long addr, size_t len) 11313034097aSDavid Howells { 11323034097aSDavid Howells int ret; 11333034097aSDavid Howells struct mm_struct *mm = current->mm; 11343034097aSDavid Howells 11353034097aSDavid Howells down_write(&mm->mmap_sem); 11363034097aSDavid Howells ret = do_munmap(mm, addr, len); 11373034097aSDavid Howells up_write(&mm->mmap_sem); 11383034097aSDavid Howells return ret; 11393034097aSDavid Howells } 11403034097aSDavid Howells 11413034097aSDavid Howells /* 11423034097aSDavid Howells * Release all mappings 11433034097aSDavid Howells */ 11441da177e4SLinus Torvalds void exit_mmap(struct mm_struct * mm) 11451da177e4SLinus Torvalds { 11461da177e4SLinus Torvalds struct vm_list_struct *tmp; 11471da177e4SLinus Torvalds 11481da177e4SLinus Torvalds if (mm) { 11491da177e4SLinus Torvalds #ifdef DEBUG 11501da177e4SLinus Torvalds printk("Exit_mmap:\n"); 11511da177e4SLinus Torvalds #endif 11521da177e4SLinus Torvalds 11531da177e4SLinus Torvalds mm->total_vm = 0; 11541da177e4SLinus Torvalds 11551da177e4SLinus Torvalds while ((tmp = mm->context.vmlist)) 
{ 11561da177e4SLinus Torvalds mm->context.vmlist = tmp->next; 11571da177e4SLinus Torvalds put_vma(tmp->vma); 11581da177e4SLinus Torvalds 11591da177e4SLinus Torvalds realalloc -= kobjsize(tmp); 11601da177e4SLinus Torvalds askedalloc -= sizeof(*tmp); 11611da177e4SLinus Torvalds kfree(tmp); 11621da177e4SLinus Torvalds } 11631da177e4SLinus Torvalds 11641da177e4SLinus Torvalds #ifdef DEBUG 11651da177e4SLinus Torvalds show_process_blocks(); 11661da177e4SLinus Torvalds #endif 11671da177e4SLinus Torvalds } 11681da177e4SLinus Torvalds } 11691da177e4SLinus Torvalds 11701da177e4SLinus Torvalds unsigned long do_brk(unsigned long addr, unsigned long len) 11711da177e4SLinus Torvalds { 11721da177e4SLinus Torvalds return -ENOMEM; 11731da177e4SLinus Torvalds } 11741da177e4SLinus Torvalds 11751da177e4SLinus Torvalds /* 11766fa5f80bSDavid Howells * expand (or shrink) an existing mapping, potentially moving it at the same 11776fa5f80bSDavid Howells * time (controlled by the MREMAP_MAYMOVE flag and available VM space) 11781da177e4SLinus Torvalds * 11796fa5f80bSDavid Howells * under NOMMU conditions, we only permit changing a mapping's size, and only 11806fa5f80bSDavid Howells * as long as it stays within the hole allocated by the kmalloc() call in 11816fa5f80bSDavid Howells * do_mmap_pgoff() and the block is not shareable 11821da177e4SLinus Torvalds * 11836fa5f80bSDavid Howells * MREMAP_FIXED is not supported under NOMMU conditions 11841da177e4SLinus Torvalds */ 11851da177e4SLinus Torvalds unsigned long do_mremap(unsigned long addr, 11861da177e4SLinus Torvalds unsigned long old_len, unsigned long new_len, 11871da177e4SLinus Torvalds unsigned long flags, unsigned long new_addr) 11881da177e4SLinus Torvalds { 11896fa5f80bSDavid Howells struct vm_area_struct *vma; 11901da177e4SLinus Torvalds 11911da177e4SLinus Torvalds /* insanity checks first */ 11921da177e4SLinus Torvalds if (new_len == 0) 11931da177e4SLinus Torvalds return (unsigned long) -EINVAL; 11941da177e4SLinus Torvalds 
11951da177e4SLinus Torvalds if (flags & MREMAP_FIXED && new_addr != addr) 11961da177e4SLinus Torvalds return (unsigned long) -EINVAL; 11971da177e4SLinus Torvalds 11986fa5f80bSDavid Howells vma = find_vma_exact(current->mm, addr); 11996fa5f80bSDavid Howells if (!vma) 12001da177e4SLinus Torvalds return (unsigned long) -EINVAL; 12011da177e4SLinus Torvalds 12026fa5f80bSDavid Howells if (vma->vm_end != vma->vm_start + old_len) 12031da177e4SLinus Torvalds return (unsigned long) -EFAULT; 12041da177e4SLinus Torvalds 12056fa5f80bSDavid Howells if (vma->vm_flags & VM_MAYSHARE) 12061da177e4SLinus Torvalds return (unsigned long) -EPERM; 12071da177e4SLinus Torvalds 12081da177e4SLinus Torvalds if (new_len > kobjsize((void *) addr)) 12091da177e4SLinus Torvalds return (unsigned long) -ENOMEM; 12101da177e4SLinus Torvalds 12111da177e4SLinus Torvalds /* all checks complete - do it */ 12126fa5f80bSDavid Howells vma->vm_end = vma->vm_start + new_len; 12131da177e4SLinus Torvalds 12141da177e4SLinus Torvalds askedalloc -= old_len; 12151da177e4SLinus Torvalds askedalloc += new_len; 12161da177e4SLinus Torvalds 12176fa5f80bSDavid Howells return vma->vm_start; 12186fa5f80bSDavid Howells } 1219b5073173SPaul Mundt EXPORT_SYMBOL(do_mremap); 12206fa5f80bSDavid Howells 12216fa5f80bSDavid Howells asmlinkage unsigned long sys_mremap(unsigned long addr, 12226fa5f80bSDavid Howells unsigned long old_len, unsigned long new_len, 12236fa5f80bSDavid Howells unsigned long flags, unsigned long new_addr) 12246fa5f80bSDavid Howells { 12256fa5f80bSDavid Howells unsigned long ret; 12266fa5f80bSDavid Howells 12276fa5f80bSDavid Howells down_write(¤t->mm->mmap_sem); 12286fa5f80bSDavid Howells ret = do_mremap(addr, old_len, new_len, flags, new_addr); 12296fa5f80bSDavid Howells up_write(¤t->mm->mmap_sem); 12306fa5f80bSDavid Howells return ret; 12311da177e4SLinus Torvalds } 12321da177e4SLinus Torvalds 12336aab341eSLinus Torvalds struct page *follow_page(struct vm_area_struct *vma, unsigned long address, 
1234deceb6cdSHugh Dickins unsigned int foll_flags) 12351da177e4SLinus Torvalds { 12361da177e4SLinus Torvalds return NULL; 12371da177e4SLinus Torvalds } 12381da177e4SLinus Torvalds 12391da177e4SLinus Torvalds int remap_pfn_range(struct vm_area_struct *vma, unsigned long from, 12401da177e4SLinus Torvalds unsigned long to, unsigned long size, pgprot_t prot) 12411da177e4SLinus Torvalds { 124266aa2b4bSGreg Ungerer vma->vm_start = vma->vm_pgoff << PAGE_SHIFT; 124366aa2b4bSGreg Ungerer return 0; 12441da177e4SLinus Torvalds } 124522c4af40SLuke Yang EXPORT_SYMBOL(remap_pfn_range); 12461da177e4SLinus Torvalds 1247*f905bc44SPaul Mundt int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, 1248*f905bc44SPaul Mundt unsigned long pgoff) 1249*f905bc44SPaul Mundt { 1250*f905bc44SPaul Mundt unsigned int size = vma->vm_end - vma->vm_start; 1251*f905bc44SPaul Mundt 1252*f905bc44SPaul Mundt if (!(vma->vm_flags & VM_USERMAP)) 1253*f905bc44SPaul Mundt return -EINVAL; 1254*f905bc44SPaul Mundt 1255*f905bc44SPaul Mundt vma->vm_start = (unsigned long)(addr + (pgoff << PAGE_SHIFT)); 1256*f905bc44SPaul Mundt vma->vm_end = vma->vm_start + size; 1257*f905bc44SPaul Mundt 1258*f905bc44SPaul Mundt return 0; 1259*f905bc44SPaul Mundt } 1260*f905bc44SPaul Mundt EXPORT_SYMBOL(remap_vmalloc_range); 1261*f905bc44SPaul Mundt 12621da177e4SLinus Torvalds void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) 12631da177e4SLinus Torvalds { 12641da177e4SLinus Torvalds } 12651da177e4SLinus Torvalds 12661da177e4SLinus Torvalds unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, 12671da177e4SLinus Torvalds unsigned long len, unsigned long pgoff, unsigned long flags) 12681da177e4SLinus Torvalds { 12691da177e4SLinus Torvalds return -ENOMEM; 12701da177e4SLinus Torvalds } 12711da177e4SLinus Torvalds 12721363c3cdSWolfgang Wander void arch_unmap_area(struct mm_struct *mm, unsigned long addr) 12731da177e4SLinus Torvalds { 12741da177e4SLinus Torvalds } 12751da177e4SLinus 
/*
 * Under NOMMU there are no private copy-on-write mappings or page
 * tables to tear down when a file is truncated, so this is a no-op
 * stub kept only so common code can call it unconditionally.
 */
void unmap_mapping_range(struct address_space *mapping,
			 loff_t const holebegin, loff_t const holelen,
			 int even_cows)
{
}
EXPORT_SYMBOL(unmap_mapping_range);

/*
 * ask for an unmapped area at which to create a mapping on a file
 *
 * Uses the mm's default get_unmapped_area operation unless the file's
 * f_op supplies its own, which takes precedence.  Returns -ENOSYS when
 * neither provides one.
 */
unsigned long get_unmapped_area(struct file *file, unsigned long addr,
				unsigned long len, unsigned long pgoff,
				unsigned long flags)
{
	unsigned long (*get_area)(struct file *, unsigned long, unsigned long,
				  unsigned long, unsigned long);

	get_area = current->mm->get_unmapped_area;
	/* a file-specific hook overrides the mm-wide default */
	if (file && file->f_op && file->f_op->get_unmapped_area)
		get_area = file->f_op->get_unmapped_area;

	if (!get_area)
		return -ENOSYS;

	return get_area(file, addr, len, pgoff, flags);
}
EXPORT_SYMBOL(get_unmapped_area);

/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping. 0 means there is enough memory for the allocation to
 * succeed and -ENOMEM implies there is not.
 *
 * We currently support three overcommit policies, which are set via the
 * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting
 *
 * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
 * Additional code 2002 Jul 20 by Robert Love.
 *
 * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
 *
 * Note this is a helper function intended to be used by LSMs which
 * wish to use this logic.
 */
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
	unsigned long free, allowed;

	/* charge the pages up front; unwound at 'error' on failure */
	vm_acct_memory(pages);

	/*
	 * Sometimes we want to use more memory than we have
	 */
	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
		return 0;

	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
		unsigned long n;

		/* estimate reclaimable memory: page cache plus free swap */
		free = global_page_state(NR_FILE_PAGES);
		free += nr_swap_pages;

		/*
		 * Any slabs which are created with the
		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
		 * which are reclaimable, under pressure.  The dentry
		 * cache and most inode caches should fall into this
		 */
		free += global_page_state(NR_SLAB_RECLAIMABLE);

		/*
		 * Leave the last 3% for root
		 */
		if (!cap_sys_admin)
			free -= free / 32;

		if (free > pages)
			return 0;

		/*
		 * nr_free_pages() is very expensive on large systems,
		 * only call if we're about to fail.
		 */
		n = nr_free_pages();

		/*
		 * Leave reserved pages. The pages are not for anonymous pages.
		 */
		if (n <= totalreserve_pages)
			goto error;
		else
			n -= totalreserve_pages;

		/*
		 * Leave the last 3% for root
		 */
		if (!cap_sys_admin)
			n -= n / 32;
		free += n;

		if (free > pages)
			return 0;

		goto error;
	}

	/* strict overcommit (OVERCOMMIT_NEVER): hard commit limit */
	allowed = totalram_pages * sysctl_overcommit_ratio / 100;
	/*
	 * Leave the last 3% for root
	 */
	if (!cap_sys_admin)
		allowed -= allowed / 32;
	allowed += total_swap_pages;

	/* Don't let a single process grow too big:
	   leave 3% of the size of this process for other processes */
	allowed -= current->mm->total_vm / 32;

	/*
	 * cast `allowed' as a signed long because vm_committed_space
	 * sometimes has a negative value
	 */
	if (atomic_read(&vm_committed_space) < (long)allowed)
		return 0;
error:
	/* undo the charge taken by vm_acct_memory() above */
	vm_unacct_memory(pages);

	return -ENOMEM;
}

/* no gate area on NOMMU; always report the address as outside it */
int in_gate_area_no_task(unsigned long addr)
{
	return 0;
}

/*
 * Demand paging cannot happen without an MMU, so a fault reaching here
 * indicates a kernel bug.
 */
int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	BUG();
	return 0;
}
EXPORT_SYMBOL(filemap_fault);

/*
 * Access another process' address space.
14200ec76a11SDavid Howells * - source/target buffer must be kernel space 14210ec76a11SDavid Howells */ 14220ec76a11SDavid Howells int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) 14230ec76a11SDavid Howells { 14240ec76a11SDavid Howells struct vm_area_struct *vma; 14250ec76a11SDavid Howells struct mm_struct *mm; 14260ec76a11SDavid Howells 14270ec76a11SDavid Howells if (addr + len < addr) 14280ec76a11SDavid Howells return 0; 14290ec76a11SDavid Howells 14300ec76a11SDavid Howells mm = get_task_mm(tsk); 14310ec76a11SDavid Howells if (!mm) 14320ec76a11SDavid Howells return 0; 14330ec76a11SDavid Howells 14340ec76a11SDavid Howells down_read(&mm->mmap_sem); 14350ec76a11SDavid Howells 14360ec76a11SDavid Howells /* the access must start within one of the target process's mappings */ 14370159b141SDavid Howells vma = find_vma(mm, addr); 14380159b141SDavid Howells if (vma) { 14390ec76a11SDavid Howells /* don't overrun this mapping */ 14400ec76a11SDavid Howells if (addr + len >= vma->vm_end) 14410ec76a11SDavid Howells len = vma->vm_end - addr; 14420ec76a11SDavid Howells 14430ec76a11SDavid Howells /* only read or write mappings where it is permitted */ 1444d00c7b99SDavid Howells if (write && vma->vm_flags & VM_MAYWRITE) 14450ec76a11SDavid Howells len -= copy_to_user((void *) addr, buf, len); 1446d00c7b99SDavid Howells else if (!write && vma->vm_flags & VM_MAYREAD) 14470ec76a11SDavid Howells len -= copy_from_user(buf, (void *) addr, len); 14480ec76a11SDavid Howells else 14490ec76a11SDavid Howells len = 0; 14500ec76a11SDavid Howells } else { 14510ec76a11SDavid Howells len = 0; 14520ec76a11SDavid Howells } 14530ec76a11SDavid Howells 14540ec76a11SDavid Howells up_read(&mm->mmap_sem); 14550ec76a11SDavid Howells mmput(mm); 14560ec76a11SDavid Howells return len; 14570ec76a11SDavid Howells } 1458