/*
 *  linux/mm/vmalloc.c
 *
 *  Copyright (C) 1993  Linus Torvalds
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
 *  Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
 *  Numa awareness, Christoph Lameter, SGI, June 2005
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>

#include <linux/vmalloc.h>

#include <asm/uaccess.h>
#include <asm/tlbflush.h>


DEFINE_RWLOCK(vmlist_lock);
struct vm_struct *vmlist;

static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
			    int node);

static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr,
						unsigned long end)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		vunmap_pte_range(pmd, addr, next);
	} while (pmd++, addr = next, addr != end);
}

static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr,
						unsigned long end)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		vunmap_pmd_range(pud, addr, next);
	} while (pud++, addr = next, addr != end);
}

void unmap_vm_area(struct vm_struct *area)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long addr = (unsigned long) area->addr;
	unsigned long end = addr + area->size;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	flush_cache_vunmap(addr, end);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		vunmap_pud_range(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
	flush_tlb_kernel_range((unsigned long) area->addr, end);
}

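/*
 * Note that the unmap side above only clears page table entries and
 * flushes the TLB; it never frees the backing pages.  Releasing those
 * pages is the caller's job -- see __vunmap() below, which walks
 * area->pages[] and __free_page()s each one after the unmap.
 */
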
static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
			unsigned long end, pgprot_t prot, struct page ***pages)
{
	pte_t *pte;

	pte = pte_alloc_kernel(pmd, addr);
	if (!pte)
		return -ENOMEM;
	do {
		struct page *page = **pages;
		WARN_ON(!pte_none(*pte));
		if (!page)
			return -ENOMEM;
		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
		(*pages)++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	return 0;
}

static inline int vmap_pmd_range(pud_t *pud, unsigned long addr,
			unsigned long end, pgprot_t prot, struct page ***pages)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_alloc(&init_mm, pud, addr);
	if (!pmd)
		return -ENOMEM;
	do {
		next = pmd_addr_end(addr, end);
		if (vmap_pte_range(pmd, addr, next, prot, pages))
			return -ENOMEM;
	} while (pmd++, addr = next, addr != end);
	return 0;
}

static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr,
			unsigned long end, pgprot_t prot, struct page ***pages)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_alloc(&init_mm, pgd, addr);
	if (!pud)
		return -ENOMEM;
	do {
		next = pud_addr_end(addr, end);
		if (vmap_pmd_range(pud, addr, next, prot, pages))
			return -ENOMEM;
	} while (pud++, addr = next, addr != end);
	return 0;
}

int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long addr = (unsigned long) area->addr;
	unsigned long end = addr + area->size - PAGE_SIZE;
	int err;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		err = vmap_pud_range(pgd, addr, next, prot, pages);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);
	flush_cache_vmap((unsigned long) area->addr, end);
	return err;
}

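/*
 * Usage sketch (illustrative, not from this file): map_vm_area()
 * advances the caller's page-array cursor as it installs PTEs, which is
 * why it takes struct page *** rather than struct page **.  Assuming
 * "my_pages" is a hypothetical array of already-allocated pages backing
 * "area":
 *
 *	struct page **cursor = my_pages;
 *
 *	if (map_vm_area(area, PAGE_KERNEL, &cursor))
 *		vunmap(area->addr);	(tear down any partial mapping)
 *
 * On success "cursor" points one past the last page consumed; vmap() and
 * __vmalloc_area_node() below drive this function exactly that way.
 */
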
static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags,
					    unsigned long start, unsigned long end,
					    int node, gfp_t gfp_mask)
{
	struct vm_struct **p, *tmp, *area;
	unsigned long align = 1;
	unsigned long addr;

	BUG_ON(in_interrupt());
	if (flags & VM_IOREMAP) {
		int bit = fls(size);

		if (bit > IOREMAP_MAX_ORDER)
			bit = IOREMAP_MAX_ORDER;
		else if (bit < PAGE_SHIFT)
			bit = PAGE_SHIFT;

		align = 1ul << bit;
	}
	addr = ALIGN(start, align);
	size = PAGE_ALIGN(size);
	if (unlikely(!size))
		return NULL;

	area = kmalloc_node(sizeof(*area), gfp_mask & GFP_LEVEL_MASK, node);
	if (unlikely(!area))
		return NULL;

	/*
	 * We always allocate a guard page.
	 */
	size += PAGE_SIZE;

	write_lock(&vmlist_lock);
	for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) {
		if ((unsigned long)tmp->addr < addr) {
			if((unsigned long)tmp->addr + tmp->size >= addr)
				addr = ALIGN(tmp->size +
					     (unsigned long)tmp->addr, align);
			continue;
		}
		if ((size + addr) < addr)
			goto out;
		if (size + addr <= (unsigned long)tmp->addr)
			goto found;
		addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align);
		if (addr > end - size)
			goto out;
	}

found:
	area->next = *p;
	*p = area;

	area->flags = flags;
	area->addr = (void *)addr;
	area->size = size;
	area->pages = NULL;
	area->nr_pages = 0;
	area->phys_addr = 0;
	write_unlock(&vmlist_lock);

	return area;

out:
	write_unlock(&vmlist_lock);
	kfree(area);
	if (printk_ratelimit())
		printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc=<size> to increase size.\n");
	return NULL;
}

struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
				unsigned long start, unsigned long end)
{
	return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL);
}

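/*
 * The loop in __get_vm_area_node() above is a first-fit scan: vmlist is
 * kept sorted by address, entries that lie below the candidate address
 * merely push the candidate past their end, and the first hole that can
 * take @size (guard page included) wins.  The "size + addr < addr" test
 * catches wrap-around of the candidate range.
 */
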
/**
 *	get_vm_area  -  reserve a contiguous kernel virtual area
 *	@size:		size of the area
 *	@flags:		%VM_IOREMAP for I/O mappings or VM_ALLOC
 *
 *	Search an area of @size in the kernel virtual mapping area,
 *	and reserve it for our purposes.  Returns the area descriptor
 *	on success or %NULL on failure.
 */
struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
{
	return __get_vm_area(size, flags, VMALLOC_START, VMALLOC_END);
}

struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
				   int node, gfp_t gfp_mask)
{
	return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node,
				  gfp_mask);
}

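/*
 * Usage sketch (illustrative; arch ioremap() implementations follow this
 * pattern): reserve address space now and populate it later.  Note that
 * __get_vm_area_node() added a guard page, so a PAGE_SIZE request yields
 * area->size == 2 * PAGE_SIZE, and the mapping helpers subtract it again:
 *
 *	struct vm_struct *area = get_vm_area(PAGE_SIZE, VM_IOREMAP);
 *
 *	if (!area)
 *		return NULL;
 *	(... map_vm_area() or arch page-table code fills the range in ...)
 */
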
/* Caller must hold vmlist_lock */
static struct vm_struct *__find_vm_area(void *addr)
{
	struct vm_struct *tmp;

	for (tmp = vmlist; tmp != NULL; tmp = tmp->next) {
		if (tmp->addr == addr)
			break;
	}

	return tmp;
}

/* Caller must hold vmlist_lock */
static struct vm_struct *__remove_vm_area(void *addr)
{
	struct vm_struct **p, *tmp;

	for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) {
		if (tmp->addr == addr)
			goto found;
	}
	return NULL;

found:
	unmap_vm_area(tmp);
	*p = tmp->next;

	/*
	 * Remove the guard page.
	 */
	tmp->size -= PAGE_SIZE;
	return tmp;
}

/**
 *	remove_vm_area  -  find and remove a contiguous kernel virtual area
 *	@addr:		base address
 *
 *	Search for the kernel VM area starting at @addr, and remove it.
 *	This function returns the found VM area, but using it is NOT safe
 *	on SMP machines, except for its size or flags.
 */
struct vm_struct *remove_vm_area(void *addr)
{
	struct vm_struct *v;

	write_lock(&vmlist_lock);
	v = __remove_vm_area(addr);
	write_unlock(&vmlist_lock);
	return v;
}

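/*
 * Guard page accounting: __get_vm_area_node() grew area->size by one
 * page, and __remove_vm_area() shrinks it back before returning, so a
 * remove_vm_area() caller sees the size it originally asked to map.
 * Areas still on vmlist keep the extra page, which is why vread() and
 * vwrite() below subtract PAGE_SIZE from tmp->size.
 */
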
void __vunmap(void *addr, int deallocate_pages)
{
	struct vm_struct *area;

	if (!addr)
		return;

	if ((PAGE_SIZE-1) & (unsigned long)addr) {
		printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
		WARN_ON(1);
		return;
	}

	area = remove_vm_area(addr);
	if (unlikely(!area)) {
		printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
				addr);
		WARN_ON(1);
		return;
	}

	debug_check_no_locks_freed(addr, area->size);

	if (deallocate_pages) {
		int i;

		for (i = 0; i < area->nr_pages; i++) {
			BUG_ON(!area->pages[i]);
			__free_page(area->pages[i]);
		}

		if (area->flags & VM_VPAGES)
			vfree(area->pages);
		else
			kfree(area->pages);
	}

	kfree(area);
	return;
}

/**
 *	vfree  -  release memory allocated by vmalloc()
 *	@addr:		memory base address
 *
 *	Free the virtually contiguous memory area starting at @addr, as
 *	obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
 *	NULL, no operation is performed.
 *
 *	Must not be called in interrupt context.
 */
void vfree(void *addr)
{
	BUG_ON(in_interrupt());
	__vunmap(addr, 1);
}
EXPORT_SYMBOL(vfree);

/**
 *	vunmap  -  release virtual mapping obtained by vmap()
 *	@addr:		memory base address
 *
 *	Free the virtually contiguous memory area starting at @addr,
 *	which was created from the page array passed to vmap().
 *
 *	Must not be called in interrupt context.
 */
void vunmap(void *addr)
{
	BUG_ON(in_interrupt());
	__vunmap(addr, 0);
}
EXPORT_SYMBOL(vunmap);

/**
 *	vmap  -  map an array of pages into virtually contiguous space
 *	@pages:		array of page pointers
 *	@count:		number of pages to map
 *	@flags:		vm_area->flags
 *	@prot:		page protection for the mapping
 *
 *	Maps @count pages from @pages into contiguous kernel virtual
 *	space.
 */
void *vmap(struct page **pages, unsigned int count,
		unsigned long flags, pgprot_t prot)
{
	struct vm_struct *area;

	if (count > num_physpages)
		return NULL;

	area = get_vm_area((count << PAGE_SHIFT), flags);
	if (!area)
		return NULL;
	if (map_vm_area(area, prot, &pages)) {
		vunmap(area->addr);
		return NULL;
	}

	return area->addr;
}
EXPORT_SYMBOL(vmap);

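/*
 * Usage sketch (illustrative, error handling trimmed): vmap() stitches
 * independently allocated pages into one contiguous kernel virtual
 * range; vunmap() only removes the mapping, so the pages must be freed
 * by hand -- unlike vfree() on vmalloc() memory:
 *
 *	struct page *pages[2];
 *	void *vaddr;
 *
 *	pages[0] = alloc_page(GFP_KERNEL);
 *	pages[1] = alloc_page(GFP_KERNEL);
 *	vaddr = vmap(pages, 2, VM_MAP, PAGE_KERNEL);
 *	...
 *	vunmap(vaddr);
 *	__free_page(pages[0]);
 *	__free_page(pages[1]);
 */
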
void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
				pgprot_t prot, int node)
{
	struct page **pages;
	unsigned int nr_pages, array_size, i;

	nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
	array_size = (nr_pages * sizeof(struct page *));

	area->nr_pages = nr_pages;
	/* Please note that the recursion is strictly bounded. */
	if (array_size > PAGE_SIZE) {
		pages = __vmalloc_node(array_size, gfp_mask, PAGE_KERNEL, node);
		area->flags |= VM_VPAGES;
	} else {
		pages = kmalloc_node(array_size,
				(gfp_mask & ~(__GFP_HIGHMEM | __GFP_ZERO)),
				node);
	}
	area->pages = pages;
	if (!area->pages) {
		remove_vm_area(area->addr);
		kfree(area);
		return NULL;
	}
	memset(area->pages, 0, array_size);

	for (i = 0; i < area->nr_pages; i++) {
		if (node < 0)
			area->pages[i] = alloc_page(gfp_mask);
		else
			area->pages[i] = alloc_pages_node(node, gfp_mask, 0);
		if (unlikely(!area->pages[i])) {
			/* Successfully allocated i pages, free them in __vunmap() */
			area->nr_pages = i;
			goto fail;
		}
	}

	if (map_vm_area(area, prot, &pages))
		goto fail;
	return area->addr;

fail:
	vfree(area->addr);
	return NULL;
}

void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
{
	return __vmalloc_area_node(area, gfp_mask, prot, -1);
}

/**
 *	__vmalloc_node  -  allocate virtually contiguous memory
 *	@size:		allocation size
 *	@gfp_mask:	flags for the page level allocator
 *	@prot:		protection mask for the allocated pages
 *	@node:		node to use for allocation or -1
 *
 *	Allocate enough pages to cover @size from the page level
 *	allocator with @gfp_mask flags.  Map them into contiguous
 *	kernel virtual space, using a pagetable protection of @prot.
 */
static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
			    int node)
{
	struct vm_struct *area;

	size = PAGE_ALIGN(size);
	if (!size || (size >> PAGE_SHIFT) > num_physpages)
		return NULL;

	area = get_vm_area_node(size, VM_ALLOC, node, gfp_mask);
	if (!area)
		return NULL;

	return __vmalloc_area_node(area, gfp_mask, prot, node);
}

void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
	return __vmalloc_node(size, gfp_mask, prot, -1);
}
EXPORT_SYMBOL(__vmalloc);

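/*
 * On the "strictly bounded" recursion in __vmalloc_area_node(): when the
 * page-pointer array itself is larger than a page, it is allocated with
 * __vmalloc_node(), whose own array is smaller by a factor of
 * PAGE_SIZE / sizeof(struct page *) per level, so the nesting dies out
 * after a level or two.  VM_VPAGES records which allocator was used so
 * that __vunmap() can pick vfree() or kfree() for the array.
 */
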
/**
 *	vmalloc  -  allocate virtually contiguous memory
 *	@size:		allocation size
 *
 *	Allocate enough pages to cover @size from the page level
 *	allocator and map them into contiguous kernel virtual space.
 *
 *	For tight control over page level allocator and protection flags
 *	use __vmalloc() instead.
 */
void *vmalloc(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
}
EXPORT_SYMBOL(vmalloc);

/**
 *	vmalloc_user  -  allocate zeroed virtually contiguous memory for userspace
 *	@size:		allocation size
 *
 *	The resulting memory area is zeroed so it can be mapped to userspace
 *	without leaking data.
 */
void *vmalloc_user(unsigned long size)
{
	struct vm_struct *area;
	void *ret;

	ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
	if (ret) {
		write_lock(&vmlist_lock);
		area = __find_vm_area(ret);
		area->flags |= VM_USERMAP;
		write_unlock(&vmlist_lock);
	}
	return ret;
}
EXPORT_SYMBOL(vmalloc_user);

/**
 *	vmalloc_node  -  allocate memory on a specific node
 *	@size:		allocation size
 *	@node:		numa node
 *
 *	Allocate enough pages to cover @size from the page level
 *	allocator and map them into contiguous kernel virtual space.
 *
 *	For tight control over page level allocator and protection flags
 *	use __vmalloc() instead.
 */
void *vmalloc_node(unsigned long size, int node)
{
	return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, node);
}
EXPORT_SYMBOL(vmalloc_node);

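/*
 * Usage sketch (illustrative; "nr_buckets" and the table are
 * hypothetical): vmalloc() suits allocations too large for kmalloc()'s
 * physically contiguous pages, at the cost of extra TLB pressure from
 * the page-by-page mapping:
 *
 *	void **table = vmalloc(nr_buckets * sizeof(*table));
 *
 *	if (!table)
 *		return -ENOMEM;
 *	memset(table, 0, nr_buckets * sizeof(*table));
 *	...
 *	vfree(table);
 *
 * vmalloc_node() above behaves identically but allocates the backing
 * pages from the given NUMA node.
 */
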
#ifndef PAGE_KERNEL_EXEC
# define PAGE_KERNEL_EXEC PAGE_KERNEL
#endif

/**
 *	vmalloc_exec  -  allocate virtually contiguous, executable memory
 *	@size:		allocation size
 *
 *	Kernel-internal function to allocate enough pages to cover @size
 *	from the page level allocator and map them into contiguous and
 *	executable kernel virtual space.
 *
 *	For tight control over page level allocator and protection flags
 *	use __vmalloc() instead.
 */
void *vmalloc_exec(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
}

/**
 *	vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
 *	@size:		allocation size
 *
 *	Allocate enough 32bit PA addressable pages to cover @size from the
 *	page level allocator and map them into contiguous kernel virtual space.
 */
void *vmalloc_32(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
}
EXPORT_SYMBOL(vmalloc_32);

/**
 *	vmalloc_32_user  -  allocate zeroed virtually contiguous 32bit memory
 *	@size:		allocation size
 *
 *	The resulting memory area is 32bit addressable and zeroed so it can be
 *	mapped to userspace without leaking data.
 */
void *vmalloc_32_user(unsigned long size)
{
	struct vm_struct *area;
	void *ret;

	ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
	if (ret) {
		write_lock(&vmlist_lock);
		area = __find_vm_area(ret);
		area->flags |= VM_USERMAP;
		write_unlock(&vmlist_lock);
	}
	return ret;
}
EXPORT_SYMBOL(vmalloc_32_user);

long vread(char *buf, char *addr, unsigned long count)
{
	struct vm_struct *tmp;
	char *vaddr, *buf_start = buf;
	unsigned long n;

	/* Don't allow overflow */
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;

	read_lock(&vmlist_lock);
	for (tmp = vmlist; tmp; tmp = tmp->next) {
		vaddr = (char *) tmp->addr;
		if (addr >= vaddr + tmp->size - PAGE_SIZE)
			continue;
		while (addr < vaddr) {
			if (count == 0)
				goto finished;
			*buf = '\0';
			buf++;
			addr++;
			count--;
		}
		n = vaddr + tmp->size - PAGE_SIZE - addr;
		do {
			if (count == 0)
				goto finished;
			*buf = *addr;
			buf++;
			addr++;
			count--;
		} while (--n > 0);
	}
finished:
	read_unlock(&vmlist_lock);
	return buf - buf_start;
}

long vwrite(char *buf, char *addr, unsigned long count)
{
	struct vm_struct *tmp;
	char *vaddr, *buf_start = buf;
	unsigned long n;

	/* Don't allow overflow */
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;

	read_lock(&vmlist_lock);
	for (tmp = vmlist; tmp; tmp = tmp->next) {
		vaddr = (char *) tmp->addr;
		if (addr >= vaddr + tmp->size - PAGE_SIZE)
			continue;
		while (addr < vaddr) {
			if (count == 0)
				goto finished;
			buf++;
			addr++;
			count--;
		}
		n = vaddr + tmp->size - PAGE_SIZE - addr;
		do {
			if (count == 0)
				goto finished;
			*addr = *buf;
			buf++;
			addr++;
			count--;
		} while (--n > 0);
	}
finished:
	read_unlock(&vmlist_lock);
	return buf - buf_start;
}

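/*
 * Both routines above tolerate ranges that are only partially backed by
 * vmalloc areas: vread() zero-fills the gap before an area (the '\0'
 * stores), vwrite() silently skips it, and both stop one guard page
 * short of each area's end.  /dev/kmem is the classic caller that
 * depends on these semantics.
 */
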
/**
 *	remap_vmalloc_range  -  map vmalloc pages to userspace
 *	@vma:		vma to cover (map full range of vma)
 *	@addr:		vmalloc memory
 *	@pgoff:		number of pages into addr before first page to map
 *	@returns:	0 for success, -Exxx on failure
 *
 *	This function checks that addr is a valid vmalloc'ed area, and
 *	that it is big enough to cover the vma. Will return failure if
 *	that criteria isn't met.
 *
 *	Similar to remap_pfn_range (see mm/memory.c)
 */
int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
						unsigned long pgoff)
{
	struct vm_struct *area;
	unsigned long uaddr = vma->vm_start;
	unsigned long usize = vma->vm_end - vma->vm_start;
	int ret;

	if ((PAGE_SIZE-1) & (unsigned long)addr)
		return -EINVAL;

	read_lock(&vmlist_lock);
	area = __find_vm_area(addr);
	if (!area)
		goto out_einval_locked;

	if (!(area->flags & VM_USERMAP))
		goto out_einval_locked;

	if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
		goto out_einval_locked;
	read_unlock(&vmlist_lock);

	addr += pgoff << PAGE_SHIFT;
	do {
		struct page *page = vmalloc_to_page(addr);
		ret = vm_insert_page(vma, uaddr, page);
		if (ret)
			return ret;

		uaddr += PAGE_SIZE;
		addr += PAGE_SIZE;
		usize -= PAGE_SIZE;
	} while (usize > 0);

	/* Prevent "things" like memory migration? VM_flags need a cleanup... */
	vma->vm_flags |= VM_RESERVED;

	return ret;

out_einval_locked:
	read_unlock(&vmlist_lock);
	return -EINVAL;
}
EXPORT_SYMBOL(remap_vmalloc_range);
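/*
 * Usage sketch (illustrative; "my_buf" is a hypothetical buffer that was
 * allocated with vmalloc_user()): a driver can expose such a buffer
 * through its mmap file operation:
 *
 *	static int my_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		return remap_vmalloc_range(vma, my_buf, 0);
 *	}
 *
 * This works only because vmalloc_user() set VM_USERMAP on the area;
 * remap_vmalloc_range() rejects plain vmalloc() memory with -EINVAL.
 */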