/*
 *  linux/mm/memory.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 */

/*
 * demand-loading started 01.12.91 - seems it is high on the list of
 * things wanted, and it should be easy to implement. - Linus
 */

/*
 * Ok, demand-loading was easy, shared pages a little bit trickier. Shared
 * pages started 02.12.91, seems to work. - Linus.
 *
 * Tested sharing by executing about 30 /bin/sh: under the old kernel it
 * would have taken more than the 6M I have free, but it worked well as
 * far as I could see.
 *
 * Also corrected some "invalidate()"s - I wasn't doing enough of them.
 */

/*
 * Real VM (paging to/from disk) started 18.12.91. Much more work and
 * thought has to go into this. Oh, well..
 * 19.12.91  -  works, somewhat. Sometimes I get faults, don't know why.
 *		Found it. Everything seems to work now.
 * 20.12.91  -  Ok, making the swap-device changeable like the root.
 */

/*
 * 05.04.94  -  Multi-page memory management added for v1.1.
 *		Idea by Alex Bligh (alex@cconcepts.co.uk)
 *
 * 16.07.99  -  Support of BIGMEM added by Gerhard Wichert, Siemens AG
 *		(Gerhard.Wichert@pdb.siemens.de)
 *
 * Aug/Sep 2004 Changed to four level page tables (Andi Kleen)
 */

#include <linux/kernel_stat.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/module.h>
#include <linux/init.h>

#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgtable.h>

#include <linux/swapops.h>
#include <linux/elf.h>

#ifndef CONFIG_NEED_MULTIPLE_NODES
/* use the per-pgdat data instead for discontigmem - mbligh */
unsigned long max_mapnr;
struct page *mem_map;

EXPORT_SYMBOL(max_mapnr);
EXPORT_SYMBOL(mem_map);
#endif

unsigned long num_physpages;
/*
 * A number of key systems in x86 including ioremap() rely on the assumption
 * that high_memory defines the upper bound on direct map memory, the end
 * of ZONE_NORMAL.  Under CONFIG_DISCONTIG this means that max_low_pfn and
 * highstart_pfn must be the same; there must be no gap between ZONE_NORMAL
 * and ZONE_HIGHMEM.
 */
void * high_memory;
unsigned long vmalloc_earlyreserve;

EXPORT_SYMBOL(num_physpages);
EXPORT_SYMBOL(high_memory);
EXPORT_SYMBOL(vmalloc_earlyreserve);

/*
 * If a p?d_bad entry is found while walking page tables, report
 * the error, before resetting entry to p?d_none.  Usually (but
 * very seldom) called out from the p?d_none_or_clear_bad macros.
 */

void pgd_clear_bad(pgd_t *pgd)
{
	pgd_ERROR(*pgd);
	pgd_clear(pgd);
}

void pud_clear_bad(pud_t *pud)
{
	pud_ERROR(*pud);
	pud_clear(pud);
}

void pmd_clear_bad(pmd_t *pmd)
{
	pmd_ERROR(*pmd);
	pmd_clear(pmd);
}

/*
 * Note: this doesn't free the actual pages themselves. That
 * has been handled earlier when unmapping all the memory regions.
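 * Only the page-table page itself is released here (through the
 * mmu_gather), and the mm's page-table accounting is updated to match.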
 */
static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
{
	struct page *page = pmd_page(*pmd);
	pmd_clear(pmd);
	pte_free_tlb(tlb, page);
	dec_page_state(nr_page_table_pages);
	tlb->mm->nr_ptes--;
}

static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				unsigned long addr, unsigned long end,
				unsigned long floor, unsigned long ceiling)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long start;

	start = addr;
	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		free_pte_range(tlb, pmd);
	} while (pmd++, addr = next, addr != end);

	start &= PUD_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pmd = pmd_offset(pud, start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd);
}

static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
				unsigned long addr, unsigned long end,
				unsigned long floor, unsigned long ceiling)
{
	pud_t *pud;
	unsigned long next;
	unsigned long start;

	start = addr;
	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		free_pmd_range(tlb, pud, addr, next, floor, ceiling);
	} while (pud++, addr = next, addr != end);

	start &= PGDIR_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PGDIR_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pud = pud_offset(pgd, start);
	pgd_clear(pgd);
	pud_free_tlb(tlb, pud);
}

/*
 * This function frees user-level page tables of a process.
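 *
 * The floor and ceiling arguments bound how much may be freed: they
 * come from the neighbouring vmas (see free_pgtables() below), so a
 * table still shared with an adjacent mapping is left in place.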
 *
 * Must be called with pagetable lock held.
 */
void free_pgd_range(struct mmu_gather **tlb,
			unsigned long addr, unsigned long end,
			unsigned long floor, unsigned long ceiling)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long start;

	/*
	 * The next few lines have given us lots of grief...
	 *
	 * Why are we testing PMD* at this top level?  Because often
	 * there will be no work to do at all, and we'd prefer not to
	 * go all the way down to the bottom just to discover that.
	 *
	 * Why all these "- 1"s?  Because 0 represents both the bottom
	 * of the address space and the top of it (using -1 for the
	 * top wouldn't help much: the masks would do the wrong thing).
	 * The rule is that addr 0 and floor 0 refer to the bottom of
	 * the address space, but end 0 and ceiling 0 refer to the top.
	 * Comparisons need to use "end - 1" and "ceiling - 1" (though
	 * that end 0 case should be mythical).
	 *
	 * Wherever addr is brought up or ceiling brought down, we must
	 * be careful to reject "the opposite 0" before it confuses the
	 * subsequent tests.  But what about where end is brought down
	 * by PMD_SIZE below? no, end can't go down to 0 there.
	 *
	 * Whereas we round start (addr) and ceiling down, by different
	 * masks at different levels, in order to test whether a table
	 * now has no other vmas using it, so can be freed, we don't
	 * bother to round floor or end up - the tests don't need that.
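	 *
	 * A short worked example (illustrative values, assuming 4k
	 * pages and a 4M PMD_SIZE): addr 0x00500000, end 0x00c00000,
	 * floor 0x00480000 (a neighbour ends mid-table), ceiling 0.
	 * addr rounds down to 0x00400000, which is below floor, so
	 * that table is still needed and addr steps up to 0x00800000.
	 * ceiling 0 means the top of the address space: end - 1 can
	 * never exceed ceiling - 1 (all ones) there, so end stays.
	 * Tables covering 0x00800000 .. 0x00c00000 are then freed;
	 * the neighbour's table below is not.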
	 */

	addr &= PMD_MASK;
	if (addr < floor) {
		addr += PMD_SIZE;
		if (!addr)
			return;
	}
	if (ceiling) {
		ceiling &= PMD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		end -= PMD_SIZE;
	if (addr > end - 1)
		return;

	start = addr;
	pgd = pgd_offset((*tlb)->mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
	} while (pgd++, addr = next, addr != end);

	if (!(*tlb)->fullmm)
		flush_tlb_pgtables((*tlb)->mm, start, end);
}

void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
		unsigned long floor, unsigned long ceiling)
{
	while (vma) {
		struct vm_area_struct *next = vma->vm_next;
		unsigned long addr = vma->vm_start;

		if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) {
			hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
				floor, next? next->vm_start: ceiling);
		} else {
			/*
			 * Optimization: gather nearby vmas into one call down
			 */
			while (next && next->vm_start <= vma->vm_end + PMD_SIZE
			  && !is_hugepage_only_range(vma->vm_mm, next->vm_start,
							HPAGE_SIZE)) {
				vma = next;
				next = vma->vm_next;
			}
			free_pgd_range(tlb, addr, vma->vm_end,
				floor, next? next->vm_start: ceiling);
		}
		vma = next;
	}
}

int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
{
	struct page *new;

	spin_unlock(&mm->page_table_lock);
	new = pte_alloc_one(mm, address);
	spin_lock(&mm->page_table_lock);
	if (!new)
		return -ENOMEM;

	if (pmd_present(*pmd))		/* Another has populated it */
		pte_free(new);
	else {
		mm->nr_ptes++;
		inc_page_state(nr_page_table_pages);
		pmd_populate(mm, pmd, new);
	}
	return 0;
}

int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
{
	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
	if (!new)
		return -ENOMEM;

	spin_lock(&init_mm.page_table_lock);
	if (pmd_present(*pmd))		/* Another has populated it */
		pte_free_kernel(new);
	else
		pmd_populate_kernel(&init_mm, pmd, new);
	spin_unlock(&init_mm.page_table_lock);
	return 0;
}

static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
{
	if (file_rss)
		add_mm_counter(mm, file_rss, file_rss);
	if (anon_rss)
		add_mm_counter(mm, anon_rss, anon_rss);
}

/*
 * This function is called to print an error when a pte in a
 * !VM_RESERVED region is found pointing to an invalid pfn (which
 * is an error).
 *
 * The calling function must still handle the error.
 */
void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
{
	printk(KERN_ERR "Bad pte = %08llx, process = %s, "
			"vm_flags = %lx, vaddr = %lx\n",
		(long long)pte_val(pte),
		(vma->vm_mm == current->mm ? current->comm : "???"),
		vma->vm_flags, vaddr);
	dump_stack();
}

/*
 * copy one vm_area from one task to the other. Assumes the page tables
 * already present in the new task to be cleared in the whole range
 * covered by this vma.
 *
 * dst->page_table_lock is held on entry and exit,
 * but may be dropped within p[mg]d_alloc() and pte_alloc_map().
 */

static inline void
copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
		unsigned long addr, int *rss)
{
	unsigned long vm_flags = vma->vm_flags;
	pte_t pte = *src_pte;
	struct page *page;
	unsigned long pfn;

	/* pte contains position in swap or file, so copy. */
	if (unlikely(!pte_present(pte))) {
		if (!pte_file(pte)) {
			swap_duplicate(pte_to_swp_entry(pte));
			/* make sure dst_mm is on swapoff's mmlist. */
			if (unlikely(list_empty(&dst_mm->mmlist))) {
				spin_lock(&mmlist_lock);
				list_add(&dst_mm->mmlist, &src_mm->mmlist);
				spin_unlock(&mmlist_lock);
			}
		}
		goto out_set_pte;
	}

	/* If the region is VM_RESERVED, the mapping is not
	 * mapped via rmap - duplicate the pte as is.
	 */
	if (vm_flags & VM_RESERVED)
		goto out_set_pte;

	pfn = pte_pfn(pte);
	/* If the pte points outside of valid memory but
	 * the region is not VM_RESERVED, we have a problem.
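	 * In that case, report the bad pte and then fall through to
	 * copy it anyway - the "something sane" noted below.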
	 */
	if (unlikely(!pfn_valid(pfn))) {
		print_bad_pte(vma, pte, addr);
		goto out_set_pte; /* try to do something sane */
	}

	page = pfn_to_page(pfn);

	/*
	 * If it's a COW mapping, write protect it both
	 * in the parent and the child
	 */
	if ((vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) {
		ptep_set_wrprotect(src_mm, addr, src_pte);
		pte = *src_pte;
	}

	/*
	 * If it's a shared mapping, mark it clean in
	 * the child
	 */
	if (vm_flags & VM_SHARED)
		pte = pte_mkclean(pte);
	pte = pte_mkold(pte);
	get_page(page);
	page_dup_rmap(page);
	rss[!!PageAnon(page)]++;

out_set_pte:
	set_pte_at(dst_mm, addr, dst_pte, pte);
}

static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
		unsigned long addr, unsigned long end)
{
	pte_t *src_pte, *dst_pte;
	int progress = 0;
	int rss[2];

again:
	rss[1] = rss[0] = 0;
	dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
	if (!dst_pte)
		return -ENOMEM;
	src_pte = pte_offset_map_nested(src_pmd, addr);

	spin_lock(&src_mm->page_table_lock);
	do {
		/*
		 * We are holding two locks at this point - either of them
		 * could generate latencies in another task on another CPU.
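		 * Check every 32 units of progress - a copied pte
		 * counts 8, an empty slot counts 1 - and break out
		 * if anyone else needs either lock, or the CPU.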
		 */
		if (progress >= 32) {
			progress = 0;
			if (need_resched() ||
			    need_lockbreak(&src_mm->page_table_lock) ||
			    need_lockbreak(&dst_mm->page_table_lock))
				break;
		}
		if (pte_none(*src_pte)) {
			progress++;
			continue;
		}
		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
		progress += 8;
	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
	spin_unlock(&src_mm->page_table_lock);

	pte_unmap_nested(src_pte - 1);
	pte_unmap(dst_pte - 1);
	add_mm_rss(dst_mm, rss[0], rss[1]);
	cond_resched_lock(&dst_mm->page_table_lock);
	if (addr != end)
		goto again;
	return 0;
}

static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
		unsigned long addr, unsigned long end)
{
	pmd_t *src_pmd, *dst_pmd;
	unsigned long next;

	dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
	if (!dst_pmd)
		return -ENOMEM;
	src_pmd = pmd_offset(src_pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(src_pmd))
			continue;
		if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
						vma, addr, next))
			return -ENOMEM;
	} while (dst_pmd++, src_pmd++, addr = next, addr != end);
	return 0;
}

static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
		unsigned long addr, unsigned long end)
{
	pud_t *src_pud, *dst_pud;
	unsigned long next;

	dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
	if (!dst_pud)
		return -ENOMEM;
	src_pud = pud_offset(src_pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(src_pud))
			continue;
		if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
						vma, addr, next))
			return -ENOMEM;
	} while (dst_pud++, src_pud++, addr = next, addr != end);
	return 0;
}

int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		struct vm_area_struct *vma)
{
	pgd_t *src_pgd, *dst_pgd;
	unsigned long next;
	unsigned long addr = vma->vm_start;
	unsigned long end = vma->vm_end;

	/*
	 * Don't copy ptes where a page fault will fill them correctly.
	 * Fork becomes much lighter when there are big shared or private
	 * readonly mappings. The tradeoff is that copy_page_range is more
	 * efficient than faulting.
	 */
	if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) {
		if (!vma->anon_vma)
			return 0;
	}

	if (is_vm_hugetlb_page(vma))
		return copy_hugetlb_page_range(dst_mm, src_mm, vma);

	dst_pgd = pgd_offset(dst_mm, addr);
	src_pgd = pgd_offset(src_mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(src_pgd))
			continue;
		if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
						vma, addr, next))
			return -ENOMEM;
	} while (dst_pgd++, src_pgd++, addr = next, addr != end);
	return 0;
}

static void zap_pte_range(struct mmu_gather *tlb,
				struct vm_area_struct *vma, pmd_t *pmd,
				unsigned long addr, unsigned long end,
				struct zap_details *details)
{
	struct mm_struct *mm = tlb->mm;
	pte_t *pte;
	int file_rss = 0;
	int anon_rss = 0;

	pte = pte_offset_map(pmd, addr);
	do {
		pte_t ptent = *pte;
		if (pte_none(ptent))
			continue;
		if (pte_present(ptent)) {
			struct page *page = NULL;
			if (!(vma->vm_flags & VM_RESERVED)) {
				unsigned long pfn = pte_pfn(ptent);
				if (unlikely(!pfn_valid(pfn)))
					print_bad_pte(vma, ptent, addr);
				else
					page = pfn_to_page(pfn);
			}
			if (unlikely(details) && page) {
				/*
				 * unmap_shared_mapping_pages() wants to
				 * invalidate cache without truncating:
				 * unmap shared but keep private pages.
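				 * check_mapping names the address_space
				 * being invalidated; a privately COWed
				 * page has a different page->mapping, so
				 * the test below skips it.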
				 */
				if (details->check_mapping &&
				    details->check_mapping != page->mapping)
					continue;
				/*
				 * Each page->index must be checked when
				 * invalidating or truncating nonlinear.
				 */
				if (details->nonlinear_vma &&
				    (page->index < details->first_index ||
				     page->index > details->last_index))
					continue;
			}
			ptent = ptep_get_and_clear_full(mm, addr, pte,
							tlb->fullmm);
			tlb_remove_tlb_entry(tlb, pte, addr);
			if (unlikely(!page))
				continue;
			if (unlikely(details) && details->nonlinear_vma
			    && linear_page_index(details->nonlinear_vma,
						addr) != page->index)
				set_pte_at(mm, addr, pte,
					   pgoff_to_pte(page->index));
			if (PageAnon(page))
				anon_rss--;
			else {
				if (pte_dirty(ptent))
					set_page_dirty(page);
				if (pte_young(ptent))
					mark_page_accessed(page);
				file_rss--;
			}
			page_remove_rmap(page);
			tlb_remove_page(tlb, page);
			continue;
		}
		/*
		 * If details->check_mapping, we leave swap entries;
		 * if details->nonlinear_vma, we leave file entries.
		 */
		if (unlikely(details))
			continue;
		if (!pte_file(ptent))
			free_swap_and_cache(pte_to_swp_entry(ptent));
		pte_clear_full(mm, addr, pte, tlb->fullmm);
	} while (pte++, addr += PAGE_SIZE, addr != end);

	add_mm_rss(mm, file_rss, anon_rss);
	pte_unmap(pte - 1);
}

static inline void zap_pmd_range(struct mmu_gather *tlb,
				struct vm_area_struct *vma, pud_t *pud,
				unsigned long addr, unsigned long end,
				struct zap_details *details)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		zap_pte_range(tlb, vma, pmd, addr, next, details);
	} while (pmd++, addr = next, addr != end);
}

static inline void zap_pud_range(struct mmu_gather *tlb,
				struct vm_area_struct *vma, pgd_t *pgd,
				unsigned long addr, unsigned long end,
				struct zap_details *details)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		zap_pmd_range(tlb, vma, pud, addr, next, details);
	} while (pud++, addr = next, addr != end);
}

static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
				unsigned long addr, unsigned long end,
				struct zap_details *details)
{
	pgd_t *pgd;
	unsigned long next;

	if (details && !details->check_mapping && !details->nonlinear_vma)
		details = NULL;

	BUG_ON(addr >= end);
	tlb_start_vma(tlb, vma);
	pgd = pgd_offset(vma->vm_mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		zap_pud_range(tlb, vma, pgd, addr, next, details);
	} while (pgd++, addr = next, addr != end);
	tlb_end_vma(tlb, vma);
}

#ifdef CONFIG_PREEMPT
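/* Preempt: zap in small batches to keep lock hold times short */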
# define ZAP_BLOCK_SIZE	(8 * PAGE_SIZE)
#else
/* No preempt: go for improved straight-line efficiency */
# define ZAP_BLOCK_SIZE	(1024 * PAGE_SIZE)
#endif

/**
 * unmap_vmas - unmap a range of memory covered by a list of vma's
 * @tlbp: address of the caller's struct mmu_gather
 * @mm: the controlling mm_struct
 * @vma: the starting vma
 * @start_addr: virtual address at which to start unmapping
 * @end_addr: virtual address at which to end unmapping
 * @nr_accounted: Place number of unmapped pages in vm-accountable vma's here
 * @details: details of nonlinear truncation or shared cache invalidation
 *
 * Returns the end address of the unmapping (restart addr if interrupted).
 *
 * Unmap all pages in the vma list.  Called under page_table_lock.
 *
 * We aim to not hold page_table_lock for too long (for scheduling latency
 * reasons).  So zap pages in ZAP_BLOCK_SIZE bytecounts.  This means we need to
 * return the ending mmu_gather to the caller.
 *
 * Only addresses between `start' and `end' will be unmapped.
 *
 * The VMA list must be sorted in ascending virtual address order.
 *
 * unmap_vmas() assumes that the caller will flush the whole unmapped address
 * range after unmap_vmas() returns.  So the only responsibility here is to
 * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
 * drops the lock and schedules.
 */
unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
		struct vm_area_struct *vma, unsigned long start_addr,
		unsigned long end_addr, unsigned long *nr_accounted,
		struct zap_details *details)
{
	unsigned long zap_bytes = ZAP_BLOCK_SIZE;
	unsigned long tlb_start = 0;	/* For tlb_finish_mmu */
	int tlb_start_valid = 0;
	unsigned long start = start_addr;
	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
	int fullmm = (*tlbp)->fullmm;

	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
		unsigned long end;

		start = max(vma->vm_start, start_addr);
		if (start >= vma->vm_end)
			continue;
		end = min(vma->vm_end, end_addr);
		if (end <= vma->vm_start)
			continue;

		if (vma->vm_flags & VM_ACCOUNT)
			*nr_accounted += (end - start) >> PAGE_SHIFT;

		while (start != end) {
			unsigned long block;

			if (!tlb_start_valid) {
				tlb_start = start;
				tlb_start_valid = 1;
			}

			if (is_vm_hugetlb_page(vma)) {
				block = end - start;
				unmap_hugepage_range(vma, start, end);
			} else {
				block = min(zap_bytes, end - start);
				unmap_page_range(*tlbp, vma, start,
						start + block, details);
			}

			start += block;
			zap_bytes -= block;
			if ((long)zap_bytes > 0)
				continue;

			tlb_finish_mmu(*tlbp, tlb_start, start);

			if (need_resched() ||
				need_lockbreak(&mm->page_table_lock) ||
				(i_mmap_lock && need_lockbreak(i_mmap_lock))) {
				if (i_mmap_lock) {
					/* must reset count of rss freed */
					*tlbp = tlb_gather_mmu(mm, fullmm);
					goto out;
				}
				spin_unlock(&mm->page_table_lock);
				cond_resched();
				spin_lock(&mm->page_table_lock);
			}

			*tlbp = tlb_gather_mmu(mm, fullmm);
			tlb_start_valid = 0;
			zap_bytes = ZAP_BLOCK_SIZE;
		}
	}
out:
	return start;	/* which is now the end (or restart) address */
}

/**
 * zap_page_range - remove user pages in a given range
 * @vma: vm_area_struct holding the applicable pages
 * @address: starting address of pages to zap
 * @size: number of bytes to zap
 * @details: details of nonlinear truncation or shared cache invalidation
 */
unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
		unsigned long size, struct zap_details *details)
{
	struct mm_struct *mm = vma->vm_mm;
	struct mmu_gather *tlb;
	unsigned long end = address + size;
	unsigned long nr_accounted = 0;

	if (is_vm_hugetlb_page(vma)) {
		zap_hugepage_range(vma, address, size);
		return end;
	}

	lru_add_drain();
	spin_lock(&mm->page_table_lock);
	tlb = tlb_gather_mmu(mm, 0);
	update_hiwater_rss(mm);
	end = unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
	tlb_finish_mmu(tlb, address, end);
	spin_unlock(&mm->page_table_lock);
	return end;
}

/*
 * Do a quick page-table lookup for a single page.
 * mm->page_table_lock must be held.
 */
static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
			int read, int write, int accessed)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	unsigned long pfn;
	struct page *page;

	page = follow_huge_addr(mm, address, write);
	if (!IS_ERR(page))
		return page;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto out;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		goto out;
	if (pmd_huge(*pmd))
		return follow_huge_pmd(mm, address, pmd, write);

	ptep = pte_offset_map(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	pte_unmap(ptep);
	if (pte_present(pte)) {
		if (write && !pte_write(pte))
			goto out;
		if (read && !pte_read(pte))
			goto out;
		pfn = pte_pfn(pte);
		if (pfn_valid(pfn)) {
			page = pfn_to_page(pfn);
			if (accessed) {
				if (write && !pte_dirty(pte) && !PageDirty(page))
					set_page_dirty(page);
				mark_page_accessed(page);
			}
			return page;
		}
	}

out:
	return NULL;
}

inline struct page *
follow_page(struct mm_struct *mm, unsigned long address, int write)
{
	return __follow_page(mm, address, 0, write, 1);
}

/*
 * check_user_page_readable() can be called from interrupt context by oprofile,
 * so we need to avoid taking any non-irq-safe locks
 */
int check_user_page_readable(struct mm_struct *mm, unsigned long address)
{
	return __follow_page(mm, address, 1, 0, 0) != NULL;
}
EXPORT_SYMBOL(check_user_page_readable);

static inline int
untouched_anonymous_page(struct mm_struct* mm, struct vm_area_struct *vma,
			 unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	/* Check if the vma is for an anonymous mapping. */
	if (vma->vm_ops && vma->vm_ops->nopage)
		return 0;

	/* Check if page directory entry exists. */
	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		return 1;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		return 1;

	/* Check if page middle directory entry exists. */
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
		return 1;

	/* There is a pte slot for 'address' in 'mm'. */
	return 0;
}

int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, int len, int write, int force,
		struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned int flags;

	/*
	 * Require read or write permissions.
	 * If 'force' is set, we only require the "MAY" flags.
	 */
	flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && in_gate_area(tsk, start)) {
			unsigned long pg = start & PAGE_MASK;
			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;
			if (write) /* user gate pages are read-only */
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			if (pages) {
				pages[i] = pte_page(*pte);
				get_page(pages[i]);
			}
			pte_unmap(pte);
			if (vmas)
				vmas[i] = gate_vma;
			i++;
			start += PAGE_SIZE;
			len--;
			continue;
		}

		if (!vma || (vma->vm_flags & (VM_IO | VM_RESERVED))
				|| !(flags & vma->vm_flags))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &len, i);
			continue;
		}
		spin_lock(&mm->page_table_lock);
		do {
			int write_access = write;
			struct page *page;

			cond_resched_lock(&mm->page_table_lock);
			while (!(page = follow_page(mm, start, write_access))) {
				int ret;

				/*
				 * Shortcut for anonymous pages. We don't want
				 * to force the creation of page tables for
				 * insanely big anonymously mapped areas that
				 * nobody touched so far. This is important
				 * for doing a core dump for these mappings.
				 */
				if (!write && untouched_anonymous_page(mm,vma,start)) {
					page = ZERO_PAGE(start);
					break;
				}
				spin_unlock(&mm->page_table_lock);
				ret = __handle_mm_fault(mm, vma, start, write_access);

				/*
				 * The VM_FAULT_WRITE bit tells us that do_wp_page has
				 * broken COW when necessary, even if maybe_mkwrite
				 * decided not to set pte_write. We can thus safely do
				 * subsequent page lookups as if they were reads.
				 */
				if (ret & VM_FAULT_WRITE)
					write_access = 0;

				switch (ret & ~VM_FAULT_WRITE) {
				case VM_FAULT_MINOR:
					tsk->min_flt++;
					break;
				case VM_FAULT_MAJOR:
					tsk->maj_flt++;
					break;
				case VM_FAULT_SIGBUS:
					return i ? i : -EFAULT;
				case VM_FAULT_OOM:
					return i ? i : -ENOMEM;
				default:
					BUG();
				}
				spin_lock(&mm->page_table_lock);
			}
			if (pages) {
				pages[i] = page;
				flush_dcache_page(page);
				page_cache_get(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			len--;
		} while (len && start < vma->vm_end);
		spin_unlock(&mm->page_table_lock);
	} while (len);
	return i;
}
EXPORT_SYMBOL(get_user_pages);

static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
			unsigned long addr, unsigned long end, pgprot_t prot)
{
	pte_t *pte;

	pte = pte_alloc_map(mm, pmd, addr);
	if (!pte)
		return -ENOMEM;
	do {
		struct page *page = ZERO_PAGE(addr);
		pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
		page_cache_get(page);
		page_add_file_rmap(page);
		inc_mm_counter(mm, file_rss);
		BUG_ON(!pte_none(*pte));
		set_pte_at(mm, addr, pte, zero_pte);
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap(pte - 1);
	return 0;
}

static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
			unsigned long addr, unsigned long end, pgprot_t prot)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_alloc(mm, pud, addr);
	if (!pmd)
		return -ENOMEM;
	do {
		next = pmd_addr_end(addr, end);
		if (zeromap_pte_range(mm, pmd, addr, next, prot))
			return -ENOMEM;
	} while (pmd++, addr = next, addr != end);
	return 0;
}

static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
			unsigned long addr, unsigned long end, pgprot_t prot)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_alloc(mm, pgd, addr);
	if (!pud)
		return -ENOMEM;
	do {
		next = pud_addr_end(addr, end);
		if (zeromap_pmd_range(mm, pud, addr, next, prot))
			return -ENOMEM;
	} while (pud++, addr = next, addr != end);
	return 0;
}

int zeromap_page_range(struct vm_area_struct *vma,
			unsigned long addr, unsigned long size, pgprot_t prot)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long end = addr + size;
	struct mm_struct *mm = vma->vm_mm;
	int err;

	BUG_ON(addr >= end);
	pgd = pgd_offset(mm, addr);
	flush_cache_range(vma, addr, end);
	spin_lock(&mm->page_table_lock);
	do {
		next = pgd_addr_end(addr, end);
		err = zeromap_pud_range(mm, pgd, addr, next, prot);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);
	spin_unlock(&mm->page_table_lock);
	return err;
}

/*
 * maps a range of physical memory into the requested pages. the old
 * mappings are removed. any references to nonexistent pages result
 * in null mappings (currently treated as "copy-on-access")
 */
static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
			unsigned long addr, unsigned long end,
			unsigned long pfn, pgprot_t prot)
{
	pte_t *pte;

	pte = pte_alloc_map(mm, pmd, addr);
	if (!pte)
		return -ENOMEM;
	do {
		BUG_ON(!pte_none(*pte));
		set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap(pte - 1);
	return 0;
}

static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
			unsigned long addr, unsigned long end,
			unsigned long pfn, pgprot_t prot)
{
	pmd_t *pmd;
	unsigned long next;

	pfn -= addr >> PAGE_SHIFT;
	pmd = pmd_alloc(mm, pud, addr);
	if (!pmd)
		return -ENOMEM;
	do {
		next = pmd_addr_end(addr, end);
		if (remap_pte_range(mm, pmd, addr, next,
				pfn + (addr >> PAGE_SHIFT), prot))
			return -ENOMEM;
	} while (pmd++, addr = next, addr != end);
	return 0;
}
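/*
 * Note the "pfn -= addr >> PAGE_SHIFT" in remap_pmd_range() above and
 * remap_pud_range() below: it biases pfn once, so that adding back the
 * current "addr >> PAGE_SHIFT" at each inner call recovers the correct
 * frame number for whatever addr that call starts at, without keeping
 * a separate pfn cursor in step with addr.
 */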
11681da177e4SLinus Torvalds 11691da177e4SLinus Torvalds static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, 11701da177e4SLinus Torvalds unsigned long addr, unsigned long end, 11711da177e4SLinus Torvalds unsigned long pfn, pgprot_t prot) 11721da177e4SLinus Torvalds { 11731da177e4SLinus Torvalds pud_t *pud; 11741da177e4SLinus Torvalds unsigned long next; 11751da177e4SLinus Torvalds 11761da177e4SLinus Torvalds pfn -= addr >> PAGE_SHIFT; 11771da177e4SLinus Torvalds pud = pud_alloc(mm, pgd, addr); 11781da177e4SLinus Torvalds if (!pud) 11791da177e4SLinus Torvalds return -ENOMEM; 11801da177e4SLinus Torvalds do { 11811da177e4SLinus Torvalds next = pud_addr_end(addr, end); 11821da177e4SLinus Torvalds if (remap_pmd_range(mm, pud, addr, next, 11831da177e4SLinus Torvalds pfn + (addr >> PAGE_SHIFT), prot)) 11841da177e4SLinus Torvalds return -ENOMEM; 11851da177e4SLinus Torvalds } while (pud++, addr = next, addr != end); 11861da177e4SLinus Torvalds return 0; 11871da177e4SLinus Torvalds } 11881da177e4SLinus Torvalds 11891da177e4SLinus Torvalds /* Note: this is only safe if the mm semaphore is held when called. */ 11901da177e4SLinus Torvalds int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, 11911da177e4SLinus Torvalds unsigned long pfn, unsigned long size, pgprot_t prot) 11921da177e4SLinus Torvalds { 11931da177e4SLinus Torvalds pgd_t *pgd; 11941da177e4SLinus Torvalds unsigned long next; 11952d15cab8SHugh Dickins unsigned long end = addr + PAGE_ALIGN(size); 11961da177e4SLinus Torvalds struct mm_struct *mm = vma->vm_mm; 11971da177e4SLinus Torvalds int err; 11981da177e4SLinus Torvalds 11991da177e4SLinus Torvalds /* 12001da177e4SLinus Torvalds * Physically remapped pages are special. Tell the 12011da177e4SLinus Torvalds * rest of the world about it: 12021da177e4SLinus Torvalds * VM_IO tells people not to look at these pages 12031da177e4SLinus Torvalds * (accesses can have side effects). 1204b5810039SNick Piggin * VM_RESERVED tells the core MM not to "manage" these pages 1205b5810039SNick Piggin * (e.g. refcount, mapcount, try to swap them out). 12061da177e4SLinus Torvalds */ 12071da177e4SLinus Torvalds vma->vm_flags |= VM_IO | VM_RESERVED; 12081da177e4SLinus Torvalds 12091da177e4SLinus Torvalds BUG_ON(addr >= end); 12101da177e4SLinus Torvalds pfn -= addr >> PAGE_SHIFT; 12111da177e4SLinus Torvalds pgd = pgd_offset(mm, addr); 12121da177e4SLinus Torvalds flush_cache_range(vma, addr, end); 12131da177e4SLinus Torvalds spin_lock(&mm->page_table_lock); 12141da177e4SLinus Torvalds do { 12151da177e4SLinus Torvalds next = pgd_addr_end(addr, end); 12161da177e4SLinus Torvalds err = remap_pud_range(mm, pgd, addr, next, 12171da177e4SLinus Torvalds pfn + (addr >> PAGE_SHIFT), prot); 12181da177e4SLinus Torvalds if (err) 12191da177e4SLinus Torvalds break; 12201da177e4SLinus Torvalds } while (pgd++, addr = next, addr != end); 12211da177e4SLinus Torvalds spin_unlock(&mm->page_table_lock); 12221da177e4SLinus Torvalds return err; 12231da177e4SLinus Torvalds } 12241da177e4SLinus Torvalds EXPORT_SYMBOL(remap_pfn_range); 12251da177e4SLinus Torvalds 12261da177e4SLinus Torvalds /* 12271da177e4SLinus Torvalds * Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when 12281da177e4SLinus Torvalds * servicing faults for write access. In the normal case, we always want 12291da177e4SLinus Torvalds * pte_mkwrite. But get_user_pages can cause write faults for mappings 12301da177e4SLinus Torvalds * that do not have writing enabled, when used by access_process_vm.
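 * A typical caller pattern is the one in do_wp_page() below:
 *
 *	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 *
 * so on a vma without VM_WRITE the pte is dirtied but left
 * write-protected, instead of being made writable behind the back
 * of mprotect.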
12311da177e4SLinus Torvalds */ 12321da177e4SLinus Torvalds static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) 12331da177e4SLinus Torvalds { 12341da177e4SLinus Torvalds if (likely(vma->vm_flags & VM_WRITE)) 12351da177e4SLinus Torvalds pte = pte_mkwrite(pte); 12361da177e4SLinus Torvalds return pte; 12371da177e4SLinus Torvalds } 12381da177e4SLinus Torvalds 12391da177e4SLinus Torvalds /* 12401da177e4SLinus Torvalds * This routine handles present pages, when users try to write 12411da177e4SLinus Torvalds * to a shared page. It is done by copying the page to a new address 12421da177e4SLinus Torvalds * and decrementing the shared-page counter for the old page. 12431da177e4SLinus Torvalds * 12441da177e4SLinus Torvalds * Note that this routine assumes that the protection checks have been 12451da177e4SLinus Torvalds * done by the caller (the low-level page fault routine in most cases). 12461da177e4SLinus Torvalds * Thus we can safely just mark it writable once we've done any necessary 12471da177e4SLinus Torvalds * COW. 12481da177e4SLinus Torvalds * 12491da177e4SLinus Torvalds * We also mark the page dirty at this point even though the page will 12501da177e4SLinus Torvalds * change only once the write actually happens. This avoids a few races, 12511da177e4SLinus Torvalds * and potentially makes it more efficient. 12521da177e4SLinus Torvalds * 12531da177e4SLinus Torvalds * We enter with the mm semaphore and the page_table_lock held; we exit 12541da177e4SLinus Torvalds * with the page_table_lock released. 12551da177e4SLinus Torvalds */ 12561da177e4SLinus Torvalds static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, 125765500d23SHugh Dickins unsigned long address, pte_t *page_table, pmd_t *pmd, 125865500d23SHugh Dickins pte_t orig_pte) 12591da177e4SLinus Torvalds { 12601da177e4SLinus Torvalds struct page *old_page, *new_page; 126165500d23SHugh Dickins unsigned long pfn = pte_pfn(orig_pte); 12621da177e4SLinus Torvalds pte_t entry; 126365500d23SHugh Dickins int ret = VM_FAULT_MINOR; 12641da177e4SLinus Torvalds 1265b5810039SNick Piggin BUG_ON(vma->vm_flags & VM_RESERVED); 1266b5810039SNick Piggin 12671da177e4SLinus Torvalds if (unlikely(!pfn_valid(pfn))) { 12681da177e4SLinus Torvalds /* 126965500d23SHugh Dickins * Page table corrupted: show pte and kill process. 12701da177e4SLinus Torvalds */ 1271b5810039SNick Piggin print_bad_pte(vma, orig_pte, address); 127265500d23SHugh Dickins ret = VM_FAULT_OOM; 127365500d23SHugh Dickins goto unlock; 12741da177e4SLinus Torvalds } 12751da177e4SLinus Torvalds old_page = pfn_to_page(pfn); 12761da177e4SLinus Torvalds 1277d296e9cdSHugh Dickins if (PageAnon(old_page) && !TestSetPageLocked(old_page)) { 12781da177e4SLinus Torvalds int reuse = can_share_swap_page(old_page); 12791da177e4SLinus Torvalds unlock_page(old_page); 12801da177e4SLinus Torvalds if (reuse) { 12811da177e4SLinus Torvalds flush_cache_page(vma, address, pfn); 128265500d23SHugh Dickins entry = pte_mkyoung(orig_pte); 128365500d23SHugh Dickins entry = maybe_mkwrite(pte_mkdirty(entry), vma); 12841da177e4SLinus Torvalds ptep_set_access_flags(vma, address, page_table, entry, 1); 12851da177e4SLinus Torvalds update_mmu_cache(vma, address, entry); 12861da177e4SLinus Torvalds lazy_mmu_prot_update(entry); 128765500d23SHugh Dickins ret |= VM_FAULT_WRITE; 128865500d23SHugh Dickins goto unlock; 12891da177e4SLinus Torvalds } 12901da177e4SLinus Torvalds } 12911da177e4SLinus Torvalds 12921da177e4SLinus Torvalds /* 12931da177e4SLinus Torvalds * Ok, we need to copy. Oh, well..
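 * The steps below: take a reference on old_page, drop the pte map and
 * the page_table_lock, allocate the new page (a freshly zeroed page
 * when COWing ZERO_PAGE, otherwise copy_user_highpage), then retake
 * the lock and recheck pte_same() against orig_pte before committing
 * the new mapping.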
12941da177e4SLinus Torvalds */ 12951da177e4SLinus Torvalds page_cache_get(old_page); 129665500d23SHugh Dickins pte_unmap(page_table); 12971da177e4SLinus Torvalds spin_unlock(&mm->page_table_lock); 12981da177e4SLinus Torvalds 12991da177e4SLinus Torvalds if (unlikely(anon_vma_prepare(vma))) 130065500d23SHugh Dickins goto oom; 13011da177e4SLinus Torvalds if (old_page == ZERO_PAGE(address)) { 13021da177e4SLinus Torvalds new_page = alloc_zeroed_user_highpage(vma, address); 13031da177e4SLinus Torvalds if (!new_page) 130465500d23SHugh Dickins goto oom; 13051da177e4SLinus Torvalds } else { 13061da177e4SLinus Torvalds new_page = alloc_page_vma(GFP_HIGHUSER, vma, address); 13071da177e4SLinus Torvalds if (!new_page) 130865500d23SHugh Dickins goto oom; 13091da177e4SLinus Torvalds copy_user_highpage(new_page, old_page, address); 13101da177e4SLinus Torvalds } 131165500d23SHugh Dickins 13121da177e4SLinus Torvalds /* 13131da177e4SLinus Torvalds * Re-check the pte - we dropped the lock 13141da177e4SLinus Torvalds */ 13151da177e4SLinus Torvalds spin_lock(&mm->page_table_lock); 13161da177e4SLinus Torvalds page_table = pte_offset_map(pmd, address); 131765500d23SHugh Dickins if (likely(pte_same(*page_table, orig_pte))) { 13181da177e4SLinus Torvalds page_remove_rmap(old_page); 13194294621fSHugh Dickins if (!PageAnon(old_page)) { 13204294621fSHugh Dickins inc_mm_counter(mm, anon_rss); 13214294621fSHugh Dickins dec_mm_counter(mm, file_rss); 13224294621fSHugh Dickins } 13231da177e4SLinus Torvalds flush_cache_page(vma, address, pfn); 132465500d23SHugh Dickins entry = mk_pte(new_page, vma->vm_page_prot); 132565500d23SHugh Dickins entry = maybe_mkwrite(pte_mkdirty(entry), vma); 132665500d23SHugh Dickins ptep_establish(vma, address, page_table, entry); 132765500d23SHugh Dickins update_mmu_cache(vma, address, entry); 132865500d23SHugh Dickins lazy_mmu_prot_update(entry); 132965500d23SHugh Dickins 13301da177e4SLinus Torvalds lru_cache_add_active(new_page); 13311da177e4SLinus Torvalds page_add_anon_rmap(new_page, vma, address); 13321da177e4SLinus Torvalds 13331da177e4SLinus Torvalds /* Free the old page.. */ 13341da177e4SLinus Torvalds new_page = old_page; 1335f33ea7f4SNick Piggin ret |= VM_FAULT_WRITE; 13361da177e4SLinus Torvalds } 13371da177e4SLinus Torvalds page_cache_release(new_page); 13381da177e4SLinus Torvalds page_cache_release(old_page); 133965500d23SHugh Dickins unlock: 134065500d23SHugh Dickins pte_unmap(page_table); 13411da177e4SLinus Torvalds spin_unlock(&mm->page_table_lock); 1342f33ea7f4SNick Piggin return ret; 134365500d23SHugh Dickins oom: 13441da177e4SLinus Torvalds page_cache_release(old_page); 13451da177e4SLinus Torvalds return VM_FAULT_OOM; 13461da177e4SLinus Torvalds } 13471da177e4SLinus Torvalds 13481da177e4SLinus Torvalds /* 13491da177e4SLinus Torvalds * Helper functions for unmap_mapping_range(). 13501da177e4SLinus Torvalds * 13511da177e4SLinus Torvalds * __ Notes on dropping i_mmap_lock to reduce latency while unmapping __ 13521da177e4SLinus Torvalds * 13531da177e4SLinus Torvalds * We have to restart searching the prio_tree whenever we drop the lock, 13541da177e4SLinus Torvalds * since the iterator is only valid while the lock is held, and anyway 13551da177e4SLinus Torvalds * a later vma might be split and reinserted earlier while lock dropped. 13561da177e4SLinus Torvalds * 13571da177e4SLinus Torvalds * The list of nonlinear vmas could be handled more efficiently, using 13581da177e4SLinus Torvalds * a placeholder, but handle it in the same way until a need is shown. 
13591da177e4SLinus Torvalds * It is important to search the prio_tree before nonlinear list: a vma 13601da177e4SLinus Torvalds * may become nonlinear and be shifted from prio_tree to nonlinear list 13611da177e4SLinus Torvalds * while the lock is dropped; but never shifted from list to prio_tree. 13621da177e4SLinus Torvalds * 13631da177e4SLinus Torvalds * In order to make forward progress despite restarting the search, 13641da177e4SLinus Torvalds * vm_truncate_count is used to mark a vma as now dealt with, so we can 13651da177e4SLinus Torvalds * quickly skip it next time around. Since the prio_tree search only 13661da177e4SLinus Torvalds * shows us those vmas affected by unmapping the range in question, we 13671da177e4SLinus Torvalds * can't efficiently keep all vmas in step with mapping->truncate_count: 13681da177e4SLinus Torvalds * so instead reset them all whenever it wraps back to 0 (then go to 1). 13691da177e4SLinus Torvalds * mapping->truncate_count and vma->vm_truncate_count are protected by 13701da177e4SLinus Torvalds * i_mmap_lock. 13711da177e4SLinus Torvalds * 13721da177e4SLinus Torvalds * In order to make forward progress despite repeatedly restarting some 1373ee39b37bSHugh Dickins * large vma, note the restart_addr from unmap_vmas when it breaks out: 13741da177e4SLinus Torvalds * and restart from that address when we reach that vma again. It might 13751da177e4SLinus Torvalds * have been split or merged, shrunk or extended, but never shifted: so 13761da177e4SLinus Torvalds * restart_addr remains valid so long as it remains in the vma's range. 13771da177e4SLinus Torvalds * unmap_mapping_range forces truncate_count to leap over page-aligned 13781da177e4SLinus Torvalds * values so we can save vma's restart_addr in its truncate_count field. 13791da177e4SLinus Torvalds */ 13801da177e4SLinus Torvalds #define is_restart_addr(truncate_count) (!((truncate_count) & ~PAGE_MASK)) 13811da177e4SLinus Torvalds 13821da177e4SLinus Torvalds static void reset_vma_truncate_counts(struct address_space *mapping) 13831da177e4SLinus Torvalds { 13841da177e4SLinus Torvalds struct vm_area_struct *vma; 13851da177e4SLinus Torvalds struct prio_tree_iter iter; 13861da177e4SLinus Torvalds 13871da177e4SLinus Torvalds vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX) 13881da177e4SLinus Torvalds vma->vm_truncate_count = 0; 13891da177e4SLinus Torvalds list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) 13901da177e4SLinus Torvalds vma->vm_truncate_count = 0; 13911da177e4SLinus Torvalds } 13921da177e4SLinus Torvalds 13931da177e4SLinus Torvalds static int unmap_mapping_range_vma(struct vm_area_struct *vma, 13941da177e4SLinus Torvalds unsigned long start_addr, unsigned long end_addr, 13951da177e4SLinus Torvalds struct zap_details *details) 13961da177e4SLinus Torvalds { 13971da177e4SLinus Torvalds unsigned long restart_addr; 13981da177e4SLinus Torvalds int need_break; 13991da177e4SLinus Torvalds 14001da177e4SLinus Torvalds again: 14011da177e4SLinus Torvalds restart_addr = vma->vm_truncate_count; 14021da177e4SLinus Torvalds if (is_restart_addr(restart_addr) && start_addr < restart_addr) { 14031da177e4SLinus Torvalds start_addr = restart_addr; 14041da177e4SLinus Torvalds if (start_addr >= end_addr) { 14051da177e4SLinus Torvalds /* Top of vma has been split off since last time */ 14061da177e4SLinus Torvalds vma->vm_truncate_count = details->truncate_count; 14071da177e4SLinus Torvalds return 0; 14081da177e4SLinus Torvalds } 14091da177e4SLinus Torvalds } 14101da177e4SLinus Torvalds 
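	/*
	 * zap_page_range() returns the address at which it stopped:
	 * end_addr when it finished the range, or an earlier restart
	 * point if unmap_vmas had to break out; that restart point is
	 * saved in vm_truncate_count below so the next pass over this
	 * vma can resume where we left off.
	 */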
1411ee39b37bSHugh Dickins restart_addr = zap_page_range(vma, start_addr, 1412ee39b37bSHugh Dickins end_addr - start_addr, details); 14131da177e4SLinus Torvalds 14141da177e4SLinus Torvalds /* 14151da177e4SLinus Torvalds * We cannot rely on the break test in unmap_vmas: 14161da177e4SLinus Torvalds * on the one hand, we don't want to restart our loop 14171da177e4SLinus Torvalds * just because that broke out for the page_table_lock; 14181da177e4SLinus Torvalds * on the other hand, it does no test when vma is small. 14191da177e4SLinus Torvalds */ 14201da177e4SLinus Torvalds need_break = need_resched() || 14211da177e4SLinus Torvalds need_lockbreak(details->i_mmap_lock); 14221da177e4SLinus Torvalds 1423ee39b37bSHugh Dickins if (restart_addr >= end_addr) { 14241da177e4SLinus Torvalds /* We have now completed this vma: mark it so */ 14251da177e4SLinus Torvalds vma->vm_truncate_count = details->truncate_count; 14261da177e4SLinus Torvalds if (!need_break) 14271da177e4SLinus Torvalds return 0; 14281da177e4SLinus Torvalds } else { 14291da177e4SLinus Torvalds /* Note restart_addr in vma's truncate_count field */ 1430ee39b37bSHugh Dickins vma->vm_truncate_count = restart_addr; 14311da177e4SLinus Torvalds if (!need_break) 14321da177e4SLinus Torvalds goto again; 14331da177e4SLinus Torvalds } 14341da177e4SLinus Torvalds 14351da177e4SLinus Torvalds spin_unlock(details->i_mmap_lock); 14361da177e4SLinus Torvalds cond_resched(); 14371da177e4SLinus Torvalds spin_lock(details->i_mmap_lock); 14381da177e4SLinus Torvalds return -EINTR; 14391da177e4SLinus Torvalds } 14401da177e4SLinus Torvalds 14411da177e4SLinus Torvalds static inline void unmap_mapping_range_tree(struct prio_tree_root *root, 14421da177e4SLinus Torvalds struct zap_details *details) 14431da177e4SLinus Torvalds { 14441da177e4SLinus Torvalds struct vm_area_struct *vma; 14451da177e4SLinus Torvalds struct prio_tree_iter iter; 14461da177e4SLinus Torvalds pgoff_t vba, vea, zba, zea; 14471da177e4SLinus Torvalds 14481da177e4SLinus Torvalds restart: 14491da177e4SLinus Torvalds vma_prio_tree_foreach(vma, &iter, root, 14501da177e4SLinus Torvalds details->first_index, details->last_index) { 14511da177e4SLinus Torvalds /* Skip quickly over those we have already dealt with */ 14521da177e4SLinus Torvalds if (vma->vm_truncate_count == details->truncate_count) 14531da177e4SLinus Torvalds continue; 14541da177e4SLinus Torvalds 14551da177e4SLinus Torvalds vba = vma->vm_pgoff; 14561da177e4SLinus Torvalds vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1; 14571da177e4SLinus Torvalds /* Assume for now that PAGE_CACHE_SHIFT == PAGE_SHIFT */ 14581da177e4SLinus Torvalds zba = details->first_index; 14591da177e4SLinus Torvalds if (zba < vba) 14601da177e4SLinus Torvalds zba = vba; 14611da177e4SLinus Torvalds zea = details->last_index; 14621da177e4SLinus Torvalds if (zea > vea) 14631da177e4SLinus Torvalds zea = vea; 14641da177e4SLinus Torvalds 14651da177e4SLinus Torvalds if (unmap_mapping_range_vma(vma, 14661da177e4SLinus Torvalds ((zba - vba) << PAGE_SHIFT) + vma->vm_start, 14671da177e4SLinus Torvalds ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, 14681da177e4SLinus Torvalds details) < 0) 14691da177e4SLinus Torvalds goto restart; 14701da177e4SLinus Torvalds } 14711da177e4SLinus Torvalds } 14721da177e4SLinus Torvalds 14731da177e4SLinus Torvalds static inline void unmap_mapping_range_list(struct list_head *head, 14741da177e4SLinus Torvalds struct zap_details *details) 14751da177e4SLinus Torvalds { 14761da177e4SLinus Torvalds struct vm_area_struct *vma; 
14771da177e4SLinus Torvalds 14781da177e4SLinus Torvalds /* 14791da177e4SLinus Torvalds * In nonlinear VMAs there is no correspondence between virtual address 14801da177e4SLinus Torvalds * offset and file offset. So we must perform an exhaustive search 14811da177e4SLinus Torvalds * across *all* the pages in each nonlinear VMA, not just the pages 14821da177e4SLinus Torvalds * whose virtual address lies outside the file truncation point. 14831da177e4SLinus Torvalds */ 14841da177e4SLinus Torvalds restart: 14851da177e4SLinus Torvalds list_for_each_entry(vma, head, shared.vm_set.list) { 14861da177e4SLinus Torvalds /* Skip quickly over those we have already dealt with */ 14871da177e4SLinus Torvalds if (vma->vm_truncate_count == details->truncate_count) 14881da177e4SLinus Torvalds continue; 14891da177e4SLinus Torvalds details->nonlinear_vma = vma; 14901da177e4SLinus Torvalds if (unmap_mapping_range_vma(vma, vma->vm_start, 14911da177e4SLinus Torvalds vma->vm_end, details) < 0) 14921da177e4SLinus Torvalds goto restart; 14931da177e4SLinus Torvalds } 14941da177e4SLinus Torvalds } 14951da177e4SLinus Torvalds 14961da177e4SLinus Torvalds /** 14971da177e4SLinus Torvalds * unmap_mapping_range - unmap the portion of all mmaps 14981da177e4SLinus Torvalds * in the specified address_space corresponding to the specified 14991da177e4SLinus Torvalds * page range in the underlying file. 15003d41088fSMartin Waitz * @mapping: the address space containing mmaps to be unmapped. 15011da177e4SLinus Torvalds * @holebegin: byte in first page to unmap, relative to the start of 15021da177e4SLinus Torvalds * the underlying file. This will be rounded down to a PAGE_SIZE 15031da177e4SLinus Torvalds * boundary. Note that this is different from vmtruncate(), which 15041da177e4SLinus Torvalds * must keep the partial page. In contrast, we must get rid of 15051da177e4SLinus Torvalds * partial pages. 15061da177e4SLinus Torvalds * @holelen: size of prospective hole in bytes. This will be rounded 15071da177e4SLinus Torvalds * up to a PAGE_SIZE boundary. A holelen of zero truncates to the 15081da177e4SLinus Torvalds * end of the file. 15091da177e4SLinus Torvalds * @even_cows: 1 when truncating a file, unmap even private COWed pages; 15101da177e4SLinus Torvalds * but 0 when invalidating pagecache, don't throw away private data. 15111da177e4SLinus Torvalds */ 15121da177e4SLinus Torvalds void unmap_mapping_range(struct address_space *mapping, 15131da177e4SLinus Torvalds loff_t const holebegin, loff_t const holelen, int even_cows) 15141da177e4SLinus Torvalds { 15151da177e4SLinus Torvalds struct zap_details details; 15161da177e4SLinus Torvalds pgoff_t hba = holebegin >> PAGE_SHIFT; 15171da177e4SLinus Torvalds pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; 15181da177e4SLinus Torvalds 15191da177e4SLinus Torvalds /* Check for overflow. */ 15201da177e4SLinus Torvalds if (sizeof(holelen) > sizeof(hlen)) { 15211da177e4SLinus Torvalds long long holeend = 15221da177e4SLinus Torvalds (holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; 15231da177e4SLinus Torvalds if (holeend & ~(long long)ULONG_MAX) 15241da177e4SLinus Torvalds hlen = ULONG_MAX - hba + 1; 15251da177e4SLinus Torvalds } 15261da177e4SLinus Torvalds 15271da177e4SLinus Torvalds details.check_mapping = even_cows? 
NULL: mapping; 15281da177e4SLinus Torvalds details.nonlinear_vma = NULL; 15291da177e4SLinus Torvalds details.first_index = hba; 15301da177e4SLinus Torvalds details.last_index = hba + hlen - 1; 15311da177e4SLinus Torvalds if (details.last_index < details.first_index) 15321da177e4SLinus Torvalds details.last_index = ULONG_MAX; 15331da177e4SLinus Torvalds details.i_mmap_lock = &mapping->i_mmap_lock; 15341da177e4SLinus Torvalds 15351da177e4SLinus Torvalds spin_lock(&mapping->i_mmap_lock); 15361da177e4SLinus Torvalds 15371da177e4SLinus Torvalds /* serialize i_size write against truncate_count write */ 15381da177e4SLinus Torvalds smp_wmb(); 15391da177e4SLinus Torvalds /* Protect against page faults, and endless unmapping loops */ 15401da177e4SLinus Torvalds mapping->truncate_count++; 15411da177e4SLinus Torvalds /* 15421da177e4SLinus Torvalds * For archs where spin_lock has inclusive semantics, like ia64, 15431da177e4SLinus Torvalds * this smp_mb() prevents pagetable contents from being read 15441da177e4SLinus Torvalds * before the truncate_count increment is visible to 15451da177e4SLinus Torvalds * other cpus. 15461da177e4SLinus Torvalds */ 15471da177e4SLinus Torvalds smp_mb(); 15481da177e4SLinus Torvalds if (unlikely(is_restart_addr(mapping->truncate_count))) { 15491da177e4SLinus Torvalds if (mapping->truncate_count == 0) 15501da177e4SLinus Torvalds reset_vma_truncate_counts(mapping); 15511da177e4SLinus Torvalds mapping->truncate_count++; 15521da177e4SLinus Torvalds } 15531da177e4SLinus Torvalds details.truncate_count = mapping->truncate_count; 15541da177e4SLinus Torvalds 15551da177e4SLinus Torvalds if (unlikely(!prio_tree_empty(&mapping->i_mmap))) 15561da177e4SLinus Torvalds unmap_mapping_range_tree(&mapping->i_mmap, &details); 15571da177e4SLinus Torvalds if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) 15581da177e4SLinus Torvalds unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details); 15591da177e4SLinus Torvalds spin_unlock(&mapping->i_mmap_lock); 15601da177e4SLinus Torvalds } 15611da177e4SLinus Torvalds EXPORT_SYMBOL(unmap_mapping_range); 15621da177e4SLinus Torvalds 15631da177e4SLinus Torvalds /* 15641da177e4SLinus Torvalds * Handle all mappings that got truncated by a "truncate()" 15651da177e4SLinus Torvalds * system call. 15661da177e4SLinus Torvalds * 15671da177e4SLinus Torvalds * NOTE! We have to be ready to update the memory sharing 15681da177e4SLinus Torvalds * between the file and the memory map for a potential last 15691da177e4SLinus Torvalds * incomplete page. Ugly, but necessary. 15701da177e4SLinus Torvalds */ 15711da177e4SLinus Torvalds int vmtruncate(struct inode * inode, loff_t offset) 15721da177e4SLinus Torvalds { 15731da177e4SLinus Torvalds struct address_space *mapping = inode->i_mapping; 15741da177e4SLinus Torvalds unsigned long limit; 15751da177e4SLinus Torvalds 15761da177e4SLinus Torvalds if (inode->i_size < offset) 15771da177e4SLinus Torvalds goto do_expand; 15781da177e4SLinus Torvalds /* 15791da177e4SLinus Torvalds * truncation of in-use swapfiles is disallowed - it would cause 15801da177e4SLinus Torvalds * subsequent swapout to scribble on the now-freed blocks.
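 * (Swap io is done directly to the swapfile's underlying disk blocks,
 * bypassing the filesystem, so those blocks must not be freed and
 * reused while the area is swapped on.)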
15811da177e4SLinus Torvalds */ 15821da177e4SLinus Torvalds if (IS_SWAPFILE(inode)) 15831da177e4SLinus Torvalds goto out_busy; 15841da177e4SLinus Torvalds i_size_write(inode, offset); 15851da177e4SLinus Torvalds unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); 15861da177e4SLinus Torvalds truncate_inode_pages(mapping, offset); 15871da177e4SLinus Torvalds goto out_truncate; 15881da177e4SLinus Torvalds 15891da177e4SLinus Torvalds do_expand: 15901da177e4SLinus Torvalds limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; 15911da177e4SLinus Torvalds if (limit != RLIM_INFINITY && offset > limit) 15921da177e4SLinus Torvalds goto out_sig; 15931da177e4SLinus Torvalds if (offset > inode->i_sb->s_maxbytes) 15941da177e4SLinus Torvalds goto out_big; 15951da177e4SLinus Torvalds i_size_write(inode, offset); 15961da177e4SLinus Torvalds 15971da177e4SLinus Torvalds out_truncate: 15981da177e4SLinus Torvalds if (inode->i_op && inode->i_op->truncate) 15991da177e4SLinus Torvalds inode->i_op->truncate(inode); 16001da177e4SLinus Torvalds return 0; 16011da177e4SLinus Torvalds out_sig: 16021da177e4SLinus Torvalds send_sig(SIGXFSZ, current, 0); 16031da177e4SLinus Torvalds out_big: 16041da177e4SLinus Torvalds return -EFBIG; 16051da177e4SLinus Torvalds out_busy: 16061da177e4SLinus Torvalds return -ETXTBSY; 16071da177e4SLinus Torvalds } 16081da177e4SLinus Torvalds 16091da177e4SLinus Torvalds EXPORT_SYMBOL(vmtruncate); 16101da177e4SLinus Torvalds 16111da177e4SLinus Torvalds /* 16121da177e4SLinus Torvalds * Primitive swap readahead code. We simply read an aligned block of 16131da177e4SLinus Torvalds * (1 << page_cluster) entries in the swap area. This method is chosen 16141da177e4SLinus Torvalds * because it doesn't cost us any seek time. We also make sure to queue 16151da177e4SLinus Torvalds * the 'original' request together with the readahead ones... 16161da177e4SLinus Torvalds * 16171da177e4SLinus Torvalds * This has been extended to use the NUMA policies from the mm triggering 16181da177e4SLinus Torvalds * the readahead. 16191da177e4SLinus Torvalds * 16201da177e4SLinus Torvalds * Caller must hold down_read on the vma->vm_mm if vma is not NULL. 16211da177e4SLinus Torvalds */ 16221da177e4SLinus Torvalds void swapin_readahead(swp_entry_t entry, unsigned long addr, struct vm_area_struct *vma) 16231da177e4SLinus Torvalds { 16241da177e4SLinus Torvalds #ifdef CONFIG_NUMA 16251da177e4SLinus Torvalds struct vm_area_struct *next_vma = vma ? vma->vm_next : NULL; 16261da177e4SLinus Torvalds #endif 16271da177e4SLinus Torvalds int i, num; 16281da177e4SLinus Torvalds struct page *new_page; 16291da177e4SLinus Torvalds unsigned long offset; 16301da177e4SLinus Torvalds 16311da177e4SLinus Torvalds /* 16321da177e4SLinus Torvalds * Get the number of swap handles we should read ahead. 16331da177e4SLinus Torvalds */ 16341da177e4SLinus Torvalds num = valid_swaphandles(entry, &offset); 16351da177e4SLinus Torvalds for (i = 0; i < num; offset++, i++) { 16361da177e4SLinus Torvalds /* Ok, do the async read-ahead now */ 16371da177e4SLinus Torvalds new_page = read_swap_cache_async(swp_entry(swp_type(entry), 16381da177e4SLinus Torvalds offset), vma, addr); 16391da177e4SLinus Torvalds if (!new_page) 16401da177e4SLinus Torvalds break; 16411da177e4SLinus Torvalds page_cache_release(new_page); 16421da177e4SLinus Torvalds #ifdef CONFIG_NUMA 16431da177e4SLinus Torvalds /* 16441da177e4SLinus Torvalds * Find the next applicable VMA for the NUMA policy.
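 * addr advances one page per entry read ahead: crossing vm_end moves
 * us on to the next vma; addresses falling in a gap before a vma
 * leave vma NULL; and wrapping past the top of the address space
 * leaves vma NULL too, so the default policy is used from then on.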
16451da177e4SLinus Torvalds */ 16461da177e4SLinus Torvalds addr += PAGE_SIZE; 16471da177e4SLinus Torvalds if (addr == 0) 16481da177e4SLinus Torvalds vma = NULL; 16491da177e4SLinus Torvalds if (vma) { 16501da177e4SLinus Torvalds if (addr >= vma->vm_end) { 16511da177e4SLinus Torvalds vma = next_vma; 16521da177e4SLinus Torvalds next_vma = vma ? vma->vm_next : NULL; 16531da177e4SLinus Torvalds } 16541da177e4SLinus Torvalds if (vma && addr < vma->vm_start) 16551da177e4SLinus Torvalds vma = NULL; 16561da177e4SLinus Torvalds } else { 16571da177e4SLinus Torvalds if (next_vma && addr >= next_vma->vm_start) { 16581da177e4SLinus Torvalds vma = next_vma; 16591da177e4SLinus Torvalds next_vma = vma->vm_next; 16601da177e4SLinus Torvalds } 16611da177e4SLinus Torvalds } 16621da177e4SLinus Torvalds #endif 16631da177e4SLinus Torvalds } 16641da177e4SLinus Torvalds lru_add_drain(); /* Push any new pages onto the LRU now */ 16651da177e4SLinus Torvalds } 16661da177e4SLinus Torvalds 16671da177e4SLinus Torvalds /* 16681da177e4SLinus Torvalds * We hold the mm semaphore and the page_table_lock on entry and 16691da177e4SLinus Torvalds * should release the pagetable lock on exit.. 16701da177e4SLinus Torvalds */ 167165500d23SHugh Dickins static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, 167265500d23SHugh Dickins unsigned long address, pte_t *page_table, pmd_t *pmd, 167365500d23SHugh Dickins int write_access, pte_t orig_pte) 16741da177e4SLinus Torvalds { 16751da177e4SLinus Torvalds struct page *page; 167665500d23SHugh Dickins swp_entry_t entry; 16771da177e4SLinus Torvalds pte_t pte; 16781da177e4SLinus Torvalds int ret = VM_FAULT_MINOR; 16791da177e4SLinus Torvalds 16801da177e4SLinus Torvalds pte_unmap(page_table); 16811da177e4SLinus Torvalds spin_unlock(&mm->page_table_lock); 168265500d23SHugh Dickins 168365500d23SHugh Dickins entry = pte_to_swp_entry(orig_pte); 16841da177e4SLinus Torvalds page = lookup_swap_cache(entry); 16851da177e4SLinus Torvalds if (!page) { 16861da177e4SLinus Torvalds swapin_readahead(entry, address, vma); 16871da177e4SLinus Torvalds page = read_swap_cache_async(entry, vma, address); 16881da177e4SLinus Torvalds if (!page) { 16891da177e4SLinus Torvalds /* 16901da177e4SLinus Torvalds * Back out if somebody else faulted in this pte while 16911da177e4SLinus Torvalds * we released the page table lock. 16921da177e4SLinus Torvalds */ 16931da177e4SLinus Torvalds spin_lock(&mm->page_table_lock); 16941da177e4SLinus Torvalds page_table = pte_offset_map(pmd, address); 16951da177e4SLinus Torvalds if (likely(pte_same(*page_table, orig_pte))) 16961da177e4SLinus Torvalds ret = VM_FAULT_OOM; 169765500d23SHugh Dickins goto unlock; 16981da177e4SLinus Torvalds } 16991da177e4SLinus Torvalds 17001da177e4SLinus Torvalds /* Had to read the page from swap area: Major fault */ 17011da177e4SLinus Torvalds ret = VM_FAULT_MAJOR; 17021da177e4SLinus Torvalds inc_page_state(pgmajfault); 17031da177e4SLinus Torvalds grab_swap_token(); 17041da177e4SLinus Torvalds } 17051da177e4SLinus Torvalds 17061da177e4SLinus Torvalds mark_page_accessed(page); 17071da177e4SLinus Torvalds lock_page(page); 17081da177e4SLinus Torvalds 17091da177e4SLinus Torvalds /* 17101da177e4SLinus Torvalds * Back out if somebody else faulted in this pte while we 17111da177e4SLinus Torvalds * released the page table lock. 
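 * The pte_same() check against orig_pte below catches that race:
 * if the pte has changed we must not map the page a second time,
 * so we just drop our swapcache page reference and back out
 * through out_nomap.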
17121da177e4SLinus Torvalds */ 17131da177e4SLinus Torvalds spin_lock(&mm->page_table_lock); 17141da177e4SLinus Torvalds page_table = pte_offset_map(pmd, address); 17159e9bef07SHugh Dickins if (unlikely(!pte_same(*page_table, orig_pte))) 1716b8107480SKirill Korotaev goto out_nomap; 1717b8107480SKirill Korotaev 1718b8107480SKirill Korotaev if (unlikely(!PageUptodate(page))) { 1719b8107480SKirill Korotaev ret = VM_FAULT_SIGBUS; 1720b8107480SKirill Korotaev goto out_nomap; 17211da177e4SLinus Torvalds } 17221da177e4SLinus Torvalds 17231da177e4SLinus Torvalds /* The page isn't present yet, go ahead with the fault. */ 17241da177e4SLinus Torvalds 17254294621fSHugh Dickins inc_mm_counter(mm, anon_rss); 17261da177e4SLinus Torvalds pte = mk_pte(page, vma->vm_page_prot); 17271da177e4SLinus Torvalds if (write_access && can_share_swap_page(page)) { 17281da177e4SLinus Torvalds pte = maybe_mkwrite(pte_mkdirty(pte), vma); 17291da177e4SLinus Torvalds write_access = 0; 17301da177e4SLinus Torvalds } 17311da177e4SLinus Torvalds 17321da177e4SLinus Torvalds flush_icache_page(vma, page); 17331da177e4SLinus Torvalds set_pte_at(mm, address, page_table, pte); 17341da177e4SLinus Torvalds page_add_anon_rmap(page, vma, address); 17351da177e4SLinus Torvalds 1736c475a8abSHugh Dickins swap_free(entry); 1737c475a8abSHugh Dickins if (vm_swap_full()) 1738c475a8abSHugh Dickins remove_exclusive_swap_page(page); 1739c475a8abSHugh Dickins unlock_page(page); 1740c475a8abSHugh Dickins 17411da177e4SLinus Torvalds if (write_access) { 17421da177e4SLinus Torvalds if (do_wp_page(mm, vma, address, 17431da177e4SLinus Torvalds page_table, pmd, pte) == VM_FAULT_OOM) 17441da177e4SLinus Torvalds ret = VM_FAULT_OOM; 17451da177e4SLinus Torvalds goto out; 17461da177e4SLinus Torvalds } 17471da177e4SLinus Torvalds 17481da177e4SLinus Torvalds /* No need to invalidate - it was non-present before */ 17491da177e4SLinus Torvalds update_mmu_cache(vma, address, pte); 17501da177e4SLinus Torvalds lazy_mmu_prot_update(pte); 175165500d23SHugh Dickins unlock: 17521da177e4SLinus Torvalds pte_unmap(page_table); 17531da177e4SLinus Torvalds spin_unlock(&mm->page_table_lock); 17541da177e4SLinus Torvalds out: 17551da177e4SLinus Torvalds return ret; 1756b8107480SKirill Korotaev out_nomap: 1757b8107480SKirill Korotaev pte_unmap(page_table); 1758b8107480SKirill Korotaev spin_unlock(&mm->page_table_lock); 1759b8107480SKirill Korotaev unlock_page(page); 1760b8107480SKirill Korotaev page_cache_release(page); 176165500d23SHugh Dickins return ret; 17621da177e4SLinus Torvalds } 17631da177e4SLinus Torvalds 17641da177e4SLinus Torvalds /* 17651da177e4SLinus Torvalds * We are called with the MM semaphore and page_table_lock 17661da177e4SLinus Torvalds * spinlock held to protect against concurrent faults in 17671da177e4SLinus Torvalds * multithreaded programs. 17681da177e4SLinus Torvalds */ 176965500d23SHugh Dickins static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, 177065500d23SHugh Dickins unsigned long address, pte_t *page_table, pmd_t *pmd, 177165500d23SHugh Dickins int write_access) 17721da177e4SLinus Torvalds { 1773b5810039SNick Piggin struct page *page = ZERO_PAGE(address); 17741da177e4SLinus Torvalds pte_t entry; 17751da177e4SLinus Torvalds 177672866f6fSHugh Dickins /* Mapping of ZERO_PAGE - vm_page_prot is readonly */ 1777b5810039SNick Piggin entry = mk_pte(page, vma->vm_page_prot); 17781da177e4SLinus Torvalds 17791da177e4SLinus Torvalds if (write_access) { 17801da177e4SLinus Torvalds /* Allocate our own private page.
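 * (A read fault simply maps ZERO_PAGE read-only; the private copy
 * is left to do_wp_page() to make later, if a write ever comes.)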
*/ 17811da177e4SLinus Torvalds pte_unmap(page_table); 17821da177e4SLinus Torvalds spin_unlock(&mm->page_table_lock); 17831da177e4SLinus Torvalds 17841da177e4SLinus Torvalds if (unlikely(anon_vma_prepare(vma))) 178565500d23SHugh Dickins goto oom; 178665500d23SHugh Dickins page = alloc_zeroed_user_highpage(vma, address); 17871da177e4SLinus Torvalds if (!page) 178865500d23SHugh Dickins goto oom; 17891da177e4SLinus Torvalds 17901da177e4SLinus Torvalds spin_lock(&mm->page_table_lock); 179165500d23SHugh Dickins page_table = pte_offset_map(pmd, address); 17921da177e4SLinus Torvalds 17931da177e4SLinus Torvalds if (!pte_none(*page_table)) { 17941da177e4SLinus Torvalds page_cache_release(page); 179565500d23SHugh Dickins goto unlock; 17961da177e4SLinus Torvalds } 17974294621fSHugh Dickins inc_mm_counter(mm, anon_rss); 179865500d23SHugh Dickins entry = mk_pte(page, vma->vm_page_prot); 179965500d23SHugh Dickins entry = maybe_mkwrite(pte_mkdirty(entry), vma); 18001da177e4SLinus Torvalds lru_cache_add_active(page); 18011da177e4SLinus Torvalds SetPageReferenced(page); 180265500d23SHugh Dickins page_add_anon_rmap(page, vma, address); 1803b5810039SNick Piggin } else { 1804b5810039SNick Piggin inc_mm_counter(mm, file_rss); 1805b5810039SNick Piggin page_add_file_rmap(page); 1806b5810039SNick Piggin page_cache_get(page); 18071da177e4SLinus Torvalds } 18081da177e4SLinus Torvalds 180965500d23SHugh Dickins set_pte_at(mm, address, page_table, entry); 18101da177e4SLinus Torvalds 18111da177e4SLinus Torvalds /* No need to invalidate - it was non-present before */ 181265500d23SHugh Dickins update_mmu_cache(vma, address, entry); 18131da177e4SLinus Torvalds lazy_mmu_prot_update(entry); 181465500d23SHugh Dickins unlock: 181565500d23SHugh Dickins pte_unmap(page_table); 18161da177e4SLinus Torvalds spin_unlock(&mm->page_table_lock); 18171da177e4SLinus Torvalds return VM_FAULT_MINOR; 181865500d23SHugh Dickins oom: 18191da177e4SLinus Torvalds return VM_FAULT_OOM; 18201da177e4SLinus Torvalds } 18211da177e4SLinus Torvalds 18221da177e4SLinus Torvalds /* 18231da177e4SLinus Torvalds * do_no_page() tries to create a new page mapping. It aggressively 18241da177e4SLinus Torvalds * tries to share with existing pages, but makes a separate copy if 18251da177e4SLinus Torvalds * the "write_access" parameter is true in order to avoid the next 18261da177e4SLinus Torvalds * page fault. 18271da177e4SLinus Torvalds * 18281da177e4SLinus Torvalds * As this is called only for pages that do not currently exist, we 18291da177e4SLinus Torvalds * do not need to flush old virtual caches or the TLB. 18301da177e4SLinus Torvalds * 18311da177e4SLinus Torvalds * This is called with the MM semaphore held and the page table 18321da177e4SLinus Torvalds * spinlock held. Exit with the spinlock released. 
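 * The sequence below: drop the lock, snapshot mapping->truncate_count,
 * have vma->vm_ops->nopage() find or read the page, make an early COW
 * copy if this is a write to a private mapping, then retake the lock
 * and go back to retry whenever the truncate_count snapshot has gone
 * stale under us.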
18331da177e4SLinus Torvalds */ 183465500d23SHugh Dickins static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, 183565500d23SHugh Dickins unsigned long address, pte_t *page_table, pmd_t *pmd, 183665500d23SHugh Dickins int write_access) 18371da177e4SLinus Torvalds { 18381da177e4SLinus Torvalds struct page *new_page; 18391da177e4SLinus Torvalds struct address_space *mapping = NULL; 18401da177e4SLinus Torvalds pte_t entry; 18411da177e4SLinus Torvalds unsigned int sequence = 0; 18421da177e4SLinus Torvalds int ret = VM_FAULT_MINOR; 18431da177e4SLinus Torvalds int anon = 0; 18441da177e4SLinus Torvalds 18451da177e4SLinus Torvalds pte_unmap(page_table); 18461da177e4SLinus Torvalds spin_unlock(&mm->page_table_lock); 18471da177e4SLinus Torvalds 18481da177e4SLinus Torvalds if (vma->vm_file) { 18491da177e4SLinus Torvalds mapping = vma->vm_file->f_mapping; 18501da177e4SLinus Torvalds sequence = mapping->truncate_count; 18511da177e4SLinus Torvalds smp_rmb(); /* serializes i_size against truncate_count */ 18521da177e4SLinus Torvalds } 18531da177e4SLinus Torvalds retry: 18541da177e4SLinus Torvalds new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret); 18551da177e4SLinus Torvalds /* 18561da177e4SLinus Torvalds * No smp_rmb is needed here as long as there's a full 18571da177e4SLinus Torvalds * spin_lock/unlock sequence inside the ->nopage callback 18581da177e4SLinus Torvalds * (for the pagecache lookup) that acts as an implicit 18591da177e4SLinus Torvalds * smp_mb() and prevents the i_size read to happen 18601da177e4SLinus Torvalds * after the next truncate_count read. 18611da177e4SLinus Torvalds */ 18621da177e4SLinus Torvalds 18631da177e4SLinus Torvalds /* no page was available -- either SIGBUS or OOM */ 18641da177e4SLinus Torvalds if (new_page == NOPAGE_SIGBUS) 18651da177e4SLinus Torvalds return VM_FAULT_SIGBUS; 18661da177e4SLinus Torvalds if (new_page == NOPAGE_OOM) 18671da177e4SLinus Torvalds return VM_FAULT_OOM; 18681da177e4SLinus Torvalds 18691da177e4SLinus Torvalds /* 18701da177e4SLinus Torvalds * Should we do an early C-O-W break? 18711da177e4SLinus Torvalds */ 18721da177e4SLinus Torvalds if (write_access && !(vma->vm_flags & VM_SHARED)) { 18731da177e4SLinus Torvalds struct page *page; 18741da177e4SLinus Torvalds 18751da177e4SLinus Torvalds if (unlikely(anon_vma_prepare(vma))) 18761da177e4SLinus Torvalds goto oom; 18771da177e4SLinus Torvalds page = alloc_page_vma(GFP_HIGHUSER, vma, address); 18781da177e4SLinus Torvalds if (!page) 18791da177e4SLinus Torvalds goto oom; 18801da177e4SLinus Torvalds copy_user_highpage(page, new_page, address); 18811da177e4SLinus Torvalds page_cache_release(new_page); 18821da177e4SLinus Torvalds new_page = page; 18831da177e4SLinus Torvalds anon = 1; 18841da177e4SLinus Torvalds } 18851da177e4SLinus Torvalds 18861da177e4SLinus Torvalds spin_lock(&mm->page_table_lock); 18871da177e4SLinus Torvalds /* 18881da177e4SLinus Torvalds * For a file-backed vma, someone could have truncated or otherwise 18891da177e4SLinus Torvalds * invalidated this page. If unmap_mapping_range got called, 18901da177e4SLinus Torvalds * retry getting the page. 
18911da177e4SLinus Torvalds */ 18921da177e4SLinus Torvalds if (mapping && unlikely(sequence != mapping->truncate_count)) { 18931da177e4SLinus Torvalds spin_unlock(&mm->page_table_lock); 18941da177e4SLinus Torvalds page_cache_release(new_page); 189565500d23SHugh Dickins cond_resched(); 189665500d23SHugh Dickins sequence = mapping->truncate_count; 189765500d23SHugh Dickins smp_rmb(); 18981da177e4SLinus Torvalds goto retry; 18991da177e4SLinus Torvalds } 19001da177e4SLinus Torvalds page_table = pte_offset_map(pmd, address); 19011da177e4SLinus Torvalds 19021da177e4SLinus Torvalds /* 19031da177e4SLinus Torvalds * This silly early PAGE_DIRTY setting removes a race 19041da177e4SLinus Torvalds * due to the bad i386 page protection. But it's valid 19051da177e4SLinus Torvalds * for other architectures too. 19061da177e4SLinus Torvalds * 19071da177e4SLinus Torvalds * Note that if write_access is true, we either now have 19081da177e4SLinus Torvalds * an exclusive copy of the page, or this is a shared mapping, 19091da177e4SLinus Torvalds * so we can make it writable and dirty to avoid having to 19101da177e4SLinus Torvalds * handle that later. 19111da177e4SLinus Torvalds */ 19121da177e4SLinus Torvalds /* Only go through if we didn't race with anybody else... */ 19131da177e4SLinus Torvalds if (pte_none(*page_table)) { 19141da177e4SLinus Torvalds flush_icache_page(vma, new_page); 19151da177e4SLinus Torvalds entry = mk_pte(new_page, vma->vm_page_prot); 19161da177e4SLinus Torvalds if (write_access) 19171da177e4SLinus Torvalds entry = maybe_mkwrite(pte_mkdirty(entry), vma); 19181da177e4SLinus Torvalds set_pte_at(mm, address, page_table, entry); 19191da177e4SLinus Torvalds if (anon) { 19204294621fSHugh Dickins inc_mm_counter(mm, anon_rss); 19211da177e4SLinus Torvalds lru_cache_add_active(new_page); 19221da177e4SLinus Torvalds page_add_anon_rmap(new_page, vma, address); 1923b5810039SNick Piggin } else if (!(vma->vm_flags & VM_RESERVED)) { 19244294621fSHugh Dickins inc_mm_counter(mm, file_rss); 19251da177e4SLinus Torvalds page_add_file_rmap(new_page); 19264294621fSHugh Dickins } 19271da177e4SLinus Torvalds } else { 19281da177e4SLinus Torvalds /* One of our sibling threads was faster, back out. */ 19291da177e4SLinus Torvalds page_cache_release(new_page); 193065500d23SHugh Dickins goto unlock; 19311da177e4SLinus Torvalds } 19321da177e4SLinus Torvalds 19331da177e4SLinus Torvalds /* no need to invalidate: a not-present page shouldn't be cached */ 19341da177e4SLinus Torvalds update_mmu_cache(vma, address, entry); 19351da177e4SLinus Torvalds lazy_mmu_prot_update(entry); 193665500d23SHugh Dickins unlock: 193765500d23SHugh Dickins pte_unmap(page_table); 19381da177e4SLinus Torvalds spin_unlock(&mm->page_table_lock); 19391da177e4SLinus Torvalds return ret; 19401da177e4SLinus Torvalds oom: 19411da177e4SLinus Torvalds page_cache_release(new_page); 194265500d23SHugh Dickins return VM_FAULT_OOM; 19431da177e4SLinus Torvalds } 19441da177e4SLinus Torvalds 19451da177e4SLinus Torvalds /* 19461da177e4SLinus Torvalds * Fault of a previously existing named mapping. Repopulate the pte 19471da177e4SLinus Torvalds * from the encoded file_pte if possible. This enables swappable 19481da177e4SLinus Torvalds * nonlinear vmas. 
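 * The file offset is encoded in the pte itself: pte_to_pgoff() below
 * recovers it, and vma->vm_ops->populate() re-establishes the mapping
 * at that offset, a single PAGE_SIZE unit at a time.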
19491da177e4SLinus Torvalds */ 19501da177e4SLinus Torvalds static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma, 195165500d23SHugh Dickins unsigned long address, pte_t *page_table, pmd_t *pmd, 195265500d23SHugh Dickins int write_access, pte_t orig_pte) 19531da177e4SLinus Torvalds { 195465500d23SHugh Dickins pgoff_t pgoff; 19551da177e4SLinus Torvalds int err; 19561da177e4SLinus Torvalds 195765500d23SHugh Dickins pte_unmap(page_table); 19581da177e4SLinus Torvalds spin_unlock(&mm->page_table_lock); 19591da177e4SLinus Torvalds 196065500d23SHugh Dickins if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) { 196165500d23SHugh Dickins /* 196265500d23SHugh Dickins * Page table corrupted: show pte and kill process. 196365500d23SHugh Dickins */ 1964b5810039SNick Piggin print_bad_pte(vma, orig_pte, address); 196565500d23SHugh Dickins return VM_FAULT_OOM; 196665500d23SHugh Dickins } 196765500d23SHugh Dickins /* We can then assume vma->vm_ops && vma->vm_ops->populate */ 196865500d23SHugh Dickins 196965500d23SHugh Dickins pgoff = pte_to_pgoff(orig_pte); 197065500d23SHugh Dickins err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, 197165500d23SHugh Dickins vma->vm_page_prot, pgoff, 0); 19721da177e4SLinus Torvalds if (err == -ENOMEM) 19731da177e4SLinus Torvalds return VM_FAULT_OOM; 19741da177e4SLinus Torvalds if (err) 19751da177e4SLinus Torvalds return VM_FAULT_SIGBUS; 19761da177e4SLinus Torvalds return VM_FAULT_MAJOR; 19771da177e4SLinus Torvalds } 19781da177e4SLinus Torvalds 19791da177e4SLinus Torvalds /* 19801da177e4SLinus Torvalds * These routines also need to handle stuff like marking pages dirty 19811da177e4SLinus Torvalds * and/or accessed for architectures that don't do it in hardware (most 19821da177e4SLinus Torvalds * RISC architectures). The early dirtying is also good on the i386. 19831da177e4SLinus Torvalds * 19841da177e4SLinus Torvalds * There is also a hook called "update_mmu_cache()" that architectures 19851da177e4SLinus Torvalds * with external mmu caches can use to update those (ie the Sparc or 19861da177e4SLinus Torvalds * PowerPC hashed page tables that act as extended TLBs). 19871da177e4SLinus Torvalds * 19881da177e4SLinus Torvalds * Note the "page_table_lock". It is to protect against kswapd removing 19891da177e4SLinus Torvalds * pages from under us. Note that kswapd only ever _removes_ pages, never 19901da177e4SLinus Torvalds * adds them. As such, once we have noticed that the page is not present, 19911da177e4SLinus Torvalds * we can drop the lock early. 19921da177e4SLinus Torvalds * 19931da177e4SLinus Torvalds * The adding of pages is protected by the MM semaphore (which we hold), 19941da177e4SLinus Torvalds * so we don't need to worry about a page suddenly being added into 19951da177e4SLinus Torvalds * our VM. 19961da177e4SLinus Torvalds * 19971da177e4SLinus Torvalds * We enter with the pagetable spinlock held; we are supposed to 19981da177e4SLinus Torvalds * release it when done.
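 * In short, handle_pte_fault() below dispatches: a pte_none entry to
 * do_anonymous_page(), or to do_no_page() when the vma has a ->nopage
 * method; a pte_file entry to do_file_page(); any other non-present
 * entry to do_swap_page(); and a write to a present but read-only pte
 * to do_wp_page().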
19991da177e4SLinus Torvalds */ 20001da177e4SLinus Torvalds static inline int handle_pte_fault(struct mm_struct *mm, 20011da177e4SLinus Torvalds struct vm_area_struct *vma, unsigned long address, 200265500d23SHugh Dickins pte_t *pte, pmd_t *pmd, int write_access) 20031da177e4SLinus Torvalds { 20041da177e4SLinus Torvalds pte_t entry; 20051da177e4SLinus Torvalds 20061da177e4SLinus Torvalds entry = *pte; 20071da177e4SLinus Torvalds if (!pte_present(entry)) { 200865500d23SHugh Dickins if (pte_none(entry)) { 200965500d23SHugh Dickins if (!vma->vm_ops || !vma->vm_ops->nopage) 201065500d23SHugh Dickins return do_anonymous_page(mm, vma, address, 201165500d23SHugh Dickins pte, pmd, write_access); 201265500d23SHugh Dickins return do_no_page(mm, vma, address, 201365500d23SHugh Dickins pte, pmd, write_access); 201465500d23SHugh Dickins } 20151da177e4SLinus Torvalds if (pte_file(entry)) 201665500d23SHugh Dickins return do_file_page(mm, vma, address, 201765500d23SHugh Dickins pte, pmd, write_access, entry); 201865500d23SHugh Dickins return do_swap_page(mm, vma, address, 201965500d23SHugh Dickins pte, pmd, write_access, entry); 20201da177e4SLinus Torvalds } 20211da177e4SLinus Torvalds 20221da177e4SLinus Torvalds if (write_access) { 20231da177e4SLinus Torvalds if (!pte_write(entry)) 20241da177e4SLinus Torvalds return do_wp_page(mm, vma, address, pte, pmd, entry); 20251da177e4SLinus Torvalds entry = pte_mkdirty(entry); 20261da177e4SLinus Torvalds } 20271da177e4SLinus Torvalds entry = pte_mkyoung(entry); 20281da177e4SLinus Torvalds ptep_set_access_flags(vma, address, pte, entry, write_access); 20291da177e4SLinus Torvalds update_mmu_cache(vma, address, entry); 20301da177e4SLinus Torvalds lazy_mmu_prot_update(entry); 20311da177e4SLinus Torvalds pte_unmap(pte); 20321da177e4SLinus Torvalds spin_unlock(&mm->page_table_lock); 20331da177e4SLinus Torvalds return VM_FAULT_MINOR; 20341da177e4SLinus Torvalds } 20351da177e4SLinus Torvalds 20361da177e4SLinus Torvalds /* 20371da177e4SLinus Torvalds * By the time we get here, we already hold the mm semaphore 20381da177e4SLinus Torvalds */ 2039f33ea7f4SNick Piggin int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, 20401da177e4SLinus Torvalds unsigned long address, int write_access) 20411da177e4SLinus Torvalds { 20421da177e4SLinus Torvalds pgd_t *pgd; 20431da177e4SLinus Torvalds pud_t *pud; 20441da177e4SLinus Torvalds pmd_t *pmd; 20451da177e4SLinus Torvalds pte_t *pte; 20461da177e4SLinus Torvalds 20471da177e4SLinus Torvalds __set_current_state(TASK_RUNNING); 20481da177e4SLinus Torvalds 20491da177e4SLinus Torvalds inc_page_state(pgfault); 20501da177e4SLinus Torvalds 2051ac9b9c66SHugh Dickins if (unlikely(is_vm_hugetlb_page(vma))) 2052ac9b9c66SHugh Dickins return hugetlb_fault(mm, vma, address, write_access); 20531da177e4SLinus Torvalds 20541da177e4SLinus Torvalds /* 20551da177e4SLinus Torvalds * We need the page table lock to synchronize with kswapd 20561da177e4SLinus Torvalds * and the SMP-safe atomic PTE updates. 
20571da177e4SLinus Torvalds */ 20581da177e4SLinus Torvalds pgd = pgd_offset(mm, address); 20591da177e4SLinus Torvalds spin_lock(&mm->page_table_lock); 20601da177e4SLinus Torvalds 20611da177e4SLinus Torvalds pud = pud_alloc(mm, pgd, address); 20621da177e4SLinus Torvalds if (!pud) 20631da177e4SLinus Torvalds goto oom; 20641da177e4SLinus Torvalds 20651da177e4SLinus Torvalds pmd = pmd_alloc(mm, pud, address); 20661da177e4SLinus Torvalds if (!pmd) 20671da177e4SLinus Torvalds goto oom; 20681da177e4SLinus Torvalds 20691da177e4SLinus Torvalds pte = pte_alloc_map(mm, pmd, address); 20701da177e4SLinus Torvalds if (!pte) 20711da177e4SLinus Torvalds goto oom; 20721da177e4SLinus Torvalds 207365500d23SHugh Dickins return handle_pte_fault(mm, vma, address, pte, pmd, write_access); 20741da177e4SLinus Torvalds 20751da177e4SLinus Torvalds oom: 20761da177e4SLinus Torvalds spin_unlock(&mm->page_table_lock); 20771da177e4SLinus Torvalds return VM_FAULT_OOM; 20781da177e4SLinus Torvalds } 20791da177e4SLinus Torvalds 20801da177e4SLinus Torvalds #ifndef __PAGETABLE_PUD_FOLDED 20811da177e4SLinus Torvalds /* 20821da177e4SLinus Torvalds * Allocate page upper directory. 2083872fec16SHugh Dickins * We've already handled the fast-path in-line. 20841da177e4SLinus Torvalds */ 2085*1bb3630eSHugh Dickins int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) 20861da177e4SLinus Torvalds { 20871da177e4SLinus Torvalds pud_t *new; 20881da177e4SLinus Torvalds 2089872fec16SHugh Dickins if (mm != &init_mm) /* Temporary bridging hack */ 20901da177e4SLinus Torvalds spin_unlock(&mm->page_table_lock); 20911da177e4SLinus Torvalds new = pud_alloc_one(mm, address); 2092872fec16SHugh Dickins if (!new) { 2093872fec16SHugh Dickins if (mm != &init_mm) /* Temporary bridging hack */ 20941da177e4SLinus Torvalds spin_lock(&mm->page_table_lock); 2095*1bb3630eSHugh Dickins return -ENOMEM; 2096872fec16SHugh Dickins } 20971da177e4SLinus Torvalds 2098872fec16SHugh Dickins spin_lock(&mm->page_table_lock); 2099*1bb3630eSHugh Dickins if (pgd_present(*pgd)) /* Another has populated it */ 21001da177e4SLinus Torvalds pud_free(new); 2101*1bb3630eSHugh Dickins else 21021da177e4SLinus Torvalds pgd_populate(mm, pgd, new); 2103872fec16SHugh Dickins if (mm == &init_mm) /* Temporary bridging hack */ 2104872fec16SHugh Dickins spin_unlock(&mm->page_table_lock); 2105*1bb3630eSHugh Dickins return 0; 21061da177e4SLinus Torvalds } 21071da177e4SLinus Torvalds #endif /* __PAGETABLE_PUD_FOLDED */ 21081da177e4SLinus Torvalds 21091da177e4SLinus Torvalds #ifndef __PAGETABLE_PMD_FOLDED 21101da177e4SLinus Torvalds /* 21111da177e4SLinus Torvalds * Allocate page middle directory. 2112872fec16SHugh Dickins * We've already handled the fast-path in-line. 
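 * Note the recheck pattern below: the page_table_lock is dropped
 * around pmd_alloc_one(), so once it is retaken another thread may
 * already have populated the pud; in that case we free our new pmd
 * rather than installing it.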
21131da177e4SLinus Torvalds */ 2114*1bb3630eSHugh Dickins int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) 21151da177e4SLinus Torvalds { 21161da177e4SLinus Torvalds pmd_t *new; 21171da177e4SLinus Torvalds 2118872fec16SHugh Dickins if (mm != &init_mm) /* Temporary bridging hack */ 21191da177e4SLinus Torvalds spin_unlock(&mm->page_table_lock); 21201da177e4SLinus Torvalds new = pmd_alloc_one(mm, address); 2121872fec16SHugh Dickins if (!new) { 2122872fec16SHugh Dickins if (mm != &init_mm) /* Temporary bridging hack */ 21231da177e4SLinus Torvalds spin_lock(&mm->page_table_lock); 2124*1bb3630eSHugh Dickins return -ENOMEM; 2125872fec16SHugh Dickins } 21261da177e4SLinus Torvalds 2127872fec16SHugh Dickins spin_lock(&mm->page_table_lock); 21281da177e4SLinus Torvalds #ifndef __ARCH_HAS_4LEVEL_HACK 2129*1bb3630eSHugh Dickins if (pud_present(*pud)) /* Another has populated it */ 21301da177e4SLinus Torvalds pmd_free(new); 2131*1bb3630eSHugh Dickins else 21321da177e4SLinus Torvalds pud_populate(mm, pud, new); 21331da177e4SLinus Torvalds #else 2134*1bb3630eSHugh Dickins if (pgd_present(*pud)) /* Another has populated it */ 21351da177e4SLinus Torvalds pmd_free(new); 2136*1bb3630eSHugh Dickins else 21371da177e4SLinus Torvalds pgd_populate(mm, pud, new); 21381da177e4SLinus Torvalds #endif /* __ARCH_HAS_4LEVEL_HACK */ 2139872fec16SHugh Dickins if (mm == &init_mm) /* Temporary bridging hack */ 2140872fec16SHugh Dickins spin_unlock(&mm->page_table_lock); 2141*1bb3630eSHugh Dickins return 0; 21421da177e4SLinus Torvalds } 21431da177e4SLinus Torvalds #endif /* __PAGETABLE_PMD_FOLDED */ 21441da177e4SLinus Torvalds 21451da177e4SLinus Torvalds int make_pages_present(unsigned long addr, unsigned long end) 21461da177e4SLinus Torvalds { 21471da177e4SLinus Torvalds int ret, len, write; 21481da177e4SLinus Torvalds struct vm_area_struct * vma; 21491da177e4SLinus Torvalds 21501da177e4SLinus Torvalds vma = find_vma(current->mm, addr); 21511da177e4SLinus Torvalds if (!vma) 21521da177e4SLinus Torvalds return -1; 21531da177e4SLinus Torvalds write = (vma->vm_flags & VM_WRITE) != 0; 21541da177e4SLinus Torvalds if (addr >= end) 21551da177e4SLinus Torvalds BUG(); 21561da177e4SLinus Torvalds if (end > vma->vm_end) 21571da177e4SLinus Torvalds BUG(); 21581da177e4SLinus Torvalds len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE; 21591da177e4SLinus Torvalds ret = get_user_pages(current, current->mm, addr, 21601da177e4SLinus Torvalds len, write, 0, NULL, NULL); 21611da177e4SLinus Torvalds if (ret < 0) 21621da177e4SLinus Torvalds return ret; 21631da177e4SLinus Torvalds return ret == len ? 0 : -1; 21641da177e4SLinus Torvalds } 21651da177e4SLinus Torvalds 21661da177e4SLinus Torvalds /* 21671da177e4SLinus Torvalds * Map a vmalloc()-space virtual address to the physical page. 
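 * Handy e.g. for drivers that need to hand vmalloc()ed memory to
 * code which wants struct pages or page frame numbers;
 * vmalloc_to_pfn() below performs the same walk and returns the pfn.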
21681da177e4SLinus Torvalds */ 21691da177e4SLinus Torvalds struct page * vmalloc_to_page(void * vmalloc_addr) 21701da177e4SLinus Torvalds { 21711da177e4SLinus Torvalds unsigned long addr = (unsigned long) vmalloc_addr; 21721da177e4SLinus Torvalds struct page *page = NULL; 21731da177e4SLinus Torvalds pgd_t *pgd = pgd_offset_k(addr); 21741da177e4SLinus Torvalds pud_t *pud; 21751da177e4SLinus Torvalds pmd_t *pmd; 21761da177e4SLinus Torvalds pte_t *ptep, pte; 21771da177e4SLinus Torvalds 21781da177e4SLinus Torvalds if (!pgd_none(*pgd)) { 21791da177e4SLinus Torvalds pud = pud_offset(pgd, addr); 21801da177e4SLinus Torvalds if (!pud_none(*pud)) { 21811da177e4SLinus Torvalds pmd = pmd_offset(pud, addr); 21821da177e4SLinus Torvalds if (!pmd_none(*pmd)) { 21831da177e4SLinus Torvalds ptep = pte_offset_map(pmd, addr); 21841da177e4SLinus Torvalds pte = *ptep; 21851da177e4SLinus Torvalds if (pte_present(pte)) 21861da177e4SLinus Torvalds page = pte_page(pte); 21871da177e4SLinus Torvalds pte_unmap(ptep); 21881da177e4SLinus Torvalds } 21891da177e4SLinus Torvalds } 21901da177e4SLinus Torvalds } 21911da177e4SLinus Torvalds return page; 21921da177e4SLinus Torvalds } 21931da177e4SLinus Torvalds 21941da177e4SLinus Torvalds EXPORT_SYMBOL(vmalloc_to_page); 21951da177e4SLinus Torvalds 21961da177e4SLinus Torvalds /* 21971da177e4SLinus Torvalds * Map a vmalloc()-space virtual address to the physical page frame number. 21981da177e4SLinus Torvalds */ 21991da177e4SLinus Torvalds unsigned long vmalloc_to_pfn(void * vmalloc_addr) 22001da177e4SLinus Torvalds { 22011da177e4SLinus Torvalds return page_to_pfn(vmalloc_to_page(vmalloc_addr)); 22021da177e4SLinus Torvalds } 22031da177e4SLinus Torvalds 22041da177e4SLinus Torvalds EXPORT_SYMBOL(vmalloc_to_pfn); 22051da177e4SLinus Torvalds 22061da177e4SLinus Torvalds #if !defined(__HAVE_ARCH_GATE_AREA) 22071da177e4SLinus Torvalds 22081da177e4SLinus Torvalds #if defined(AT_SYSINFO_EHDR) 22095ce7852cSAdrian Bunk static struct vm_area_struct gate_vma; 22101da177e4SLinus Torvalds 22111da177e4SLinus Torvalds static int __init gate_vma_init(void) 22121da177e4SLinus Torvalds { 22131da177e4SLinus Torvalds gate_vma.vm_mm = NULL; 22141da177e4SLinus Torvalds gate_vma.vm_start = FIXADDR_USER_START; 22151da177e4SLinus Torvalds gate_vma.vm_end = FIXADDR_USER_END; 22161da177e4SLinus Torvalds gate_vma.vm_page_prot = PAGE_READONLY; 2217b5810039SNick Piggin gate_vma.vm_flags = VM_RESERVED; 22181da177e4SLinus Torvalds return 0; 22191da177e4SLinus Torvalds } 22201da177e4SLinus Torvalds __initcall(gate_vma_init); 22211da177e4SLinus Torvalds #endif 22221da177e4SLinus Torvalds 22231da177e4SLinus Torvalds struct vm_area_struct *get_gate_vma(struct task_struct *tsk) 22241da177e4SLinus Torvalds { 22251da177e4SLinus Torvalds #ifdef AT_SYSINFO_EHDR 22261da177e4SLinus Torvalds return &gate_vma; 22271da177e4SLinus Torvalds #else 22281da177e4SLinus Torvalds return NULL; 22291da177e4SLinus Torvalds #endif 22301da177e4SLinus Torvalds } 22311da177e4SLinus Torvalds 22321da177e4SLinus Torvalds int in_gate_area_no_task(unsigned long addr) 22331da177e4SLinus Torvalds { 22341da177e4SLinus Torvalds #ifdef AT_SYSINFO_EHDR 22351da177e4SLinus Torvalds if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END)) 22361da177e4SLinus Torvalds return 1; 22371da177e4SLinus Torvalds #endif 22381da177e4SLinus Torvalds return 0; 22391da177e4SLinus Torvalds } 22401da177e4SLinus Torvalds 22411da177e4SLinus Torvalds #endif /* __HAVE_ARCH_GATE_AREA */
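/*
 * A minimal sketch, not part of this file, of how a driver's ->mmap
 * method might use remap_pfn_range() above.  "mydrv_phys" is a made-up
 * physical base address, standing in for wherever the device memory
 * lives; the mm semaphore is already held when ->mmap is called, as
 * remap_pfn_range() requires:
 *
 *	static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		unsigned long size = vma->vm_end - vma->vm_start;
 *
 *		return remap_pfn_range(vma, vma->vm_start,
 *					mydrv_phys >> PAGE_SHIFT,
 *					size, vma->vm_page_prot);
 *	}
 */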