1 #include <linux/sched.h> 2 #include <linux/kernel.h> 3 #include <linux/errno.h> 4 #include <linux/mm.h> 5 #include <linux/nmi.h> 6 #include <linux/swap.h> 7 #include <linux/smp.h> 8 #include <linux/highmem.h> 9 #include <linux/slab.h> 10 #include <linux/pagemap.h> 11 #include <linux/spinlock.h> 12 #include <linux/module.h> 13 #include <linux/quicklist.h> 14 15 #include <asm/system.h> 16 #include <asm/pgtable.h> 17 #include <asm/pgalloc.h> 18 #include <asm/fixmap.h> 19 #include <asm/e820.h> 20 #include <asm/tlb.h> 21 #include <asm/tlbflush.h> 22 23 void show_mem(void) 24 { 25 int total = 0, reserved = 0; 26 int shared = 0, cached = 0; 27 int highmem = 0; 28 struct page *page; 29 pg_data_t *pgdat; 30 unsigned long i; 31 unsigned long flags; 32 33 printk(KERN_INFO "Mem-info:\n"); 34 show_free_areas(); 35 for_each_online_pgdat(pgdat) { 36 pgdat_resize_lock(pgdat, &flags); 37 for (i = 0; i < pgdat->node_spanned_pages; ++i) { 38 if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) 39 touch_nmi_watchdog(); 40 page = pgdat_page_nr(pgdat, i); 41 total++; 42 if (PageHighMem(page)) 43 highmem++; 44 if (PageReserved(page)) 45 reserved++; 46 else if (PageSwapCache(page)) 47 cached++; 48 else if (page_count(page)) 49 shared += page_count(page) - 1; 50 } 51 pgdat_resize_unlock(pgdat, &flags); 52 } 53 printk(KERN_INFO "%d pages of RAM\n", total); 54 printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); 55 printk(KERN_INFO "%d reserved pages\n", reserved); 56 printk(KERN_INFO "%d pages shared\n", shared); 57 printk(KERN_INFO "%d pages swap cached\n", cached); 58 59 printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY)); 60 printk(KERN_INFO "%lu pages writeback\n", 61 global_page_state(NR_WRITEBACK)); 62 printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED)); 63 printk(KERN_INFO "%lu pages slab\n", 64 global_page_state(NR_SLAB_RECLAIMABLE) + 65 global_page_state(NR_SLAB_UNRECLAIMABLE)); 66 printk(KERN_INFO "%lu pages pagetables\n", 67 global_page_state(NR_PAGETABLE)); 68 } 69 70 /* 71 * Associate a virtual page frame with a given physical page frame 72 * and protection flags for that frame. 73 */ 74 static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) 75 { 76 pgd_t *pgd; 77 pud_t *pud; 78 pmd_t *pmd; 79 pte_t *pte; 80 81 pgd = swapper_pg_dir + pgd_index(vaddr); 82 if (pgd_none(*pgd)) { 83 BUG(); 84 return; 85 } 86 pud = pud_offset(pgd, vaddr); 87 if (pud_none(*pud)) { 88 BUG(); 89 return; 90 } 91 pmd = pmd_offset(pud, vaddr); 92 if (pmd_none(*pmd)) { 93 BUG(); 94 return; 95 } 96 pte = pte_offset_kernel(pmd, vaddr); 97 if (pgprot_val(flags)) 98 set_pte_present(&init_mm, vaddr, pte, pfn_pte(pfn, flags)); 99 else 100 pte_clear(&init_mm, vaddr, pte); 101 102 /* 103 * It's enough to flush this one mapping. 104 * (PGE mappings get flushed as well) 105 */ 106 __flush_tlb_one(vaddr); 107 } 108 109 /* 110 * Associate a large virtual page frame with a given physical page frame 111 * and protection flags for that frame. pfn is for the base of the page, 112 * vaddr is what the page gets mapped to - both must be properly aligned. 113 * The pmd must already be instantiated. Assumes PAE mode. 114 */ 115 void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) 116 { 117 pgd_t *pgd; 118 pud_t *pud; 119 pmd_t *pmd; 120 121 if (vaddr & (PMD_SIZE-1)) { /* vaddr is misaligned */ 122 printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n"); 123 return; /* BUG(); */ 124 } 125 if (pfn & (PTRS_PER_PTE-1)) { /* pfn is misaligned */ 126 printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n"); 127 return; /* BUG(); */ 128 } 129 pgd = swapper_pg_dir + pgd_index(vaddr); 130 if (pgd_none(*pgd)) { 131 printk(KERN_WARNING "set_pmd_pfn: pgd_none\n"); 132 return; /* BUG(); */ 133 } 134 pud = pud_offset(pgd, vaddr); 135 pmd = pmd_offset(pud, vaddr); 136 set_pmd(pmd, pfn_pmd(pfn, flags)); 137 /* 138 * It's enough to flush this one mapping. 139 * (PGE mappings get flushed as well) 140 */ 141 __flush_tlb_one(vaddr); 142 } 143 144 static int fixmaps; 145 unsigned long __FIXADDR_TOP = 0xfffff000; 146 EXPORT_SYMBOL(__FIXADDR_TOP); 147 148 void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) 149 { 150 unsigned long address = __fix_to_virt(idx); 151 152 if (idx >= __end_of_fixed_addresses) { 153 BUG(); 154 return; 155 } 156 set_pte_pfn(address, phys >> PAGE_SHIFT, flags); 157 fixmaps++; 158 } 159 160 /** 161 * reserve_top_address - reserves a hole in the top of kernel address space 162 * @reserve - size of hole to reserve 163 * 164 * Can be used to relocate the fixmap area and poke a hole in the top 165 * of kernel address space to make room for a hypervisor. 166 */ 167 void reserve_top_address(unsigned long reserve) 168 { 169 BUG_ON(fixmaps > 0); 170 printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", 171 (int)-reserve); 172 __FIXADDR_TOP = -reserve - PAGE_SIZE; 173 __VMALLOC_RESERVE += reserve; 174 } 175 176 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) 177 { 178 return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); 179 } 180 181 pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) 182 { 183 struct page *pte; 184 185 #ifdef CONFIG_HIGHPTE 186 pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); 187 #else 188 pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); 189 #endif 190 if (pte) 191 pgtable_page_ctor(pte); 192 return pte; 193 } 194 195 /* 196 * List of all pgd's needed for non-PAE so it can invalidate entries 197 * in both cached and uncached pgd's; not needed for PAE since the 198 * kernel pmd is shared. If PAE were not to share the pmd a similar 199 * tactic would be needed. This is essentially codepath-based locking 200 * against pageattr.c; it is the unique case in which a valid change 201 * of kernel pagetables can't be lazily synchronized by vmalloc faults. 202 * vmalloc faults work because attached pagetables are never freed. 203 * -- wli 204 */ 205 static inline void pgd_list_add(pgd_t *pgd) 206 { 207 struct page *page = virt_to_page(pgd); 208 209 list_add(&page->lru, &pgd_list); 210 } 211 212 static inline void pgd_list_del(pgd_t *pgd) 213 { 214 struct page *page = virt_to_page(pgd); 215 216 list_del(&page->lru); 217 } 218 219 #define UNSHARED_PTRS_PER_PGD \ 220 (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD) 221 222 static void pgd_ctor(void *p) 223 { 224 pgd_t *pgd = p; 225 unsigned long flags; 226 227 /* Clear usermode parts of PGD */ 228 memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); 229 230 spin_lock_irqsave(&pgd_lock, flags); 231 232 /* If the pgd points to a shared pagetable level (either the 233 ptes in non-PAE, or shared PMD in PAE), then just copy the 234 references from swapper_pg_dir. */ 235 if (PAGETABLE_LEVELS == 2 || 236 (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) { 237 clone_pgd_range(pgd + USER_PTRS_PER_PGD, 238 swapper_pg_dir + USER_PTRS_PER_PGD, 239 KERNEL_PGD_PTRS); 240 paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, 241 __pa(swapper_pg_dir) >> PAGE_SHIFT, 242 USER_PTRS_PER_PGD, 243 KERNEL_PGD_PTRS); 244 } 245 246 /* list required to sync kernel mapping updates */ 247 if (!SHARED_KERNEL_PMD) 248 pgd_list_add(pgd); 249 250 spin_unlock_irqrestore(&pgd_lock, flags); 251 } 252 253 static void pgd_dtor(void *pgd) 254 { 255 unsigned long flags; /* can be called from interrupt context */ 256 257 if (SHARED_KERNEL_PMD) 258 return; 259 260 spin_lock_irqsave(&pgd_lock, flags); 261 pgd_list_del(pgd); 262 spin_unlock_irqrestore(&pgd_lock, flags); 263 } 264 265 #ifdef CONFIG_X86_PAE 266 /* 267 * Mop up any pmd pages which may still be attached to the pgd. 268 * Normally they will be freed by munmap/exit_mmap, but any pmd we 269 * preallocate which never got a corresponding vma will need to be 270 * freed manually. 271 */ 272 static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) 273 { 274 int i; 275 276 for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) { 277 pgd_t pgd = pgdp[i]; 278 279 if (pgd_val(pgd) != 0) { 280 pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); 281 282 pgdp[i] = native_make_pgd(0); 283 284 paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT); 285 pmd_free(mm, pmd); 286 } 287 } 288 } 289 290 /* 291 * In PAE mode, we need to do a cr3 reload (=tlb flush) when 292 * updating the top-level pagetable entries to guarantee the 293 * processor notices the update. Since this is expensive, and 294 * all 4 top-level entries are used almost immediately in a 295 * new process's life, we just pre-populate them here. 296 * 297 * Also, if we're in a paravirt environment where the kernel pmd is 298 * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate 299 * and initialize the kernel pmds here. 300 */ 301 static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) 302 { 303 pud_t *pud; 304 unsigned long addr; 305 int i; 306 307 pud = pud_offset(pgd, 0); 308 for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; 309 i++, pud++, addr += PUD_SIZE) { 310 pmd_t *pmd = pmd_alloc_one(mm, addr); 311 312 if (!pmd) { 313 pgd_mop_up_pmds(mm, pgd); 314 return 0; 315 } 316 317 if (i >= USER_PTRS_PER_PGD) 318 memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), 319 sizeof(pmd_t) * PTRS_PER_PMD); 320 321 pud_populate(mm, pud, pmd); 322 } 323 324 return 1; 325 } 326 #else /* !CONFIG_X86_PAE */ 327 /* No need to prepopulate any pagetable entries in non-PAE modes. */ 328 static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) 329 { 330 return 1; 331 } 332 333 static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) 334 { 335 } 336 #endif /* CONFIG_X86_PAE */ 337 338 pgd_t *pgd_alloc(struct mm_struct *mm) 339 { 340 pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); 341 342 /* so that alloc_pd can use it */ 343 mm->pgd = pgd; 344 if (pgd) 345 pgd_ctor(pgd); 346 347 if (pgd && !pgd_prepopulate_pmd(mm, pgd)) { 348 pgd_dtor(pgd); 349 free_page((unsigned long)pgd); 350 pgd = NULL; 351 } 352 353 return pgd; 354 } 355 356 void pgd_free(struct mm_struct *mm, pgd_t *pgd) 357 { 358 pgd_mop_up_pmds(mm, pgd); 359 pgd_dtor(pgd); 360 free_page((unsigned long)pgd); 361 } 362 363 void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) 364 { 365 pgtable_page_dtor(pte); 366 paravirt_release_pt(page_to_pfn(pte)); 367 tlb_remove_page(tlb, pte); 368 } 369 370 #ifdef CONFIG_X86_PAE 371 372 void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) 373 { 374 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); 375 tlb_remove_page(tlb, virt_to_page(pmd)); 376 } 377 378 #endif 379 380 int pmd_bad(pmd_t pmd) 381 { 382 WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd)); 383 384 return pmd_bad_v1(pmd); 385 } 386