/*
 *  linux/arch/i386/mm/pgtable.c
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

void show_mem(void)
{
        int total = 0, reserved = 0;
        int shared = 0, cached = 0;
        int highmem = 0;
        struct page *page;
        pg_data_t *pgdat;
        unsigned long i;
        unsigned long flags;

        printk(KERN_INFO "Mem-info:\n");
        show_free_areas();
        printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
        for_each_online_pgdat(pgdat) {
                pgdat_resize_lock(pgdat, &flags);
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        page = pgdat_page_nr(pgdat, i);
                        total++;
                        if (PageHighMem(page))
                                highmem++;
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
                pgdat_resize_unlock(pgdat, &flags);
        }
        printk(KERN_INFO "%d pages of RAM\n", total);
        printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
        printk(KERN_INFO "%d reserved pages\n", reserved);
        printk(KERN_INFO "%d pages shared\n", shared);
        printk(KERN_INFO "%d pages swap cached\n", cached);

        printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY));
        printk(KERN_INFO "%lu pages writeback\n",
               global_page_state(NR_WRITEBACK));
        printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
        printk(KERN_INFO "%lu pages slab\n",
               global_page_state(NR_SLAB_RECLAIMABLE) +
               global_page_state(NR_SLAB_UNRECLAIMABLE));
        printk(KERN_INFO "%lu pages pagetables\n",
               global_page_state(NR_PAGETABLE));
}

/*
 * Associate a virtual page frame with a given physical page frame
 * and protection flags for that frame.
 */
static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        pgd = swapper_pg_dir + pgd_index(vaddr);
        if (pgd_none(*pgd)) {
                BUG();
                return;
        }
        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud)) {
                BUG();
                return;
        }
        pmd = pmd_offset(pud, vaddr);
        if (pmd_none(*pmd)) {
                BUG();
                return;
        }
        pte = pte_offset_kernel(pmd, vaddr);
        if (pgprot_val(flags))
                /* <pfn,flags> stored as-is, to permit clearing entries */
                set_pte(pte, pfn_pte(pfn, flags));
        else
                pte_clear(&init_mm, vaddr, pte);

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}
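
/*
 * Illustrative sketch only (not part of the original file): the same
 * pgd -> pud -> pmd -> pte walk that set_pte_pfn() performs above,
 * packaged as a hypothetical read-only lookup helper.  The name
 * kernel_pte_lookup() is made up for this example; it assumes the
 * mapping lives in swapper_pg_dir and returns NULL when any level is
 * missing instead of calling BUG().
 */
#if 0
static pte_t *kernel_pte_lookup(unsigned long vaddr)
{
        pgd_t *pgd = swapper_pg_dir + pgd_index(vaddr);
        pud_t *pud;
        pmd_t *pmd;

        if (pgd_none(*pgd))
                return NULL;
        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud))
                return NULL;
        pmd = pmd_offset(pud, vaddr);
        if (pmd_none(*pmd))
                return NULL;
        /* pte pages for kernel mappings are always in lowmem */
        return pte_offset_kernel(pmd, vaddr);
}
#endif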

/*
 * Associate a large virtual page frame with a given physical page frame
 * and protection flags for that frame. pfn is for the base of the page,
 * vaddr is what the page gets mapped to - both must be properly aligned.
 * The pmd must already be instantiated. Assumes PAE mode.
 */
void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;

        if (vaddr & (PMD_SIZE-1)) {             /* vaddr is misaligned */
                printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n");
                return; /* BUG(); */
        }
        if (pfn & (PTRS_PER_PTE-1)) {           /* pfn is misaligned */
                printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n");
                return; /* BUG(); */
        }
        pgd = swapper_pg_dir + pgd_index(vaddr);
        if (pgd_none(*pgd)) {
                printk(KERN_WARNING "set_pmd_pfn: pgd_none\n");
                return; /* BUG(); */
        }
        pud = pud_offset(pgd, vaddr);
        pmd = pmd_offset(pud, vaddr);
        set_pmd(pmd, pfn_pmd(pfn, flags));
        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}

static int fixmaps;
unsigned long __FIXADDR_TOP = 0xfffff000;
EXPORT_SYMBOL(__FIXADDR_TOP);

void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                BUG();
                return;
        }
        set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
        fixmaps++;
}

/**
 * reserve_top_address - reserves a hole in the top of kernel address space
 * @reserve: size of hole to reserve
 *
 * Can be used to relocate the fixmap area and poke a hole in the top
 * of kernel address space to make room for a hypervisor.
 */
void reserve_top_address(unsigned long reserve)
{
        BUG_ON(fixmaps > 0);
        printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
               (int)-reserve);
        __FIXADDR_TOP = -reserve - PAGE_SIZE;
        __VMALLOC_RESERVE += reserve;
}

pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
        return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
}

struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
        struct page *pte;

#ifdef CONFIG_HIGHPTE
        pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
#else
        pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
#endif
        return pte;
}

void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags)
{
        memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
}
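
/*
 * Illustrative sketch only (not part of the original file): with
 * CONFIG_HIGHPTE the page returned by pte_alloc_one() may live in
 * highmem, so it has no permanent kernel mapping and must be mapped
 * with kmap_atomic() before its entries can be touched.  The helper
 * name read_pte_entry() is hypothetical; pte_offset_map()/pte_unmap()
 * wrap this same pattern for real users.
 */
#if 0
static pte_t read_pte_entry(struct page *pte_page, unsigned int index)
{
        pte_t *base = kmap_atomic(pte_page, KM_PTE0);   /* temporary window */
        pte_t entry = base[index];

        kunmap_atomic(base, KM_PTE0);
        return entry;
}
#endif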

/*
 * List of all pgd's needed for non-PAE so it can invalidate entries
 * in both cached and uncached pgd's; not needed for PAE since the
 * kernel pmd is shared. If PAE were not to share the pmd a similar
 * tactic would be needed. This is essentially codepath-based locking
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
 * -- wli
 */
DEFINE_SPINLOCK(pgd_lock);
struct page *pgd_list;

static inline void pgd_list_add(pgd_t *pgd)
{
        struct page *page = virt_to_page(pgd);
        page->index = (unsigned long)pgd_list;
        if (pgd_list)
                set_page_private(pgd_list, (unsigned long)&page->index);
        pgd_list = page;
        set_page_private(page, (unsigned long)&pgd_list);
}

static inline void pgd_list_del(pgd_t *pgd)
{
        struct page *next, **pprev, *page = virt_to_page(pgd);
        next = (struct page *)page->index;
        pprev = (struct page **)page_private(page);
        *pprev = next;
        if (next)
                set_page_private(next, (unsigned long)pprev);
}

#if (PTRS_PER_PMD == 1)
/* Non-PAE pgd constructor */
static void pgd_ctor(void *pgd)
{
        unsigned long flags;

        /* !PAE, no pagetable sharing */
        memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));

        spin_lock_irqsave(&pgd_lock, flags);

        /* must happen under lock */
        clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
                        swapper_pg_dir + USER_PTRS_PER_PGD,
                        KERNEL_PGD_PTRS);
        paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
                                __pa(swapper_pg_dir) >> PAGE_SHIFT,
                                USER_PTRS_PER_PGD,
                                KERNEL_PGD_PTRS);
        pgd_list_add(pgd);
        spin_unlock_irqrestore(&pgd_lock, flags);
}
#else  /* PTRS_PER_PMD > 1 */
/* PAE pgd constructor */
static void pgd_ctor(void *pgd)
{
        /* PAE, kernel PMD may be shared */

        if (SHARED_KERNEL_PMD) {
                clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
                                swapper_pg_dir + USER_PTRS_PER_PGD,
                                KERNEL_PGD_PTRS);
        } else {
                unsigned long flags;

                memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
                spin_lock_irqsave(&pgd_lock, flags);
                pgd_list_add(pgd);
                spin_unlock_irqrestore(&pgd_lock, flags);
        }
}
#endif  /* PTRS_PER_PMD */

static void pgd_dtor(void *pgd)
{
        unsigned long flags; /* can be called from interrupt context */

        if (SHARED_KERNEL_PMD)
                return;

        paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
        spin_lock_irqsave(&pgd_lock, flags);
        pgd_list_del(pgd);
        spin_unlock_irqrestore(&pgd_lock, flags);
}

#define UNSHARED_PTRS_PER_PGD                           \
        (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
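
/*
 * Illustrative sketch only (not part of the original file): how a
 * caller such as pageattr.c can walk the list that pgd_list_add() and
 * pgd_list_del() maintain above.  The walk must hold pgd_lock, and the
 * next pointer is stashed in page->index.  The callback name
 * visit_pgd() is made up for this example.
 */
#if 0
static void for_each_cached_pgd(void (*visit_pgd)(pgd_t *pgd))
{
        unsigned long flags;
        struct page *page;

        spin_lock_irqsave(&pgd_lock, flags);
        for (page = pgd_list; page; page = (struct page *)page->index)
                visit_pgd((pgd_t *)page_address(page));
        spin_unlock_irqrestore(&pgd_lock, flags);
}
#endif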

/* If we allocate a pmd for part of the kernel address space, then
   make sure it's initialized with the appropriate kernel mappings.
   Otherwise use a cached zeroed pmd.  */
static pmd_t *pmd_cache_alloc(int idx)
{
        pmd_t *pmd;

        if (idx >= USER_PTRS_PER_PGD) {
                pmd = (pmd_t *)__get_free_page(GFP_KERNEL);

                if (pmd)
                        memcpy(pmd,
                               (void *)pgd_page_vaddr(swapper_pg_dir[idx]),
                               sizeof(pmd_t) * PTRS_PER_PMD);
        } else
                pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);

        return pmd;
}

static void pmd_cache_free(pmd_t *pmd, int idx)
{
        if (idx >= USER_PTRS_PER_PGD)
                free_page((unsigned long)pmd);
        else
                kmem_cache_free(pmd_cache, pmd);
}

pgd_t *pgd_alloc(struct mm_struct *mm)
{
        int i;
        pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor);

        if (PTRS_PER_PMD == 1 || !pgd)
                return pgd;

        for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
                pmd_t *pmd = pmd_cache_alloc(i);

                if (!pmd)
                        goto out_oom;

                paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
                set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
        }
        return pgd;

out_oom:
        for (i--; i >= 0; i--) {
                pgd_t pgdent = pgd[i];
                void *pmd = (void *)__va(pgd_val(pgdent)-1);
                paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
                pmd_cache_free(pmd, i);
        }
        quicklist_free(0, pgd_dtor, pgd);
        return NULL;
}

void pgd_free(pgd_t *pgd)
{
        int i;

        /* in the PAE case user pgd entries are overwritten before usage */
        if (PTRS_PER_PMD > 1)
                for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
                        pgd_t pgdent = pgd[i];
                        void *pmd = (void *)__va(pgd_val(pgdent)-1);
                        paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
                        pmd_cache_free(pmd, i);
                }
        /* in the non-PAE case, free_pgtables() clears user pgd entries */
        quicklist_free(0, pgd_dtor, pgd);
}

void check_pgt_cache(void)
{
        quicklist_trim(0, pgd_dtor, 25, 16);
}
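
/*
 * Illustrative sketch only (not part of the original file): in the PAE
 * path above, pgd_alloc() encodes each pgd slot as __pgd(1 + __pa(pmd)),
 * i.e. the pmd's physical address with the low present bit set, and
 * pgd_free() recovers the pmd with __va(pgd_val(entry) - 1).  The helper
 * name pgd_entry_to_pmd() is made up to spell out that decoding step.
 */
#if 0
static pmd_t *pgd_entry_to_pmd(pgd_t entry)
{
        /* strip the present bit, then map the physical address back */
        return (pmd_t *)__va(pgd_val(entry) - 1);
}
#endif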