/*
 *  linux/arch/i386/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 */

#include <linux/module.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/efi.h>
#include <linux/memory_hotplug.h>
#include <linux/initrd.h>
#include <linux/cpumask.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/paravirt.h>

unsigned int __VMALLOC_RESERVE = 128 << 20;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
unsigned long highstart_pfn, highend_pfn;

static int noinline do_test_wp_bit(void);

/*
 * Creates a middle page table and puts a pointer to it in the
 * given global directory entry.  This only returns the pgd entry
 * in non-PAE compilation mode, since the middle layer is folded.
 */
static pmd_t * __init one_md_table_init(pgd_t *pgd)
{
        pud_t *pud;
        pmd_t *pmd_table;

#ifdef CONFIG_X86_PAE
        if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
                pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);

                paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
                set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
                pud = pud_offset(pgd, 0);
                if (pmd_table != pmd_offset(pud, 0))
                        BUG();
        }
#endif
        pud = pud_offset(pgd, 0);
        pmd_table = pmd_offset(pud, 0);
        return pmd_table;
}

/*
 * Create a page table and place a pointer to it in a middle page
 * directory entry.
 */
static pte_t * __init one_page_table_init(pmd_t *pmd)
{
        if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
                pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);

                paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
                set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
                BUG_ON(page_table != pte_offset_kernel(pmd, 0));
        }

        return pte_offset_kernel(pmd, 0);
}
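/*
 * Note: both helpers above rely on alloc_bootmem_low_pages() handing
 * back zeroed low-memory pages, so a freshly allocated pmd/pte page
 * starts out with every entry not-present and needs no explicit
 * clearing here.
 */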
/*
 * This function initializes a certain range of kernel virtual memory
 * with new bootmem page tables, everywhere page tables are missing in
 * the given range.
 */

/*
 * NOTE: The pagetables are allocated contiguous on the physical space
 * so we can cache the place of the first one and move around without
 * checking the pgd every time.
 */
static void __init page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
{
        pgd_t *pgd;
        pmd_t *pmd;
        int pgd_idx, pmd_idx;
        unsigned long vaddr;

        vaddr = start;
        pgd_idx = pgd_index(vaddr);
        pmd_idx = pmd_index(vaddr);
        pgd = pgd_base + pgd_idx;

        for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
                pmd = one_md_table_init(pgd);
                pmd = pmd + pmd_index(vaddr);
                for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
                        one_page_table_init(pmd);

                        vaddr += PMD_SIZE;
                }
                pmd_idx = 0;
        }
}

static inline int is_kernel_text(unsigned long addr)
{
        if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
                return 1;
        return 0;
}

/*
 * This maps the physical memory to kernel virtual address space, a total
 * of max_low_pfn pages, by creating page tables starting from address
 * PAGE_OFFSET.
 */
static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
{
        unsigned long pfn;
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;
        int pgd_idx, pmd_idx, pte_ofs;

        pgd_idx = pgd_index(PAGE_OFFSET);
        pgd = pgd_base + pgd_idx;
        pfn = 0;

        for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
                pmd = one_md_table_init(pgd);
                if (pfn >= max_low_pfn)
                        continue;
                for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
                        unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;

                        /* Map with big pages if possible, otherwise
                           create normal page tables. */
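                        /*
                         * One PSE pmd entry covers PTRS_PER_PTE small
                         * pages in one go: 4MB with 2-level paging,
                         * 2MB under PAE.  The text check looks at both
                         * ends of the candidate large page so the
                         * kernel image never loses its executable
                         * mapping.
                         */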
                        if (cpu_has_pse) {
                                unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
                                if (is_kernel_text(address) || is_kernel_text(address2))
                                        set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
                                else
                                        set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));

                                pfn += PTRS_PER_PTE;
                        } else {
                                pte = one_page_table_init(pmd);

                                for (pte_ofs = 0;
                                     pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
                                     pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
                                        if (is_kernel_text(address))
                                                set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
                                        else
                                                set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
                                }
                        }
                }
        }
}

static inline int page_kills_ppro(unsigned long pagenr)
{
        if (pagenr >= 0x70000 && pagenr <= 0x7003F)
                return 1;
        return 0;
}

int page_is_ram(unsigned long pagenr)
{
        int i;
        unsigned long addr, end;

        if (efi_enabled) {
                efi_memory_desc_t *md;
                void *p;

                for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
                        md = p;
                        if (!is_available_memory(md))
                                continue;
                        addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT;
                        end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT;

                        if ((pagenr >= addr) && (pagenr < end))
                                return 1;
                }
                return 0;
        }

        for (i = 0; i < e820.nr_map; i++) {

                if (e820.map[i].type != E820_RAM)       /* not usable memory */
                        continue;
                /*
                 * !!!FIXME!!! Some BIOSen report areas as RAM that
                 * are not. Notably the 640K->1MB area. We need a sanity
                 * check here.
                 */
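                /*
                 * Round the region start up and the end down to whole
                 * pages, so a pfn only counts as RAM when the entire
                 * page lies inside the e820 range.
                 */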
                addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
                end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
                if ((pagenr >= addr) && (pagenr < end))
                        return 1;
        }
        return 0;
}

#ifdef CONFIG_HIGHMEM
pte_t *kmap_pte;
pgprot_t kmap_prot;

#define kmap_get_fixmap_pte(vaddr) \
        pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr))

static void __init kmap_init(void)
{
        unsigned long kmap_vstart;

        /* cache the first kmap pte */
        kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
        kmap_pte = kmap_get_fixmap_pte(kmap_vstart);

        kmap_prot = PAGE_KERNEL;
}

static void __init permanent_kmaps_init(pgd_t *pgd_base)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        unsigned long vaddr;

        vaddr = PKMAP_BASE;
        page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);

        pgd = swapper_pg_dir + pgd_index(vaddr);
        pud = pud_offset(pgd, vaddr);
        pmd = pmd_offset(pud, vaddr);
        pte = pte_offset_kernel(pmd, vaddr);
        pkmap_page_table = pte;
}

static void __meminit free_new_highpage(struct page *page)
{
        init_page_count(page);
        __free_page(page);
        totalhigh_pages++;
}

void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
{
        if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
                ClearPageReserved(page);
                free_new_highpage(page);
        } else
                SetPageReserved(page);
}

static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn)
{
        free_new_highpage(page);
        totalram_pages++;
#ifdef CONFIG_FLATMEM
        max_mapnr = max(pfn, max_mapnr);
#endif
        num_physpages++;
        return 0;
}
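/*
 * online_page() below is the arch-side hook that the memory hotplug
 * core invokes for each page of a newly added section when it is
 * brought online.
 */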
/*
 * Not currently handling the NUMA case.
 * Assuming single node and all memory that
 * has been added dynamically that would be
 * onlined here is in HIGHMEM.
 */
void __meminit online_page(struct page *page)
{
        ClearPageReserved(page);
        add_one_highpage_hotplug(page, page_to_pfn(page));
}


#ifdef CONFIG_NUMA
extern void set_highmem_pages_init(int);
#else
static void __init set_highmem_pages_init(int bad_ppro)
{
        int pfn;
        for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
                add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
        totalram_pages += totalhigh_pages;
}
#endif /* CONFIG_NUMA */

#else
#define kmap_init() do { } while (0)
#define permanent_kmaps_init(pgd_base) do { } while (0)
#define set_highmem_pages_init(bad_ppro) do { } while (0)
#endif /* CONFIG_HIGHMEM */

unsigned long long __PAGE_KERNEL = _PAGE_KERNEL;
EXPORT_SYMBOL(__PAGE_KERNEL);
unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;

#ifdef CONFIG_NUMA
extern void __init remap_numa_kva(void);
#else
#define remap_numa_kva() do {} while (0)
#endif

void __init native_pagetable_setup_start(pgd_t *base)
{
#ifdef CONFIG_X86_PAE
        int i;

        /*
         * Init entries of the first-level page table to the
         * zero page, if they haven't already been set up.
         *
         * In a normal native boot, we'll be running on a
         * pagetable rooted in swapper_pg_dir, but not in PAE
         * mode, so this will end up clobbering the mappings
         * for the lower 24Mbytes of the address space,
         * without affecting the kernel address space.
         */
        for (i = 0; i < USER_PTRS_PER_PGD; i++)
                set_pgd(&base[i],
                        __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));

        /* Make sure kernel address space is empty so that a pagetable
           will be allocated for it. */
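        /*
         * Clearing the kernel-half slots forces pagetable_init() to
         * allocate fresh pmd pages for them via one_md_table_init().
         */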
        memset(&base[USER_PTRS_PER_PGD], 0,
               KERNEL_PGD_PTRS * sizeof(pgd_t));
#else
        paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);
#endif
}

void __init native_pagetable_setup_done(pgd_t *base)
{
#ifdef CONFIG_X86_PAE
        /*
         * Add low memory identity-mappings - SMP needs it when
         * starting up on an AP from real-mode. In the non-PAE
         * case we already have these mappings through head.S.
         * All user-space mappings are explicitly cleared after
         * SMP startup.
         */
        set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
#endif
}
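/*
 * On a non-paravirt build, the paravirt_pagetable_setup_start/done()
 * calls in pagetable_init() below resolve directly to the two native
 * functions above; a paravirtualized kernel may route them through
 * hypervisor-specific hooks instead.
 */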
/*
 * Build a proper pagetable for the kernel mappings.  Up until this
 * point, we've been running on some set of pagetables constructed by
 * the boot process.
 *
 * If we're booting on native hardware, this will be a pagetable
 * constructed in arch/i386/kernel/head.S, and not running in PAE mode
 * (even if we'll end up running in PAE).  The root of the pagetable
 * will be swapper_pg_dir.
 *
 * If we're booting paravirtualized under a hypervisor, then there are
 * more options: we may already be running PAE, and the pagetable may
 * or may not be based in swapper_pg_dir.  In any case,
 * paravirt_pagetable_setup_start() will set up swapper_pg_dir
 * appropriately for the rest of the initialization to work.
 *
 * In general, pagetable_init() assumes that the pagetable may already
 * be partially populated, and so it avoids stomping on any existing
 * mappings.
 */
static void __init pagetable_init(void)
{
        unsigned long vaddr, end;
        pgd_t *pgd_base = swapper_pg_dir;

        paravirt_pagetable_setup_start(pgd_base);

        /* Enable PSE if available */
        if (cpu_has_pse)
                set_in_cr4(X86_CR4_PSE);

        /* Enable PGE if available */
        if (cpu_has_pge) {
                set_in_cr4(X86_CR4_PGE);
                __PAGE_KERNEL |= _PAGE_GLOBAL;
                __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
        }

        kernel_physical_mapping_init(pgd_base);
        remap_numa_kva();

        /*
         * Fixed mappings, only the page table structure has to be
         * created - mappings will be set by set_fixmap():
         */
        vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
        end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
        page_table_range_init(vaddr, end, pgd_base);

        permanent_kmaps_init(pgd_base);

        paravirt_pagetable_setup_done(pgd_base);
}

#if defined(CONFIG_HIBERNATION) || defined(CONFIG_ACPI)
/*
 * Swap suspend & friends need this for resume because things like the
 * intel-agp driver might have split up a kernel 4MB mapping.
 */
char __nosavedata swsusp_pg_dir[PAGE_SIZE]
        __attribute__ ((aligned (PAGE_SIZE)));

static inline void save_pg_dir(void)
{
        memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
}
#else
static inline void save_pg_dir(void)
{
}
#endif

void zap_low_mappings(void)
{
        int i;

        save_pg_dir();

        /*
         * Zap initial low-memory mappings.
         *
         * Note that "pgd_clear()" doesn't do it for
         * us, because pgd_clear() is a no-op on i386.
         */
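        /*
         * In PAE mode each slot is pointed at the empty zero page
         * (present, but all entries not-present) rather than being
         * cleared outright; non-PAE can simply write a zero pgd.
         */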
        for (i = 0; i < USER_PTRS_PER_PGD; i++)
#ifdef CONFIG_X86_PAE
                set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
#else
                set_pgd(swapper_pg_dir+i, __pgd(0));
#endif
        flush_tlb_all();
}

int nx_enabled = 0;

#ifdef CONFIG_X86_PAE

static int disable_nx __initdata = 0;
u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
EXPORT_SYMBOL_GPL(__supported_pte_mask);

/*
 * noexec = on|off
 *
 * Control non executable mappings.
 *
 * on      Enable
 * off     Disable
 */
static int __init noexec_setup(char *str)
{
        if (!str || !strcmp(str, "on")) {
                if (cpu_has_nx) {
                        __supported_pte_mask |= _PAGE_NX;
                        disable_nx = 0;
                }
        } else if (!strcmp(str, "off")) {
                disable_nx = 1;
                __supported_pte_mask &= ~_PAGE_NX;
        } else
                return -EINVAL;

        return 0;
}
early_param("noexec", noexec_setup);

static void __init set_nx(void)
{
        unsigned int v[4], l, h;

        if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
                cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
                /* EDX bit 20 of CPUID leaf 0x80000001 is the NX feature flag */
                if ((v[3] & (1 << 20)) && !disable_nx) {
                        rdmsr(MSR_EFER, l, h);
                        l |= EFER_NX;
                        wrmsr(MSR_EFER, l, h);
                        nx_enabled = 1;
                        __supported_pte_mask |= _PAGE_NX;
                }
        }
}

/*
 * Enables/disables executability of a given kernel page and
 * returns the previous setting.
 */
int __init set_kernel_exec(unsigned long vaddr, int enable)
{
        pte_t *pte;
        int ret = 1;

        if (!nx_enabled)
                goto out;

        pte = lookup_address(vaddr);
        BUG_ON(!pte);

        if (!pte_exec_kernel(*pte))
                ret = 0;

        if (enable)
                pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
        else
                pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
        pte_update_defer(&init_mm, vaddr, pte);
        __flush_tlb_all();
out:
        return ret;
}

#endif

/*
 * paging_init() sets up the page tables - note that the first 8MB are
 * already mapped by head.S.
 *
 * This routine also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 */
void __init paging_init(void)
{
#ifdef CONFIG_X86_PAE
        set_nx();
        if (nx_enabled)
                printk("NX (Execute Disable) protection: active\n");
#endif

        pagetable_init();

        load_cr3(swapper_pg_dir);

#ifdef CONFIG_X86_PAE
        /*
         * We will bail out later - printk doesn't work right now so
         * the user would just see a hanging kernel.
         */
        if (cpu_has_pae)
                set_in_cr4(X86_CR4_PAE);
#endif
        __flush_tlb_all();

        kmap_init();
}

/*
 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This
 * used to involve black magic jumps to work around some nasty CPU bugs,
 * but fortunately the switch to using exceptions got rid of all that.
 */
static void __init test_wp_bit(void)
{
        printk("Checking if this processor honours the WP bit even in supervisor mode... ");
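        /*
         * The trick: map an existing page read-only at a spare fixmap
         * slot and try to write to it.  If WP is honoured the write
         * faults, and do_test_wp_bit() recovers via the exception
         * table instead of oopsing.
         */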
"); 595*ad757b6aSThomas Gleixner 596*ad757b6aSThomas Gleixner /* Any page-aligned address will do, the test is non-destructive */ 597*ad757b6aSThomas Gleixner __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY); 598*ad757b6aSThomas Gleixner boot_cpu_data.wp_works_ok = do_test_wp_bit(); 599*ad757b6aSThomas Gleixner clear_fixmap(FIX_WP_TEST); 600*ad757b6aSThomas Gleixner 601*ad757b6aSThomas Gleixner if (!boot_cpu_data.wp_works_ok) { 602*ad757b6aSThomas Gleixner printk("No.\n"); 603*ad757b6aSThomas Gleixner #ifdef CONFIG_X86_WP_WORKS_OK 604*ad757b6aSThomas Gleixner panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!"); 605*ad757b6aSThomas Gleixner #endif 606*ad757b6aSThomas Gleixner } else { 607*ad757b6aSThomas Gleixner printk("Ok.\n"); 608*ad757b6aSThomas Gleixner } 609*ad757b6aSThomas Gleixner } 610*ad757b6aSThomas Gleixner 611*ad757b6aSThomas Gleixner static struct kcore_list kcore_mem, kcore_vmalloc; 612*ad757b6aSThomas Gleixner 613*ad757b6aSThomas Gleixner void __init mem_init(void) 614*ad757b6aSThomas Gleixner { 615*ad757b6aSThomas Gleixner extern int ppro_with_ram_bug(void); 616*ad757b6aSThomas Gleixner int codesize, reservedpages, datasize, initsize; 617*ad757b6aSThomas Gleixner int tmp; 618*ad757b6aSThomas Gleixner int bad_ppro; 619*ad757b6aSThomas Gleixner 620*ad757b6aSThomas Gleixner #ifdef CONFIG_FLATMEM 621*ad757b6aSThomas Gleixner BUG_ON(!mem_map); 622*ad757b6aSThomas Gleixner #endif 623*ad757b6aSThomas Gleixner 624*ad757b6aSThomas Gleixner bad_ppro = ppro_with_ram_bug(); 625*ad757b6aSThomas Gleixner 626*ad757b6aSThomas Gleixner #ifdef CONFIG_HIGHMEM 627*ad757b6aSThomas Gleixner /* check that fixmap and pkmap do not overlap */ 628*ad757b6aSThomas Gleixner if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { 629*ad757b6aSThomas Gleixner printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n"); 630*ad757b6aSThomas Gleixner printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", 631*ad757b6aSThomas Gleixner PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START); 632*ad757b6aSThomas Gleixner BUG(); 633*ad757b6aSThomas Gleixner } 634*ad757b6aSThomas Gleixner #endif 635*ad757b6aSThomas Gleixner 636*ad757b6aSThomas Gleixner /* this will put all low memory onto the freelists */ 637*ad757b6aSThomas Gleixner totalram_pages += free_all_bootmem(); 638*ad757b6aSThomas Gleixner 639*ad757b6aSThomas Gleixner reservedpages = 0; 640*ad757b6aSThomas Gleixner for (tmp = 0; tmp < max_low_pfn; tmp++) 641*ad757b6aSThomas Gleixner /* 642*ad757b6aSThomas Gleixner * Only count reserved RAM pages 643*ad757b6aSThomas Gleixner */ 644*ad757b6aSThomas Gleixner if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) 645*ad757b6aSThomas Gleixner reservedpages++; 646*ad757b6aSThomas Gleixner 647*ad757b6aSThomas Gleixner set_highmem_pages_init(bad_ppro); 648*ad757b6aSThomas Gleixner 649*ad757b6aSThomas Gleixner codesize = (unsigned long) &_etext - (unsigned long) &_text; 650*ad757b6aSThomas Gleixner datasize = (unsigned long) &_edata - (unsigned long) &_etext; 651*ad757b6aSThomas Gleixner initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; 652*ad757b6aSThomas Gleixner 653*ad757b6aSThomas Gleixner kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); 654*ad757b6aSThomas Gleixner kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, 655*ad757b6aSThomas Gleixner VMALLOC_END-VMALLOC_START); 656*ad757b6aSThomas Gleixner 657*ad757b6aSThomas Gleixner printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk 
               (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
               num_physpages << (PAGE_SHIFT-10),
               codesize >> 10,
               reservedpages << (PAGE_SHIFT-10),
               datasize >> 10,
               initsize >> 10,
               (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
              );

#if 1 /* double-sanity-check paranoia */
        printk("virtual kernel memory layout:\n"
               "    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
#ifdef CONFIG_HIGHMEM
               "    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
#endif
               "    vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
               "    lowmem  : 0x%08lx - 0x%08lx   (%4ld MB)\n"
               "      .init : 0x%08lx - 0x%08lx   (%4ld kB)\n"
               "      .data : 0x%08lx - 0x%08lx   (%4ld kB)\n"
               "      .text : 0x%08lx - 0x%08lx   (%4ld kB)\n",
               FIXADDR_START, FIXADDR_TOP,
               (FIXADDR_TOP - FIXADDR_START) >> 10,

#ifdef CONFIG_HIGHMEM
               PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
               (LAST_PKMAP*PAGE_SIZE) >> 10,
#endif

               VMALLOC_START, VMALLOC_END,
               (VMALLOC_END - VMALLOC_START) >> 20,

               (unsigned long)__va(0), (unsigned long)high_memory,
               ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,

               (unsigned long)&__init_begin, (unsigned long)&__init_end,
               ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10,

               (unsigned long)&_etext, (unsigned long)&_edata,
               ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,

               (unsigned long)&_text, (unsigned long)&_etext,
               ((unsigned long)&_etext - (unsigned long)&_text) >> 10);

#ifdef CONFIG_HIGHMEM
        BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
        BUG_ON(VMALLOC_END > PKMAP_BASE);
#endif
        BUG_ON(VMALLOC_START > VMALLOC_END);
        BUG_ON((unsigned long)high_memory > VMALLOC_START);
#endif /* double-sanity-check paranoia */

#ifdef CONFIG_X86_PAE
        if (!cpu_has_pae)
                panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
#endif
        /* wp_works_ok starts out negative ("untested"); run the test once */
        if (boot_cpu_data.wp_works_ok < 0)
                test_wp_bit();
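        /*
         * (On SMP kernels the low mappings are zapped later, from the
         * SMP boot code once the secondary CPUs are up - see the
         * callers of zap_low_mappings().)
         */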
        /*
         * Subtle. SMP is doing its boot stuff late (because it has to
         * fork idle threads) - but it also needs low mappings for the
         * protected-mode entry to work. We zap these entries only after
         * the WP-bit has been tested.
         */
#ifndef CONFIG_SMP
        zap_low_mappings();
#endif
}

#ifdef CONFIG_MEMORY_HOTPLUG
int arch_add_memory(int nid, u64 start, u64 size)
{
        struct pglist_data *pgdata = NODE_DATA(nid);
        struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;

        return __add_pages(zone, start_pfn, nr_pages);
}

int remove_memory(u64 start, u64 size)
{
        return -EINVAL;
}
EXPORT_SYMBOL_GPL(remove_memory);
#endif

struct kmem_cache *pmd_cache;

void __init pgtable_cache_init(void)
{
        size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t);

        if (PTRS_PER_PMD > 1) {
                pmd_cache = kmem_cache_create("pmd",
                                              PTRS_PER_PMD*sizeof(pmd_t),
                                              PTRS_PER_PMD*sizeof(pmd_t),
                                              SLAB_PANIC,
                                              pmd_ctor);
                if (!SHARED_KERNEL_PMD) {
                        /* If we're in PAE mode and have a non-shared
                           kernel pmd, then the pgd size must be a
                           page size.  This is because the pgd_list
                           links through the page structure, so there
                           can only be one pgd per page for this to
                           work. */
                        pgd_size = PAGE_SIZE;
                }
        }
}
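/*
 * SLAB_PANIC makes kmem_cache_create() panic on allocation failure,
 * which is why there is no NULL check on pmd_cache above.
 */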
/*
 * This function cannot be __init, since exceptions don't work in that
 * section.  Put this after the callers, so that it cannot be inlined.
 */
static int noinline do_test_wp_bit(void)
{
        char tmp_reg;
        int flag;

        __asm__ __volatile__(
                "       movb %0,%1      \n"     /* read the read-only fixmap page */
                "1:     movb %1,%0      \n"     /* write it back - faults if WP works */
                "       xorl %2,%2      \n"     /* reached only if the write succeeded */
                "2:                     \n"
                ".section __ex_table,\"a\"\n"
                "       .align 4        \n"
                "       .long 1b,2b     \n"     /* on fault at 1: resume at 2, flag stays 1 */
                ".previous              \n"
                :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
                 "=q" (tmp_reg),
                 "=r" (flag)
                :"2" (1)
                :"memory");

        return flag;
}

#ifdef CONFIG_DEBUG_RODATA

void mark_rodata_ro(void)
{
        unsigned long start = PFN_ALIGN(_text);
        unsigned long size = PFN_ALIGN(_etext) - start;

#ifndef CONFIG_KPROBES
#ifdef CONFIG_HOTPLUG_CPU
        /* It must still be possible to apply SMP alternatives. */
        if (num_possible_cpus() <= 1)
#endif
        {
                change_page_attr(virt_to_page(start),
                                 size >> PAGE_SHIFT, PAGE_KERNEL_RX);
                printk("Write protecting the kernel text: %luk\n", size >> 10);
        }
#endif
        start += size;
        size = (unsigned long)__end_rodata - start;
        change_page_attr(virt_to_page(start),
                         size >> PAGE_SHIFT, PAGE_KERNEL_RO);
        printk("Write protecting the kernel read-only data: %luk\n",
               size >> 10);

        /*
         * change_page_attr() requires a global_flush_tlb() call after it.
         * We do this after the printk so that if something went wrong in the
         * change, the printk gets out at least to give a better debug hint
         * of who is the culprit.
         */
        global_flush_tlb();
}
#endif

void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
        unsigned long addr;

        for (addr = begin; addr < end; addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                init_page_count(virt_to_page(addr));
                memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
                free_page(addr);
                totalram_pages++;
        }
        printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
}

void free_initmem(void)
{
        free_init_pages("unused kernel memory",
                        (unsigned long)(&__init_begin),
                        (unsigned long)(&__init_end));
}

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        free_init_pages("initrd memory", start, end);
}
#endif