/*
 *  linux/arch/i386/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 */

#include <linux/module.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/efi.h>
#include <linux/memory_hotplug.h>
#include <linux/initrd.h>
#include <linux/cpumask.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/paravirt.h>

unsigned int __VMALLOC_RESERVE = 128 << 20;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
unsigned long highstart_pfn, highend_pfn;

static int noinline do_test_wp_bit(void);

/*
 * Creates a middle page table and puts a pointer to it in the
 * given global directory entry.  This only returns the pgd entry
 * in non-PAE compilation mode, since the middle layer is folded.
 */
static pmd_t * __init one_md_table_init(pgd_t *pgd)
{
	pud_t *pud;
	pmd_t *pmd_table;

#ifdef CONFIG_X86_PAE
	if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
		pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);

		paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
		set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
		pud = pud_offset(pgd, 0);
		if (pmd_table != pmd_offset(pud, 0))
			BUG();
	}
#endif
	pud = pud_offset(pgd, 0);
	pmd_table = pmd_offset(pud, 0);
	return pmd_table;
}

/*
 * Create a page table and place a pointer to it in a middle page
 * directory entry.
 */
static pte_t * __init one_page_table_init(pmd_t *pmd)
{
	if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
		pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);

		paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
		BUG_ON(page_table != pte_offset_kernel(pmd, 0));
	}

	return pte_offset_kernel(pmd, 0);
}

/*
 * This function initializes a certain range of kernel virtual memory
 * with new bootmem page tables, everywhere page tables are missing in
 * the given range.
 */

/*
 * NOTE: The pagetables are allocated contiguously in physical memory, so
 * we can cache the location of the first one and move around without
 * checking the pgd every time.
 */
static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
{
	pgd_t *pgd;
	pmd_t *pmd;
	int pgd_idx, pmd_idx;
	unsigned long vaddr;

	vaddr = start;
	pgd_idx = pgd_index(vaddr);
	pmd_idx = pmd_index(vaddr);
	pgd = pgd_base + pgd_idx;

	for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
		pmd = one_md_table_init(pgd);
		pmd = pmd + pmd_index(vaddr);
		for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
			one_page_table_init(pmd);

			vaddr += PMD_SIZE;
		}
		pmd_idx = 0;
	}
}

static inline int is_kernel_text(unsigned long addr)
{
	if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
		return 1;
	return 0;
}

/*
 * This maps the physical memory to kernel virtual address space, a total
 * of max_low_pfn pages, by creating page tables starting from address
 * PAGE_OFFSET.
 */
static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
{
	unsigned long pfn;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	int pgd_idx, pmd_idx, pte_ofs;

	pgd_idx = pgd_index(PAGE_OFFSET);
	pgd = pgd_base + pgd_idx;
	pfn = 0;

	for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
		pmd = one_md_table_init(pgd);
		if (pfn >= max_low_pfn)
			continue;
		for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
			unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;

			/* Map with big pages if possible, otherwise create normal page tables. */
			if (cpu_has_pse) {
				unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
				if (is_kernel_text(address) || is_kernel_text(address2))
					set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
				else
					set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));

				pfn += PTRS_PER_PTE;
			} else {
				pte = one_page_table_init(pmd);

				for (pte_ofs = 0;
				     pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
				     pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
					if (is_kernel_text(address))
						set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
					else
						set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
				}
			}
		}
	}
}

static inline int page_kills_ppro(unsigned long pagenr)
{
	if (pagenr >= 0x70000 && pagenr <= 0x7003F)
		return 1;
	return 0;
}

int page_is_ram(unsigned long pagenr)
{
	int i;
	unsigned long addr, end;

	if (efi_enabled) {
		efi_memory_desc_t *md;
		void *p;

		for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
			md = p;
			if (!is_available_memory(md))
				continue;
			addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT;
			end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT;

			if ((pagenr >= addr) && (pagenr < end))
				return 1;
		}
		return 0;
	}

	for (i = 0; i < e820.nr_map; i++) {

		if (e820.map[i].type != E820_RAM)	/* not usable memory */
			continue;
		/*
		 * !!!FIXME!!! Some BIOSen report areas as RAM that
		 * are not. Notably the 640->1Mb area. We need a sanity
		 * check here.
		 */
		addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
		end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
		if ((pagenr >= addr) && (pagenr < end))
			return 1;
	}
	return 0;
}

#ifdef CONFIG_HIGHMEM
pte_t *kmap_pte;
pgprot_t kmap_prot;

#define kmap_get_fixmap_pte(vaddr) \
	pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr))

static void __init kmap_init(void)
{
	unsigned long kmap_vstart;

	/* cache the first kmap pte */
	kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
	kmap_pte = kmap_get_fixmap_pte(kmap_vstart);

	kmap_prot = PAGE_KERNEL;
}

static void __init permanent_kmaps_init(pgd_t *pgd_base)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long vaddr;

	vaddr = PKMAP_BASE;
	page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);

	pgd = swapper_pg_dir + pgd_index(vaddr);
	pud = pud_offset(pgd, vaddr);
	pmd = pmd_offset(pud, vaddr);
	pte = pte_offset_kernel(pmd, vaddr);
	pkmap_page_table = pte;
}

static void __meminit free_new_highpage(struct page *page)
{
	init_page_count(page);
	__free_page(page);
	totalhigh_pages++;
}

void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
{
	if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
		ClearPageReserved(page);
		free_new_highpage(page);
	} else
		SetPageReserved(page);
}

static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn)
{
	free_new_highpage(page);
	totalram_pages++;
#ifdef CONFIG_FLATMEM
	max_mapnr = max(pfn, max_mapnr);
#endif
	num_physpages++;
	return 0;
}

/*
 * Not currently handling the NUMA case.
 * Assuming a single node, and that all memory that has been
 * added dynamically and would be onlined here is in HIGHMEM.
 */
void __meminit online_page(struct page *page)
{
	ClearPageReserved(page);
	add_one_highpage_hotplug(page, page_to_pfn(page));
}


#ifdef CONFIG_NUMA
extern void set_highmem_pages_init(int);
#else
static void __init set_highmem_pages_init(int bad_ppro)
{
	int pfn;
	for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
		add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
	totalram_pages += totalhigh_pages;
}
#endif /* CONFIG_NUMA */

#else
#define kmap_init() do { } while (0)
#define permanent_kmaps_init(pgd_base) do { } while (0)
#define set_highmem_pages_init(bad_ppro) do { } while (0)
#endif /* CONFIG_HIGHMEM */

unsigned long long __PAGE_KERNEL = _PAGE_KERNEL;
EXPORT_SYMBOL(__PAGE_KERNEL);
unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;

#ifdef CONFIG_NUMA
extern void __init remap_numa_kva(void);
#else
#define remap_numa_kva() do {} while (0)
#endif

void __init native_pagetable_setup_start(pgd_t *base)
{
#ifdef CONFIG_X86_PAE
	int i;

	/*
	 * Init entries of the first-level page table to the
	 * zero page, if they haven't already been set up.
	 *
	 * In a normal native boot, we'll be running on a
	 * pagetable rooted in swapper_pg_dir, but not in PAE
	 * mode, so this will end up clobbering the mappings
	 * for the lower 24Mbytes of the address space,
	 * without affecting the kernel address space.
	 */
	for (i = 0; i < USER_PTRS_PER_PGD; i++)
		set_pgd(&base[i],
			__pgd(__pa(empty_zero_page) | _PAGE_PRESENT));

	/* Make sure kernel address space is empty so that a pagetable
	   will be allocated for it. */
	memset(&base[USER_PTRS_PER_PGD], 0,
	       KERNEL_PGD_PTRS * sizeof(pgd_t));
#else
	paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);
#endif
}

void __init native_pagetable_setup_done(pgd_t *base)
{
#ifdef CONFIG_X86_PAE
	/*
	 * Add low memory identity-mappings - SMP needs it when
	 * starting up on an AP from real-mode. In the non-PAE
	 * case we already have these mappings through head.S.
	 * All user-space mappings are explicitly cleared after
	 * SMP startup.
	 */
	set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
#endif
}

/*
 * Build a proper pagetable for the kernel mappings.  Up until this
 * point, we've been running on some set of pagetables constructed by
 * the boot process.
 *
 * If we're booting on native hardware, this will be a pagetable
 * constructed in arch/i386/kernel/head.S, and not running in PAE mode
 * (even if we'll end up running in PAE).  The root of the pagetable
 * will be swapper_pg_dir.
 *
 * If we're booting paravirtualized under a hypervisor, then there are
 * more options: we may already be running PAE, and the pagetable may
 * or may not be based in swapper_pg_dir.  In any case,
 * paravirt_pagetable_setup_start() will set up swapper_pg_dir
 * appropriately for the rest of the initialization to work.
 *
 * In general, pagetable_init() assumes that the pagetable may already
 * be partially populated, and so it avoids stomping on any existing
 * mappings.
 */
static void __init pagetable_init (void)
{
	unsigned long vaddr, end;
	pgd_t *pgd_base = swapper_pg_dir;

	paravirt_pagetable_setup_start(pgd_base);

	/* Enable PSE if available */
	if (cpu_has_pse)
		set_in_cr4(X86_CR4_PSE);

	/* Enable PGE if available */
	if (cpu_has_pge) {
		set_in_cr4(X86_CR4_PGE);
		__PAGE_KERNEL |= _PAGE_GLOBAL;
		__PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
	}

	kernel_physical_mapping_init(pgd_base);
	remap_numa_kva();

	/*
	 * Fixed mappings, only the page table structure has to be
	 * created - mappings will be set by set_fixmap():
	 */
	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
	end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
	page_table_range_init(vaddr, end, pgd_base);

	permanent_kmaps_init(pgd_base);

	paravirt_pagetable_setup_done(pgd_base);
}

#if defined(CONFIG_HIBERNATION) || defined(CONFIG_ACPI)
/*
 * Swap suspend & friends need this for resume because things like the intel-agp
 * driver might have split up a kernel 4MB mapping.
 */
char __nosavedata swsusp_pg_dir[PAGE_SIZE]
	__attribute__ ((aligned (PAGE_SIZE)));

static inline void save_pg_dir(void)
{
	memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
}
#else
static inline void save_pg_dir(void)
{
}
#endif

void zap_low_mappings (void)
{
	int i;

	save_pg_dir();

	/*
	 * Zap initial low-memory mappings.
	 *
	 * Note that "pgd_clear()" doesn't do it for
	 * us, because pgd_clear() is a no-op on i386.
	 */
	for (i = 0; i < USER_PTRS_PER_PGD; i++)
#ifdef CONFIG_X86_PAE
		set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
#else
		set_pgd(swapper_pg_dir+i, __pgd(0));
#endif
	flush_tlb_all();
}

int nx_enabled = 0;

#ifdef CONFIG_X86_PAE

static int disable_nx __initdata = 0;
u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
EXPORT_SYMBOL_GPL(__supported_pte_mask);

/*
 * noexec = on|off
 *
 * Control non executable mappings.
 *
 * on      Enable
 * off     Disable
 */
static int __init noexec_setup(char *str)
{
	if (!str || !strcmp(str, "on")) {
		if (cpu_has_nx) {
			__supported_pte_mask |= _PAGE_NX;
			disable_nx = 0;
		}
	} else if (!strcmp(str, "off")) {
		disable_nx = 1;
		__supported_pte_mask &= ~_PAGE_NX;
	} else
		return -EINVAL;

	return 0;
}
early_param("noexec", noexec_setup);

static void __init set_nx(void)
{
	unsigned int v[4], l, h;

	if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
		cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
		if ((v[3] & (1 << 20)) && !disable_nx) {
			rdmsr(MSR_EFER, l, h);
			l |= EFER_NX;
			wrmsr(MSR_EFER, l, h);
			nx_enabled = 1;
			__supported_pte_mask |= _PAGE_NX;
		}
	}
}

/*
 * Enables/disables executability of a given kernel page and
 * returns the previous setting.
 */
int __init set_kernel_exec(unsigned long vaddr, int enable)
{
	pte_t *pte;
	int ret = 1;

	if (!nx_enabled)
		goto out;

	pte = lookup_address(vaddr);
	BUG_ON(!pte);

	if (!pte_exec_kernel(*pte))
		ret = 0;

	if (enable)
		pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
	else
		pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
	pte_update_defer(&init_mm, vaddr, pte);
	__flush_tlb_all();
out:
	return ret;
}

#endif

/*
 * paging_init() sets up the page tables - note that the first 8MB are
 * already mapped by head.S.
 *
 * This routine also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 */
void __init paging_init(void)
{
#ifdef CONFIG_X86_PAE
	set_nx();
	if (nx_enabled)
		printk("NX (Execute Disable) protection: active\n");
#endif

	pagetable_init();

	load_cr3(swapper_pg_dir);

#ifdef CONFIG_X86_PAE
	/*
	 * We will bail out later - printk doesn't work right now so
	 * the user would just see a hanging kernel.
	 */
	if (cpu_has_pae)
		set_in_cr4(X86_CR4_PAE);
#endif
	__flush_tlb_all();

	kmap_init();
}

/*
 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This
 * used to involve black magic jumps to work around some nasty CPU bugs,
 * but fortunately the switch to using exceptions got rid of all that.
 */

static void __init test_wp_bit(void)
{
	printk("Checking if this processor honours the WP bit even in supervisor mode... ");

	/* Any page-aligned address will do, the test is non-destructive */
	__set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY);
	boot_cpu_data.wp_works_ok = do_test_wp_bit();
	clear_fixmap(FIX_WP_TEST);

	if (!boot_cpu_data.wp_works_ok) {
		printk("No.\n");
#ifdef CONFIG_X86_WP_WORKS_OK
		panic("This kernel doesn't support CPUs with broken WP. Recompile it for a 386!");
Recompile it for a 386!"); 605 #endif 606 } else { 607 printk("Ok.\n"); 608 } 609 } 610 611 static struct kcore_list kcore_mem, kcore_vmalloc; 612 613 void __init mem_init(void) 614 { 615 extern int ppro_with_ram_bug(void); 616 int codesize, reservedpages, datasize, initsize; 617 int tmp; 618 int bad_ppro; 619 620 #ifdef CONFIG_FLATMEM 621 BUG_ON(!mem_map); 622 #endif 623 624 bad_ppro = ppro_with_ram_bug(); 625 626 #ifdef CONFIG_HIGHMEM 627 /* check that fixmap and pkmap do not overlap */ 628 if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { 629 printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n"); 630 printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", 631 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START); 632 BUG(); 633 } 634 #endif 635 636 /* this will put all low memory onto the freelists */ 637 totalram_pages += free_all_bootmem(); 638 639 reservedpages = 0; 640 for (tmp = 0; tmp < max_low_pfn; tmp++) 641 /* 642 * Only count reserved RAM pages 643 */ 644 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) 645 reservedpages++; 646 647 set_highmem_pages_init(bad_ppro); 648 649 codesize = (unsigned long) &_etext - (unsigned long) &_text; 650 datasize = (unsigned long) &_edata - (unsigned long) &_etext; 651 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; 652 653 kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); 654 kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, 655 VMALLOC_END-VMALLOC_START); 656 657 printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", 658 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 659 num_physpages << (PAGE_SHIFT-10), 660 codesize >> 10, 661 reservedpages << (PAGE_SHIFT-10), 662 datasize >> 10, 663 initsize >> 10, 664 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) 665 ); 666 667 #if 1 /* double-sanity-check paranoia */ 668 printk("virtual kernel memory layout:\n" 669 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" 670 #ifdef CONFIG_HIGHMEM 671 " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" 672 #endif 673 " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" 674 " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" 675 " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" 676 " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" 677 " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", 678 FIXADDR_START, FIXADDR_TOP, 679 (FIXADDR_TOP - FIXADDR_START) >> 10, 680 681 #ifdef CONFIG_HIGHMEM 682 PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, 683 (LAST_PKMAP*PAGE_SIZE) >> 10, 684 #endif 685 686 VMALLOC_START, VMALLOC_END, 687 (VMALLOC_END - VMALLOC_START) >> 20, 688 689 (unsigned long)__va(0), (unsigned long)high_memory, 690 ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, 691 692 (unsigned long)&__init_begin, (unsigned long)&__init_end, 693 ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10, 694 695 (unsigned long)&_etext, (unsigned long)&_edata, 696 ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, 697 698 (unsigned long)&_text, (unsigned long)&_etext, 699 ((unsigned long)&_etext - (unsigned long)&_text) >> 10); 700 701 #ifdef CONFIG_HIGHMEM 702 BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START); 703 BUG_ON(VMALLOC_END > PKMAP_BASE); 704 #endif 705 BUG_ON(VMALLOC_START > VMALLOC_END); 706 BUG_ON((unsigned long)high_memory > VMALLOC_START); 707 #endif /* double-sanity-check paranoia */ 708 709 #ifdef CONFIG_X86_PAE 710 if (!cpu_has_pae) 711 panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); 712 #endif 713 if (boot_cpu_data.wp_works_ok < 0) 
		test_wp_bit();

	/*
	 * Subtle. SMP is doing its boot stuff late (because it has to
	 * fork idle threads) - but it also needs low mappings for the
	 * protected-mode entry to work. We zap these entries only after
	 * the WP-bit has been tested.
	 */
#ifndef CONFIG_SMP
	zap_low_mappings();
#endif
}

#ifdef CONFIG_MEMORY_HOTPLUG
int arch_add_memory(int nid, u64 start, u64 size)
{
	struct pglist_data *pgdata = NODE_DATA(nid);
	struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;

	return __add_pages(zone, start_pfn, nr_pages);
}

int remove_memory(u64 start, u64 size)
{
	return -EINVAL;
}
EXPORT_SYMBOL_GPL(remove_memory);
#endif

struct kmem_cache *pmd_cache;

void __init pgtable_cache_init(void)
{
	size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t);

	if (PTRS_PER_PMD > 1) {
		pmd_cache = kmem_cache_create("pmd",
					PTRS_PER_PMD*sizeof(pmd_t),
					PTRS_PER_PMD*sizeof(pmd_t),
					SLAB_PANIC,
					pmd_ctor);
		if (!SHARED_KERNEL_PMD) {
			/* If we're in PAE mode and have a non-shared
			   kernel pmd, then the pgd size must be a
			   page size.  This is because the pgd_list
			   links through the page structure, so there
			   can only be one pgd per page for this to
			   work. */
			pgd_size = PAGE_SIZE;
		}
	}
}

/*
 * This function cannot be __init, since exceptions don't work in that
 * section.  Put this after the callers, so that it cannot be inlined.
 */
static int noinline do_test_wp_bit(void)
{
	char tmp_reg;
	int flag;

	__asm__ __volatile__(
		"	movb %0,%1	\n"
		"1:	movb %1,%0	\n"
		"	xorl %2,%2	\n"
		"2:			\n"
		".section __ex_table,\"a\"\n"
		"	.align 4	\n"
		"	.long 1b,2b	\n"
		".previous		\n"
		:"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
		 "=q" (tmp_reg),
		 "=r" (flag)
		:"2" (1)
		:"memory");

	return flag;
}

#ifdef CONFIG_DEBUG_RODATA

void mark_rodata_ro(void)
{
	unsigned long start = PFN_ALIGN(_text);
	unsigned long size = PFN_ALIGN(_etext) - start;

#ifndef CONFIG_KPROBES
#ifdef CONFIG_HOTPLUG_CPU
	/* It must still be possible to apply SMP alternatives. */
	if (num_possible_cpus() <= 1)
#endif
	{
		change_page_attr(virt_to_page(start),
				 size >> PAGE_SHIFT, PAGE_KERNEL_RX);
		printk("Write protecting the kernel text: %luk\n", size >> 10);
	}
#endif
	start += size;
	size = (unsigned long)__end_rodata - start;
	change_page_attr(virt_to_page(start),
			 size >> PAGE_SHIFT, PAGE_KERNEL_RO);
	printk("Write protecting the kernel read-only data: %luk\n",
	       size >> 10);

	/*
	 * change_page_attr() requires a global_flush_tlb() call after it.
	 * We do this after the printk so that if something went wrong in the
	 * change, the printk gets out at least to give a better debug hint
	 * of who is the culprit.
	 */
	global_flush_tlb();
}
#endif

void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
	unsigned long addr;

	for (addr = begin; addr < end; addr += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(addr));
		init_page_count(virt_to_page(addr));
		memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
		free_page(addr);
		totalram_pages++;
	}
	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
}

void free_initmem(void)
{
	free_init_pages("unused kernel memory",
			(unsigned long)(&__init_begin),
			(unsigned long)(&__init_end));
}

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
	free_init_pages("initrd memory", start, end);
}
#endif