xref: /linux/arch/x86/mm/init_64.c (revision 2b8232ce512105e28453f301d1510de8363bccd1)
/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>

#ifndef Dprintk
#define Dprintk(x...)
#endif

const struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);

static unsigned long dma_reserve __initdata;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in
 * physical space, so we can cache the location of the first one and move
 * around without checking the pgd every time.
 */

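/* Print an overview of free, reserved, shared and swap-cached pages for debugging. */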
void show_mem(void)
{
	long i, total = 0, reserved = 0;
	long shared = 0, cached = 0;
	pg_data_t *pgdat;
	struct page *page;

	printk(KERN_INFO "Mem-info:\n");
	show_free_areas();
	printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

	for_each_online_pgdat(pgdat) {
		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
			/* this loop can take a while with 256 GB and 4k pages
			   so update the NMI watchdog */
			if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
				touch_nmi_watchdog();
			}
			if (!pfn_valid(pgdat->node_start_pfn + i))
				continue;
			page = pfn_to_page(pgdat->node_start_pfn + i);
			total++;
			if (PageReserved(page))
				reserved++;
			else if (PageSwapCache(page))
				cached++;
			else if (page_count(page))
				shared += page_count(page) - 1;
		}
	}
	printk(KERN_INFO "%ld pages of RAM\n", total);
	printk(KERN_INFO "%ld reserved pages\n", reserved);
	printk(KERN_INFO "%ld pages shared\n", shared);
	printk(KERN_INFO "%ld pages swap cached\n", cached);
}

int after_bootmem;

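/*
 * Allocate a page for a kernel page table: from the bootmem allocator during
 * early boot, from the page allocator once mem_init() has run.
 */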
static __init void *spp_getpage(void)
{
	void *ptr;
	if (after_bootmem)
		ptr = (void *) get_zeroed_page(GFP_ATOMIC);
	else
		ptr = alloc_bootmem_pages(PAGE_SIZE);
	if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
		panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");

	Dprintk("spp_getpage %p\n", ptr);
	return ptr;
}

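/*
 * Establish a kernel mapping of the single page at vaddr to the physical
 * address phys with protection prot, allocating intermediate page-table
 * levels as needed.
 */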
static __init void set_pte_phys(unsigned long vaddr,
			 unsigned long phys, pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte, new_pte;

	Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

	pgd = pgd_offset_k(vaddr);
	if (pgd_none(*pgd)) {
		printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
		return;
	}
	pud = pud_offset(pgd, vaddr);
	if (pud_none(*pud)) {
		pmd = (pmd_t *) spp_getpage();
		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
		if (pmd != pmd_offset(pud, 0)) {
			printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud, 0));
			return;
		}
	}
	pmd = pmd_offset(pud, vaddr);
	if (pmd_none(*pmd)) {
		pte = (pte_t *) spp_getpage();
		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
		if (pte != pte_offset_kernel(pmd, 0)) {
			printk("PAGETABLE BUG #02!\n");
			return;
		}
	}
	new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

	pte = pte_offset_kernel(pmd, vaddr);
	if (!pte_none(*pte) &&
	    pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
		pte_ERROR(*pte);
	set_pte(pte, new_pte);

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

/* NOTE: this is meant to be run only at boot */
void __init
__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		printk("Invalid __set_fixmap\n");
		return;
	}
	set_pte_phys(address, phys, prot);
}

unsigned long __meminitdata table_start, table_end;

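/*
 * Return a zeroed page for building early page tables. Before bootmem is up
 * the page is taken from the region reserved by find_early_table_space() and
 * temporarily mapped with early_ioremap(); afterwards it comes from the page
 * allocator.
 */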
static __meminit void *alloc_low_page(unsigned long *phys)
{
	unsigned long pfn = table_end++;
	void *adr;

	if (after_bootmem) {
		adr = (void *)get_zeroed_page(GFP_ATOMIC);
		*phys = __pa(adr);
		return adr;
	}

	if (pfn >= end_pfn)
		panic("alloc_low_page: ran out of memory");

	adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
	memset(adr, 0, PAGE_SIZE);
	*phys = pfn * PAGE_SIZE;
	return adr;
}

static __meminit void unmap_low_page(void *adr)
{
	if (after_bootmem)
		return;

	early_iounmap(adr, PAGE_SIZE);
}

/* Must run before zap_low_mappings */
__meminit void *early_ioremap(unsigned long addr, unsigned long size)
{
	unsigned long vaddr;
	pmd_t *pmd, *last_pmd;
	int i, pmds;

	pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
	vaddr = __START_KERNEL_map;
	pmd = level2_kernel_pgt;
	last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
	for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
		for (i = 0; i < pmds; i++) {
			if (pmd_present(pmd[i]))
				goto next;
		}
		vaddr += addr & ~PMD_MASK;
		addr &= PMD_MASK;
		for (i = 0; i < pmds; i++, addr += PMD_SIZE)
			set_pmd(pmd + i, __pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
		__flush_tlb();
		return (void *)vaddr;
	next:
		;
	}
	printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
	return NULL;
}

/* To avoid virtual aliases later */
__meminit void early_iounmap(void *addr, unsigned long size)
{
	unsigned long vaddr;
	pmd_t *pmd;
	int i, pmds;

	vaddr = (unsigned long)addr;
	pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
	pmd = level2_kernel_pgt + pmd_index(vaddr);
	for (i = 0; i < pmds; i++)
		pmd_clear(pmd + i);
	__flush_tlb();
}

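/*
 * Fill a PMD page with 2MB PSE mappings for the physical range
 * [address, end). Entries that are already present are left untouched.
 */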
static void __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
	int i = pmd_index(address);

	for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
		unsigned long entry;
		pmd_t *pmd = pmd_page + pmd_index(address);

		if (address >= end) {
			if (!after_bootmem)
				for (; i < PTRS_PER_PMD; i++, pmd++)
					set_pmd(pmd, __pmd(0));
			break;
		}

		if (pmd_val(*pmd))
			continue;

		entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
		entry &= __supported_pte_mask;
		set_pmd(pmd, __pmd(entry));
	}
}

static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
	pmd_t *pmd = pmd_offset(pud, 0);
	spin_lock(&init_mm.page_table_lock);
	phys_pmd_init(pmd, address, end);
	spin_unlock(&init_mm.page_table_lock);
	__flush_tlb_all();
}

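/*
 * Fill a PUD page with mappings for the physical range [addr, end),
 * allocating PMD pages as needed and clearing entries for ranges the
 * e820 map does not cover.
 */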
static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
	int i = pud_index(addr);

	for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
		unsigned long pmd_phys;
		pud_t *pud = pud_page + pud_index(addr);
		pmd_t *pmd;

		if (addr >= end)
			break;

		if (!after_bootmem && !e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
			set_pud(pud, __pud(0));
			continue;
		}

		if (pud_val(*pud)) {
			phys_pmd_update(pud, addr, end);
			continue;
		}

		pmd = alloc_low_page(&pmd_phys);
		spin_lock(&init_mm.page_table_lock);
		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
		phys_pmd_init(pmd, addr, end);
		spin_unlock(&init_mm.page_table_lock);
		unmap_low_page(pmd);
	}
	__flush_tlb();
}

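/*
 * Estimate the worst-case size of the direct-mapping page tables for memory
 * up to 'end' and reserve a region for them in the e820 map.
 */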
static void __init find_early_table_space(unsigned long end)
{
	unsigned long puds, pmds, tables, start;

	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
	tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
		 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

	/* RED-PEN putting page tables only on node 0 could
	   cause a hotspot and fill up ZONE_DMA. The page tables
	   need roughly 0.5KB per GB. */
	start = 0x8000;
	table_start = find_e820_area(start, end, tables);
	if (table_start == -1UL)
		panic("Cannot find space for the kernel page tables");

	table_start >>= PAGE_SHIFT;
	table_end = table_start;

	early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
		end, table_start << PAGE_SHIFT,
		(table_start << PAGE_SHIFT) + tables);
}

/* Set up the direct mapping of physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from
   physical memory; to access them, they are temporarily mapped. */
void __meminit init_memory_mapping(unsigned long start, unsigned long end)
{
	unsigned long next;

	Dprintk("init_memory_mapping\n");

	/*
	 * Find space for the kernel direct mapping tables.
	 * Later we should allocate these tables in the local node of the memory
	 * mapped.  Unfortunately this is done currently before the nodes are
	 * discovered.
	 */
	if (!after_bootmem)
		find_early_table_space(end);

	start = (unsigned long)__va(start);
	end = (unsigned long)__va(end);

	for (; start < end; start = next) {
		unsigned long pud_phys;
		pgd_t *pgd = pgd_offset_k(start);
		pud_t *pud;

		if (after_bootmem)
			pud = pud_offset(pgd, start & PGDIR_MASK);
		else
			pud = alloc_low_page(&pud_phys);

		next = start + PGDIR_SIZE;
		if (next > end)
			next = end;
		phys_pud_init(pud, __pa(start), __pa(next));
		if (!after_bootmem)
			set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
		unmap_low_page(pud);
	}

	if (!after_bootmem)
		mmu_cr4_features = read_cr4();
	__flush_tlb_all();
}

#ifndef CONFIG_NUMA
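/* Set up the zone size limits and the sparse memory map for the flat (non-NUMA) case. */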
void __init paging_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES];
	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
	max_zone_pfns[ZONE_NORMAL] = end_pfn;

	memory_present(0, 0, end_pfn);
	sparse_init();
	free_area_init_nodes(max_zone_pfns);
}
#endif

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
	unsigned long end = address + size;

	BUG_ON(address & ~LARGE_PAGE_MASK);
	BUG_ON(size & ~LARGE_PAGE_MASK);

	for (; address < end; address += LARGE_PAGE_SIZE) {
		pgd_t *pgd = pgd_offset_k(address);
		pud_t *pud;
		pmd_t *pmd;
		if (pgd_none(*pgd))
			continue;
		pud = pud_offset(pgd, address);
		if (pud_none(*pud))
			continue;
		pmd = pmd_offset(pud, address);
		if (!pmd || pmd_none(*pmd))
			continue;
		if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
			/* Could handle this, but it should not happen currently. */
			printk(KERN_ERR
	       "clear_kernel_mapping: mapping has been split. will leak memory\n");
			pmd_ERROR(*pmd);
		}
		set_pmd(pmd, __pmd(0));
	}
	__flush_tlb_all();
}

/*
 * Memory hotplug specific functions
 */
void online_page(struct page *page)
{
	ClearPageReserved(page);
	init_page_count(page);
	__free_page(page);
	totalram_pages++;
	num_physpages++;
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Memory is always added to the NORMAL zone. This means you will never get
 * additional DMA/DMA32 memory.
 */
int arch_add_memory(int nid, u64 start, u64 size)
{
	struct pglist_data *pgdat = NODE_DATA(nid);
	struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;
	int ret;

	init_memory_mapping(start, (start + size - 1));

	ret = __add_pages(zone, start_pfn, nr_pages);
	if (ret)
		goto error;

	return ret;
error:
	printk("%s: Problem encountered in __add_pages!\n", __func__);
	return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);

int remove_memory(u64 start, u64 size)
{
	return -EINVAL;
}
EXPORT_SYMBOL_GPL(remove_memory);

#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
	return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif

#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
/*
 * Memory hotadd without sparsemem. The mem_maps have been allocated in
 * advance, so just online the pages.
 */
int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
{
	int err = -EIO;
	unsigned long pfn;
	unsigned long total = 0, mem = 0;
	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
		if (pfn_valid(pfn)) {
			online_page(pfn_to_page(pfn));
			err = 0;
			mem++;
		}
		total++;
	}
	if (!err) {
		z->spanned_pages += total;
		z->present_pages += mem;
		z->zone_pgdat->node_spanned_pages += total;
		z->zone_pgdat->node_present_pages += mem;
	}
	return err;
}
#endif

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
			 kcore_vsyscall;

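/*
 * Hand all bootmem pages over to the page allocator, account for reserved
 * memory and register the regions exported through /proc/kcore.
 */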
void __init mem_init(void)
{
	long codesize, reservedpages, datasize, initsize;

	pci_iommu_alloc();

	/* clear the zero-page */
	memset(empty_zero_page, 0, PAGE_SIZE);

	reservedpages = 0;

	/* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
	totalram_pages = numa_free_all_bootmem();
#else
	totalram_pages = free_all_bootmem();
#endif
	reservedpages = end_pfn - totalram_pages -
					absent_pages_in_range(0, end_pfn);

	after_bootmem = 1;

	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

	/* Register memory areas for /proc/kcore */
	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
		   VMALLOC_END-VMALLOC_START);
	kclist_add(&kcore_kernel, &_stext, _end - _stext);
	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
				 VSYSCALL_END - VSYSCALL_START);

	printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
		end_pfn << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10);
}

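/*
 * Poison and free the pages in [begin, end), returning them to the page
 * allocator. Pages that are part of the kernel image mapping also have
 * their page attributes cleared.
 */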
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
	unsigned long addr;

	if (begin >= end)
		return;

	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
	for (addr = begin; addr < end; addr += PAGE_SIZE) {
		ClearPageReserved(virt_to_page(addr));
		init_page_count(virt_to_page(addr));
		memset((void *)(addr & ~(PAGE_SIZE-1)),
			POISON_FREE_INITMEM, PAGE_SIZE);
		if (addr >= __START_KERNEL_map)
			change_page_attr_addr(addr, 1, __pgprot(0));
		free_page(addr);
		totalram_pages++;
	}
	if (addr > __START_KERNEL_map)
		global_flush_tlb();
}

void free_initmem(void)
{
	free_init_pages("unused kernel memory",
			(unsigned long)(&__init_begin),
			(unsigned long)(&__init_end));
}

#ifdef CONFIG_DEBUG_RODATA

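/*
 * Write-protect the kernel's read-only data (and, where still possible,
 * its text) by switching the pages to PAGE_KERNEL_RO.
 */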
void mark_rodata_ro(void)
{
	unsigned long start = (unsigned long)_stext, end;

#ifdef CONFIG_HOTPLUG_CPU
	/* It must still be possible to apply SMP alternatives. */
	if (num_possible_cpus() > 1)
		start = (unsigned long)_etext;
#endif

#ifdef CONFIG_KPROBES
	start = (unsigned long)__start_rodata;
#endif

	end = (unsigned long)__end_rodata;
	start = (start + PAGE_SIZE - 1) & PAGE_MASK;
	end &= PAGE_MASK;
	if (end <= start)
		return;

	change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);

	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
	       (end - start) >> 10);

	/*
	 * change_page_attr_addr() requires a global_flush_tlb() call after it.
	 * We do this after the printk so that if something went wrong in the
	 * change, the printk gets out at least to give a better debug hint
	 * of who is the culprit.
	 */
	global_flush_tlb();
}
#endif

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
	free_init_pages("initrd memory", start, end);
}
#endif

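/*
 * Reserve a physical range with the bootmem allocator. Ranges above end_pfn
 * (e.g. firmware tables seen by kdump kernels) are ignored, and reservations
 * below the DMA limit are accounted in dma_reserve.
 */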
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_NUMA
	int nid = phys_to_nid(phys);
#endif
	unsigned long pfn = phys >> PAGE_SHIFT;
	if (pfn >= end_pfn) {
		/* This can happen with kdump kernels when accessing firmware
		   tables. */
		if (pfn < end_pfn_map)
			return;
		printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
				phys, len);
		return;
	}

	/* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
	reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
	reserve_bootmem(phys, len);
#endif
	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
		dma_reserve += len / PAGE_SIZE;
		set_dma_reserve(dma_reserve);
	}
}

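/*
 * Check whether a kernel virtual address is backed by a present
 * page-table mapping, walking the page tables by hand.
 */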
int kern_addr_valid(unsigned long addr)
{
	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	if (above != 0 && above != -1UL)
		return 0;

	pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd))
		return 0;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud))
		return 0;

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return 0;
	if (pmd_large(*pmd))
		return pfn_valid(pmd_pfn(*pmd));

	pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte))
		return 0;
	return pfn_valid(pte_pfn(*pte));
}

/* A pseudo VMA to allow ptrace access to the vsyscall page. This covers only
   the 64-bit vsyscall page now; 32-bit has a real VMA and does not need
   special handling anymore. */

static struct vm_area_struct gate_vma = {
	.vm_start = VSYSCALL_START,
	.vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
	.vm_page_prot = PAGE_READONLY_EXEC,
	.vm_flags = VM_READ | VM_EXEC
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
	if (test_tsk_thread_flag(tsk, TIF_IA32))
		return NULL;
#endif
	return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
	struct vm_area_struct *vma = get_gate_vma(task);
	if (!vma)
		return 0;
	return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/* Use this when you have no reliable task/vma, typically from interrupt
 * context.  It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
	return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}

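/* Allocate node-local bootmem, preferring memory above the 4GB boundary. */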
void * __init alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
{
	return __alloc_bootmem_core(pgdat->bdata, size,
			SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0);
}

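/* Give the vdso and vsyscall mappings their names in /proc/<pid>/maps. */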
const char *arch_vma_name(struct vm_area_struct *vma)
{
	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
		return "[vdso]";
	if (vma == &gate_vma)
		return "[vsyscall]";
	return NULL;
}