xref: /linux/arch/x86/mm/init_64.c (revision 42fda66387daa53538ae13a2c858396aaf037158)
1 /*
2  *  linux/arch/x86_64/mm/init.c
3  *
4  *  Copyright (C) 1995  Linus Torvalds
5  *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
6  *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
7  */
8 
9 #include <linux/signal.h>
10 #include <linux/sched.h>
11 #include <linux/kernel.h>
12 #include <linux/errno.h>
13 #include <linux/string.h>
14 #include <linux/types.h>
15 #include <linux/ptrace.h>
16 #include <linux/mman.h>
17 #include <linux/mm.h>
18 #include <linux/swap.h>
19 #include <linux/smp.h>
20 #include <linux/init.h>
21 #include <linux/pagemap.h>
22 #include <linux/bootmem.h>
23 #include <linux/proc_fs.h>
24 #include <linux/pci.h>
25 #include <linux/pfn.h>
26 #include <linux/poison.h>
27 #include <linux/dma-mapping.h>
28 #include <linux/module.h>
29 #include <linux/memory_hotplug.h>
30 #include <linux/nmi.h>
31 
32 #include <asm/processor.h>
33 #include <asm/system.h>
34 #include <asm/uaccess.h>
35 #include <asm/pgtable.h>
36 #include <asm/pgalloc.h>
37 #include <asm/dma.h>
38 #include <asm/fixmap.h>
39 #include <asm/e820.h>
40 #include <asm/apic.h>
41 #include <asm/tlb.h>
42 #include <asm/mmu_context.h>
43 #include <asm/proto.h>
44 #include <asm/smp.h>
45 #include <asm/sections.h>
46 
47 #ifndef Dprintk
48 #define Dprintk(x...)
49 #endif
50 
51 const struct dma_mapping_ops* dma_ops;
52 EXPORT_SYMBOL(dma_ops);
53 
54 static unsigned long dma_reserve __initdata;
55 
56 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
57 
58 /*
59  * NOTE: pagetable_init allocates all the fixmap page tables contiguously
60  * in physical space, so we can cache the location of the first one and
61  * move around without checking the pgd every time.
62  */
63 
64 void show_mem(void)
65 {
66 	unsigned long i, total = 0, reserved = 0;
67 	unsigned long shared = 0, cached = 0;
68 	pg_data_t *pgdat;
69 	struct page *page;
70 
71 	printk(KERN_INFO "Mem-info:\n");
72 	show_free_areas();
73 	printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
74 
75 	for_each_online_pgdat(pgdat) {
76 		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
77 			/* this loop can take a while with 256 GB and 4k pages
78 			   so update the NMI watchdog */
79 			if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
80 				touch_nmi_watchdog();
81 			}
82 			if (!pfn_valid(pgdat->node_start_pfn + i))
83 				continue;
84 			page = pfn_to_page(pgdat->node_start_pfn + i);
85 			total++;
86 			if (PageReserved(page))
87 				reserved++;
88 			else if (PageSwapCache(page))
89 				cached++;
90 			else if (page_count(page))
91 				shared += page_count(page) - 1;
92 		}
93 	}
94 	printk(KERN_INFO "%lu pages of RAM\n", total);
95 	printk(KERN_INFO "%lu reserved pages\n", reserved);
96 	printk(KERN_INFO "%lu pages shared\n", shared);
97 	printk(KERN_INFO "%lu pages swap cached\n", cached);
98 }
99 
100 int after_bootmem;
101 
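/*
 * Allocate one zeroed page for a new page-table level: from the page
 * allocator once bootmem has been torn down (after_bootmem), otherwise
 * from the bootmem allocator.  Used by set_pte_phys() below.
 */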
102 static __init void *spp_getpage(void)
103 {
104 	void *ptr;
105 	if (after_bootmem)
106 		ptr = (void *) get_zeroed_page(GFP_ATOMIC);
107 	else
108 		ptr = alloc_bootmem_pages(PAGE_SIZE);
109 	if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
110 		panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
111 
112 	Dprintk("spp_getpage %p\n", ptr);
113 	return ptr;
114 }
115 
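/*
 * Install a single kernel PTE mapping 'vaddr' to 'phys' with 'prot',
 * allocating the pmd and pte levels on demand.  The pgd entry covering
 * the fixmap range is expected to have been set up in head.S already.
 */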
116 static __init void set_pte_phys(unsigned long vaddr,
117 			 unsigned long phys, pgprot_t prot)
118 {
119 	pgd_t *pgd;
120 	pud_t *pud;
121 	pmd_t *pmd;
122 	pte_t *pte, new_pte;
123 
124 	Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);
125 
126 	pgd = pgd_offset_k(vaddr);
127 	if (pgd_none(*pgd)) {
128 		printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
129 		return;
130 	}
131 	pud = pud_offset(pgd, vaddr);
132 	if (pud_none(*pud)) {
133 		pmd = (pmd_t *) spp_getpage();
134 		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
135 		if (pmd != pmd_offset(pud, 0)) {
136 			printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
137 			return;
138 		}
139 	}
140 	pmd = pmd_offset(pud, vaddr);
141 	if (pmd_none(*pmd)) {
142 		pte = (pte_t *) spp_getpage();
143 		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
144 		if (pte != pte_offset_kernel(pmd, 0)) {
145 			printk("PAGETABLE BUG #02!\n");
146 			return;
147 		}
148 	}
149 	new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
150 
151 	pte = pte_offset_kernel(pmd, vaddr);
152 	if (!pte_none(*pte) &&
153 	    pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
154 		pte_ERROR(*pte);
155 	set_pte(pte, new_pte);
156 
157 	/*
158 	 * It's enough to flush this one mapping.
159 	 * (PGE mappings get flushed as well)
160 	 */
161 	__flush_tlb_one(vaddr);
162 }
163 
164 /* NOTE: this is meant to be run only at boot */
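/*
 * Illustrative use only (not taken from this file): the fixmap wrappers
 * in fixmap.h, set_fixmap() and set_fixmap_nocache(), call __set_fixmap()
 * with PAGE_KERNEL / PAGE_KERNEL_NOCACHE, e.g. roughly
 *
 *	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
 *
 * when the local APIC registers are mapped during early boot.
 */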
165 void __init
166 __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
167 {
168 	unsigned long address = __fix_to_virt(idx);
169 
170 	if (idx >= __end_of_fixed_addresses) {
171 		printk("Invalid __set_fixmap\n");
172 		return;
173 	}
174 	set_pte_phys(address, phys, prot);
175 }
176 
177 unsigned long __meminitdata table_start, table_end;
178 
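/*
 * Hand out one zeroed page for early page-table construction, returning
 * its physical address via *phys.  Before bootmem is available the page
 * comes from the [table_start, table_end) range found by
 * find_early_table_space() and is reached through a temporary
 * early_ioremap() mapping.
 */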
179 static __meminit void *alloc_low_page(unsigned long *phys)
180 {
181 	unsigned long pfn = table_end++;
182 	void *adr;
183 
184 	if (after_bootmem) {
185 		adr = (void *)get_zeroed_page(GFP_ATOMIC);
186 		*phys = __pa(adr);
187 		return adr;
188 	}
189 
190 	if (pfn >= end_pfn)
191 		panic("alloc_low_page: ran out of memory");
192 
193 	adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
194 	memset(adr, 0, PAGE_SIZE);
195 	*phys  = pfn * PAGE_SIZE;
196 	return adr;
197 }
198 
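/* Undo the temporary mapping set up by alloc_low_page() during boot. */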
199 static __meminit void unmap_low_page(void *adr)
200 {
201 
202 	if (after_bootmem)
203 		return;
204 
205 	early_iounmap(adr, PAGE_SIZE);
206 }
207 
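/*
 * Temporarily map 'size' bytes at physical 'addr' by using free 2MB
 * (PSE) slots in the kernel text mapping (level2_kernel_pgt).  Returns
 * a virtual address inside __START_KERNEL_map, or NULL if no run of
 * free slots is found.
 */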
208 /* Must run before zap_low_mappings */
209 __meminit void *early_ioremap(unsigned long addr, unsigned long size)
210 {
211 	unsigned long vaddr;
212 	pmd_t *pmd, *last_pmd;
213 	int i, pmds;
214 
215 	pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
216 	vaddr = __START_KERNEL_map;
217 	pmd = level2_kernel_pgt;
218 	last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
219 	for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
220 		for (i = 0; i < pmds; i++) {
221 			if (pmd_present(pmd[i]))
222 				goto next;
223 		}
224 		vaddr += addr & ~PMD_MASK;
225 		addr &= PMD_MASK;
226 		for (i = 0; i < pmds; i++, addr += PMD_SIZE)
227 			set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
228 		__flush_tlb();
229 		return (void *)vaddr;
230 	next:
231 		;
232 	}
233 	printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
234 	return NULL;
235 }
236 
237 /* To avoid virtual aliases later */
238 __meminit void early_iounmap(void *addr, unsigned long size)
239 {
240 	unsigned long vaddr;
241 	pmd_t *pmd;
242 	int i, pmds;
243 
244 	vaddr = (unsigned long)addr;
245 	pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
246 	pmd = level2_kernel_pgt + pmd_index(vaddr);
247 	for (i = 0; i < pmds; i++)
248 		pmd_clear(pmd + i);
249 	__flush_tlb();
250 }
251 
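/*
 * Fill the given pmd page with 2MB kernel mappings covering
 * [address, end).  Already-populated entries are left untouched; during
 * boot the entries past 'end' are explicitly cleared.
 */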
252 static void __meminit
253 phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
254 {
255 	int i = pmd_index(address);
256 
257 	for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
258 		unsigned long entry;
259 		pmd_t *pmd = pmd_page + pmd_index(address);
260 
261 		if (address >= end) {
262 			if (!after_bootmem)
263 				for (; i < PTRS_PER_PMD; i++, pmd++)
264 					set_pmd(pmd, __pmd(0));
265 			break;
266 		}
267 
268 		if (pmd_val(*pmd))
269 			continue;
270 
271 		entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
272 		entry &= __supported_pte_mask;
273 		set_pmd(pmd, __pmd(entry));
274 	}
275 }
276 
277 static void __meminit
278 phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
279 {
280 	pmd_t *pmd = pmd_offset(pud,0);
281 	spin_lock(&init_mm.page_table_lock);
282 	phys_pmd_init(pmd, address, end);
283 	spin_unlock(&init_mm.page_table_lock);
284 	__flush_tlb_all();
285 }
286 
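/*
 * Populate the pud page for [addr, end): each 1GB slot gets a freshly
 * allocated pmd page (or an update of an existing one); slots with no
 * e820 coverage at all are cleared and skipped.
 */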
287 static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
288 {
289 	int i = pud_index(addr);
290 
291 
292 	for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
293 		unsigned long pmd_phys;
294 		pud_t *pud = pud_page + pud_index(addr);
295 		pmd_t *pmd;
296 
297 		if (addr >= end)
298 			break;
299 
300 		if (!after_bootmem && !e820_any_mapped(addr,addr+PUD_SIZE,0)) {
301 			set_pud(pud, __pud(0));
302 			continue;
303 		}
304 
305 		if (pud_val(*pud)) {
306 			phys_pmd_update(pud, addr, end);
307 			continue;
308 		}
309 
310 		pmd = alloc_low_page(&pmd_phys);
311 		spin_lock(&init_mm.page_table_lock);
312 		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
313 		phys_pmd_init(pmd, addr, end);
314 		spin_unlock(&init_mm.page_table_lock);
315 		unmap_low_page(pmd);
316 	}
317 	__flush_tlb();
318 }
319 
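/*
 * Estimate the worst-case space needed for the direct-mapping page
 * tables (one pud entry per 1GB and one pmd entry per 2MB of address
 * space) and find a free physical range for them in the e820 map.  The
 * range is handed out page by page via table_start/table_end.
 */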
320 static void __init find_early_table_space(unsigned long end)
321 {
322 	unsigned long puds, pmds, tables, start;
323 
324 	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
325 	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
326 	tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
327 		 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
328 
329 	/* RED-PEN putting page tables only on node 0 could
330 	   cause a hotspot and fill up ZONE_DMA. The page tables
331 	   need roughly 0.5KB per GB. */
332 	start = 0x8000;
333 	table_start = find_e820_area(start, end, tables);
334 	if (table_start == -1UL)
335 		panic("Cannot find space for the kernel page tables");
336 
337 	table_start >>= PAGE_SHIFT;
338 	table_end = table_start;
339 
340 	early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
341 		end, table_start << PAGE_SHIFT,
342 		(table_start << PAGE_SHIFT) + tables);
343 }
344 
345 /* Set up the direct mapping of physical memory at PAGE_OFFSET.
346    This runs before bootmem is initialized and gets pages directly from
347    physical memory; to access them they are temporarily mapped. */
348 void __meminit init_memory_mapping(unsigned long start, unsigned long end)
349 {
350 	unsigned long next;
351 
352 	Dprintk("init_memory_mapping\n");
353 
354 	/*
355 	 * Find space for the kernel direct mapping tables.
356 	 * Later we should allocate these tables in the local node of the memory
357 	 * mapped.  Unfortunately this is done currently before the nodes are
358 	 * discovered.
359 	 */
360 	if (!after_bootmem)
361 		find_early_table_space(end);
362 
363 	start = (unsigned long)__va(start);
364 	end = (unsigned long)__va(end);
365 
366 	for (; start < end; start = next) {
367 		unsigned long pud_phys;
368 		pgd_t *pgd = pgd_offset_k(start);
369 		pud_t *pud;
370 
371 		if (after_bootmem)
372 			pud = pud_offset(pgd, start & PGDIR_MASK);
373 		else
374 			pud = alloc_low_page(&pud_phys);
375 
376 		next = start + PGDIR_SIZE;
377 		if (next > end)
378 			next = end;
379 		phys_pud_init(pud, __pa(start), __pa(next));
380 		if (!after_bootmem)
381 			set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
382 		unmap_low_page(pud);
383 	}
384 
385 	if (!after_bootmem)
386 		mmu_cr4_features = read_cr4();
387 	__flush_tlb_all();
388 }
389 
390 #ifndef CONFIG_NUMA
391 void __init paging_init(void)
392 {
393 	unsigned long max_zone_pfns[MAX_NR_ZONES];
394 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
395 	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
396 	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
397 	max_zone_pfns[ZONE_NORMAL] = end_pfn;
398 
399 	memory_present(0, 0, end_pfn);
400 	sparse_init();
401 	free_area_init_nodes(max_zone_pfns);
402 }
403 #endif
404 
405 /* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
406    from the CPU leading to inconsistent cache lines. address and size
407    must be aligned to 2MB boundaries.
408    Does nothing when the mapping doesn't exist. */
409 void __init clear_kernel_mapping(unsigned long address, unsigned long size)
410 {
411 	unsigned long end = address + size;
412 
413 	BUG_ON(address & ~LARGE_PAGE_MASK);
414 	BUG_ON(size & ~LARGE_PAGE_MASK);
415 
416 	for (; address < end; address += LARGE_PAGE_SIZE) {
417 		pgd_t *pgd = pgd_offset_k(address);
418 		pud_t *pud;
419 		pmd_t *pmd;
420 		if (pgd_none(*pgd))
421 			continue;
422 		pud = pud_offset(pgd, address);
423 		if (pud_none(*pud))
424 			continue;
425 		pmd = pmd_offset(pud, address);
426 		if (!pmd || pmd_none(*pmd))
427 			continue;
428 		if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
429 			/* Could handle this, but it should not happen currently. */
430 			printk(KERN_ERR
431 	       "clear_kernel_mapping: mapping has been split. will leak memory\n");
432 			pmd_ERROR(*pmd);
433 		}
434 		set_pmd(pmd, __pmd(0));
435 	}
436 	__flush_tlb_all();
437 }
438 
439 /*
440  * Memory hotplug specific functions
441  */
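/* Hand a newly hot-added page to the page allocator and account for it. */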
442 void online_page(struct page *page)
443 {
444 	ClearPageReserved(page);
445 	init_page_count(page);
446 	__free_page(page);
447 	totalram_pages++;
448 	num_physpages++;
449 }
450 
451 #ifdef CONFIG_MEMORY_HOTPLUG
452 /*
453  * Memory is always added to the NORMAL zone. This means you will never
454  * get additional DMA/DMA32 memory.
455  */
456 int arch_add_memory(int nid, u64 start, u64 size)
457 {
458 	struct pglist_data *pgdat = NODE_DATA(nid);
459 	struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
460 	unsigned long start_pfn = start >> PAGE_SHIFT;
461 	unsigned long nr_pages = size >> PAGE_SHIFT;
462 	int ret;
463 
464 	init_memory_mapping(start, (start + size - 1));
465 
466 	ret = __add_pages(zone, start_pfn, nr_pages);
467 	if (ret)
468 		goto error;
469 
470 	return ret;
471 error:
472 	printk("%s: Problem encountered in __add_pages!\n", __func__);
473 	return ret;
474 }
475 EXPORT_SYMBOL_GPL(arch_add_memory);
476 
477 #if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
478 int memory_add_physaddr_to_nid(u64 start)
479 {
480 	return 0;
481 }
482 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
483 #endif
484 
485 #endif /* CONFIG_MEMORY_HOTPLUG */
486 
487 #ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
488 /*
489  * Memory hot-add without sparsemem: the mem_maps have been allocated in
490  * advance, so just online the pages.
491  */
492 int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
493 {
494 	int err = -EIO;
495 	unsigned long pfn;
496 	unsigned long total = 0, mem = 0;
497 	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
498 		if (pfn_valid(pfn)) {
499 			online_page(pfn_to_page(pfn));
500 			err = 0;
501 			mem++;
502 		}
503 		total++;
504 	}
505 	if (!err) {
506 		z->spanned_pages += total;
507 		z->present_pages += mem;
508 		z->zone_pgdat->node_spanned_pages += total;
509 		z->zone_pgdat->node_present_pages += mem;
510 	}
511 	return err;
512 }
513 #endif
514 
515 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
516 			 kcore_vsyscall;
517 
518 void __init mem_init(void)
519 {
520 	long codesize, reservedpages, datasize, initsize;
521 
522 	pci_iommu_alloc();
523 
524 	/* clear the zero-page */
525 	memset(empty_zero_page, 0, PAGE_SIZE);
526 
527 	reservedpages = 0;
528 
529 	/* this will put all low memory onto the freelists */
530 #ifdef CONFIG_NUMA
531 	totalram_pages = numa_free_all_bootmem();
532 #else
533 	totalram_pages = free_all_bootmem();
534 #endif
535 	reservedpages = end_pfn - totalram_pages -
536 					absent_pages_in_range(0, end_pfn);
537 
538 	after_bootmem = 1;
539 
540 	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
541 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
542 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
543 
544 	/* Register memory areas for /proc/kcore */
545 	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
546 	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
547 		   VMALLOC_END-VMALLOC_START);
548 	kclist_add(&kcore_kernel, &_stext, _end - _stext);
549 	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
550 	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
551 				 VSYSCALL_END - VSYSCALL_START);
552 
553 	printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
554 		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
555 		end_pfn << (PAGE_SHIFT-10),
556 		codesize >> 10,
557 		reservedpages << (PAGE_SHIFT-10),
558 		datasize >> 10,
559 		initsize >> 10);
560 }
561 
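/*
 * Return the pages in [begin, end) to the page allocator, poisoning them
 * first so stale uses of freed init memory are easier to spot.  Pages
 * that live inside the kernel text mapping also get that mapping
 * removed via change_page_attr_addr().
 */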
562 void free_init_pages(char *what, unsigned long begin, unsigned long end)
563 {
564 	unsigned long addr;
565 
566 	if (begin >= end)
567 		return;
568 
569 	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
570 	for (addr = begin; addr < end; addr += PAGE_SIZE) {
571 		ClearPageReserved(virt_to_page(addr));
572 		init_page_count(virt_to_page(addr));
573 		memset((void *)(addr & ~(PAGE_SIZE-1)),
574 			POISON_FREE_INITMEM, PAGE_SIZE);
575 		if (addr >= __START_KERNEL_map)
576 			change_page_attr_addr(addr, 1, __pgprot(0));
577 		free_page(addr);
578 		totalram_pages++;
579 	}
580 	if (addr > __START_KERNEL_map)
581 		global_flush_tlb();
582 }
583 
584 void free_initmem(void)
585 {
586 	free_init_pages("unused kernel memory",
587 			(unsigned long)(&__init_begin),
588 			(unsigned long)(&__init_end));
589 }
590 
591 #ifdef CONFIG_DEBUG_RODATA
592 
593 void mark_rodata_ro(void)
594 {
595 	unsigned long start = (unsigned long)_stext, end;
596 
597 #ifdef CONFIG_HOTPLUG_CPU
598 	/* It must still be possible to apply SMP alternatives. */
599 	if (num_possible_cpus() > 1)
600 		start = (unsigned long)_etext;
601 #endif
602 
603 #ifdef CONFIG_KPROBES
604 	start = (unsigned long)__start_rodata;
605 #endif
606 
607 	end = (unsigned long)__end_rodata;
608 	start = (start + PAGE_SIZE - 1) & PAGE_MASK;
609 	end &= PAGE_MASK;
610 	if (end <= start)
611 		return;
612 
613 	change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);
614 
615 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
616 	       (end - start) >> 10);
617 
618 	/*
619 	 * change_page_attr_addr() requires a global_flush_tlb() call after it.
620 	 * We do this after the printk so that if something went wrong in the
621 	 * change, the printk gets out at least to give a better debug hint
622 	 * of who is the culprit.
623 	 */
624 	global_flush_tlb();
625 }
626 #endif
627 
628 #ifdef CONFIG_BLK_DEV_INITRD
629 void free_initrd_mem(unsigned long start, unsigned long end)
630 {
631 	free_init_pages("initrd memory", start, end);
632 }
633 #endif
634 
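/*
 * Reserve a physical range with the bootmem allocator (per node under
 * NUMA).  Out-of-range addresses can legitimately show up in kdump
 * kernels and are tolerated; reservations that fall inside ZONE_DMA are
 * accounted via set_dma_reserve() so zone sizing can compensate.
 */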
635 void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
636 {
637 #ifdef CONFIG_NUMA
638 	int nid = phys_to_nid(phys);
639 #endif
640 	unsigned long pfn = phys >> PAGE_SHIFT;
641 	if (pfn >= end_pfn) {
642 		/* This can happen with kdump kernels when accessing firmware
643 		   tables. */
644 		if (pfn < end_pfn_map)
645 			return;
646 		printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
647 				phys, len);
648 		return;
649 	}
650 
651 	/* Should check here against the e820 map to avoid double free */
652 #ifdef CONFIG_NUMA
653 	reserve_bootmem_node(NODE_DATA(nid), phys, len);
654 #else
655 	reserve_bootmem(phys, len);
656 #endif
657 	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
658 		dma_reserve += len / PAGE_SIZE;
659 		set_dma_reserve(dma_reserve);
660 	}
661 }
662 
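/*
 * Return non-zero when 'addr' is a canonical kernel virtual address
 * backed by a present page-table mapping, handling 2MB (large) pmd
 * mappings as well as regular 4k pages.
 */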
663 int kern_addr_valid(unsigned long addr)
664 {
665 	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
666 	pgd_t *pgd;
667 	pud_t *pud;
668 	pmd_t *pmd;
669 	pte_t *pte;
670 
671 	if (above != 0 && above != -1UL)
672 		return 0;
673 
674 	pgd = pgd_offset_k(addr);
675 	if (pgd_none(*pgd))
676 		return 0;
677 
678 	pud = pud_offset(pgd, addr);
679 	if (pud_none(*pud))
680 		return 0;
681 
682 	pmd = pmd_offset(pud, addr);
683 	if (pmd_none(*pmd))
684 		return 0;
685 	if (pmd_large(*pmd))
686 		return pfn_valid(pmd_pfn(*pmd));
687 
688 	pte = pte_offset_kernel(pmd, addr);
689 	if (pte_none(*pte))
690 		return 0;
691 	return pfn_valid(pte_pfn(*pte));
692 }
693 
694 /* A pseudo VMA to allow ptrace access to the vsyscall page.  This only
695    covers the 64-bit vsyscall page now; 32-bit has a real VMA and does
696    not need special handling anymore. */
697 
698 static struct vm_area_struct gate_vma = {
699 	.vm_start = VSYSCALL_START,
700 	.vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
701 	.vm_page_prot = PAGE_READONLY_EXEC,
702 	.vm_flags = VM_READ | VM_EXEC
703 };
704 
705 struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
706 {
707 #ifdef CONFIG_IA32_EMULATION
708 	if (test_tsk_thread_flag(tsk, TIF_IA32))
709 		return NULL;
710 #endif
711 	return &gate_vma;
712 }
713 
714 int in_gate_area(struct task_struct *task, unsigned long addr)
715 {
716 	struct vm_area_struct *vma = get_gate_vma(task);
717 	if (!vma)
718 		return 0;
719 	return (addr >= vma->vm_start) && (addr < vma->vm_end);
720 }
721 
722 /* Use this when you have no reliable task/vma, typically from interrupt
723  * context.  It is less reliable than using the task's vma and may give
724  * false positives.
725  */
726 int in_gate_area_no_task(unsigned long addr)
727 {
728 	return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
729 }
730 
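/*
 * Node-local bootmem allocation that passes a 4GB 'goal' to
 * __alloc_bootmem_core(), so the memory is placed above the DMA32
 * range whenever possible.
 */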
731 void * __init alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
732 {
733 	return __alloc_bootmem_core(pgdat->bdata, size,
734 			SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0);
735 }
736 
737 const char *arch_vma_name(struct vm_area_struct *vma)
738 {
739 	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
740 		return "[vdso]";
741 	if (vma == &gate_vma)
742 		return "[vsyscall]";
743 	return NULL;
744 }
745 
746 #ifdef CONFIG_SPARSEMEM_VMEMMAP
747 /*
748  * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
749  */
750 int __meminit vmemmap_populate(struct page *start_page,
751 						unsigned long size, int node)
752 {
753 	unsigned long addr = (unsigned long)start_page;
754 	unsigned long end = (unsigned long)(start_page + size);
755 	unsigned long next;
756 	pgd_t *pgd;
757 	pud_t *pud;
758 	pmd_t *pmd;
759 
760 	for (; addr < end; addr = next) {
761 		next = pmd_addr_end(addr, end);
762 
763 		pgd = vmemmap_pgd_populate(addr, node);
764 		if (!pgd)
765 			return -ENOMEM;
766 		pud = vmemmap_pud_populate(pgd, addr, node);
767 		if (!pud)
768 			return -ENOMEM;
769 
770 		pmd = pmd_offset(pud, addr);
771 		if (pmd_none(*pmd)) {
772 			pte_t entry;
773 			void *p = vmemmap_alloc_block(PMD_SIZE, node);
774 			if (!p)
775 				return -ENOMEM;
776 
777 			entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
778 			mk_pte_huge(entry);
779 			set_pmd(pmd, __pmd(pte_val(entry)));
780 
781 			printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
782 				addr, addr + PMD_SIZE - 1, p, node);
783 		} else
784 			vmemmap_verify((pte_t *)pmd, node, addr, next);
785 	}
786 
787 	return 0;
788 }
789 #endif
790