/*
 *  linux/arch/arm/mm/mmu.c
 *
 *  Copyright (C) 1995-2005 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
#include <linux/fs.h>

#include <asm/cputype.h>
#include <asm/sections.h>
#include <asm/cachetype.h>
#include <asm/setup.h>
#include <asm/sizes.h>
#include <asm/smp_plat.h>
#include <asm/tlb.h>
#include <asm/highmem.h>
#include <asm/traps.h>

#include <asm/mach/arch.h>
#include <asm/mach/map.h>

#include "mm.h"

/*
 * empty_zero_page is a special page that is used for
 * zero-initialized data and COW.
 */
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);

/*
 * The pmd table for the upper-most set of pages.
 */
pmd_t *top_pmd;

#define CPOLICY_UNCACHED	0
#define CPOLICY_BUFFERED	1
#define CPOLICY_WRITETHROUGH	2
#define CPOLICY_WRITEBACK	3
#define CPOLICY_WRITEALLOC	4

static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;

EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);

struct cachepolicy {
	const char	policy[16];
	unsigned int	cr_mask;
	pmdval_t	pmd;
	pteval_t	pte;
};

static struct cachepolicy cache_policies[] __initdata = {
	{
		.policy		= "uncached",
		.cr_mask	= CR_W|CR_C,
		.pmd		= PMD_SECT_UNCACHED,
		.pte		= L_PTE_MT_UNCACHED,
	}, {
		.policy		= "buffered",
		.cr_mask	= CR_C,
		.pmd		= PMD_SECT_BUFFERED,
		.pte		= L_PTE_MT_BUFFERABLE,
	}, {
		.policy		= "writethrough",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WT,
		.pte		= L_PTE_MT_WRITETHROUGH,
	}, {
		.policy		= "writeback",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WB,
		.pte		= L_PTE_MT_WRITEBACK,
	}, {
		.policy		= "writealloc",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WBWA,
		.pte		= L_PTE_MT_WRITEALLOC,
	}
};

/*
 * These are useful for identifying cache coherency problems by allowing
 * the cache, or the cache and the write buffer, to be turned off.
 * (Note: the write buffer should not be on with the cache off.)
 */
static int __init early_cachepolicy(char *p)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
		int len = strlen(cache_policies[i].policy);

		if (memcmp(p, cache_policies[i].policy, len) == 0) {
			cachepolicy = i;
			cr_alignment &= ~cache_policies[i].cr_mask;
			cr_no_alignment &= ~cache_policies[i].cr_mask;
			break;
		}
	}
	if (i == ARRAY_SIZE(cache_policies))
		printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
	/*
	 * This restriction is partly to do with the way we boot; it is
	 * unpredictable to have memory mapped using two different sets of
	 * memory attributes (shared, type, and cache attribs).  We cannot
	 * change these attributes once the initial assembly has set up the
	 * page tables.
	 */
	if (cpu_architecture() >= CPU_ARCH_ARMv6) {
		printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n");
		cachepolicy = CPOLICY_WRITEBACK;
	}
	flush_cache_all();
	set_cr(cr_alignment);
	return 0;
}
early_param("cachepolicy", early_cachepolicy);
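
/*
 * Example (illustrative): booting with "cachepolicy=writethrough" on the
 * kernel command line selects CPOLICY_WRITETHROUGH here; note that on
 * ARMv6 and later the code above forces the policy back to writeback.
 */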

static int __init early_nocache(char *__unused)
{
	char *p = "buffered";
	printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
	early_cachepolicy(p);
	return 0;
}
early_param("nocache", early_nocache);

static int __init early_nowrite(char *__unused)
{
	char *p = "uncached";
	printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
	early_cachepolicy(p);
	return 0;
}
early_param("nowb", early_nowrite);

static int __init early_ecc(char *p)
{
	if (memcmp(p, "on", 2) == 0)
		ecc_mask = PMD_PROTECTION;
	else if (memcmp(p, "off", 3) == 0)
		ecc_mask = 0;
	return 0;
}
early_param("ecc", early_ecc);
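
/*
 * Example (illustrative): "ecc=on" on the command line sets ecc_mask to
 * PMD_PROTECTION, which build_mem_type_table() below ORs into the L1 and
 * section descriptors used for kernel memory so that ECC protection is
 * enabled for those mappings on CPUs that support it.
 */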

static int __init noalign_setup(char *__unused)
{
	cr_alignment &= ~CR_A;
	cr_no_alignment &= ~CR_A;
	set_cr(cr_alignment);
	return 1;
}
__setup("noalign", noalign_setup);

#ifndef CONFIG_SMP
void adjust_cr(unsigned long mask, unsigned long set)
{
	unsigned long flags;

	mask &= ~CR_A;

	set &= mask;

	local_irq_save(flags);

	cr_no_alignment = (cr_no_alignment & ~mask) | set;
	cr_alignment = (cr_alignment & ~mask) | set;

	set_cr((get_cr() & ~mask) | set);

	local_irq_restore(flags);
}
#endif
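
/*
 * Illustrative use of adjust_cr() (UP only); the call site here is an
 * assumption for the example, not taken from this file:
 *
 *	adjust_cr(CR_C, 0);
 *
 * would clear the dcache-enable bit in both cr_alignment and
 * cr_no_alignment as well as in the live control register.
 */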

#define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN
#define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE

static struct mem_type mem_types[] = {
	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
				  L_PTE_SHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_S,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_CACHED] = {	  /* ioremap_cached */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_WB,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_WC] = {	/* ioremap_wc */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE,
		.domain		= DOMAIN_IO,
	},
	[MT_UNCACHED] = {
		.prot_pte	= PROT_PTE_DEVICE,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
		.domain		= DOMAIN_IO,
	},
	[MT_CACHECLEAN] = {
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MINICLEAN] = {
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_LOW_VECTORS] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_RDONLY,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_USER,
	},
	[MT_HIGH_VECTORS] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_USER | L_PTE_RDONLY,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_USER,
	},
	[MT_MEMORY] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_ROM] = {
		.prot_sect = PMD_TYPE_SECT,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_NONCACHED] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_MT_BUFFERABLE,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_DTCM] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_XN,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_ITCM] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_SO] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_MT_UNCACHED,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S |
				PMD_SECT_UNCACHED | PMD_SECT_XN,
		.domain    = DOMAIN_KERNEL,
	},
};

const struct mem_type *get_mem_type(unsigned int type)
{
	return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
}
EXPORT_SYMBOL(get_mem_type);
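
/*
 * Sketch of a typical get_mem_type() caller (for example the ioremap
 * implementation); the exact call site is assumed here for illustration:
 *
 *	const struct mem_type *type = get_mem_type(MT_DEVICE);
 *	pgprot_t prot;
 *
 *	if (type)
 *		prot = __pgprot(type->prot_pte);
 */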

/*
 * Adjust the PMD section entries according to the CPU in use.
 */
static void __init build_mem_type_table(void)
{
	struct cachepolicy *cp;
	unsigned int cr = get_cr();
	pteval_t user_pgprot, kern_pgprot, vecs_pgprot;
	int cpu_arch = cpu_architecture();
	int i;

	if (cpu_arch < CPU_ARCH_ARMv6) {
#if defined(CONFIG_CPU_DCACHE_DISABLE)
		if (cachepolicy > CPOLICY_BUFFERED)
			cachepolicy = CPOLICY_BUFFERED;
#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
		if (cachepolicy > CPOLICY_WRITETHROUGH)
			cachepolicy = CPOLICY_WRITETHROUGH;
#endif
	}
	if (cpu_arch < CPU_ARCH_ARMv5) {
		if (cachepolicy >= CPOLICY_WRITEALLOC)
			cachepolicy = CPOLICY_WRITEBACK;
		ecc_mask = 0;
	}
	if (is_smp())
		cachepolicy = CPOLICY_WRITEALLOC;

	/*
	 * Strip out features not present on earlier architectures.
	 * Pre-ARMv5 CPUs don't have TEX bits.  Pre-ARMv6 CPUs or those
	 * without extended page tables don't have the 'Shared' bit.
	 */
	if (cpu_arch < CPU_ARCH_ARMv5)
		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
			mem_types[i].prot_sect &= ~PMD_SECT_TEX(7);
	if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3())
		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
			mem_types[i].prot_sect &= ~PMD_SECT_S;

	/*
	 * On ARMv5 and lower, bit 4 must be set for page tables (it was the
	 * cache "update-able on write" bit on ARM610).  However, Xscale and
	 * Xscale3 require this bit to be cleared.
	 */
	if (cpu_is_xscale() || cpu_is_xsc3()) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			mem_types[i].prot_sect &= ~PMD_BIT4;
			mem_types[i].prot_l1 &= ~PMD_BIT4;
		}
	} else if (cpu_arch < CPU_ARCH_ARMv6) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			if (mem_types[i].prot_l1)
				mem_types[i].prot_l1 |= PMD_BIT4;
			if (mem_types[i].prot_sect)
				mem_types[i].prot_sect |= PMD_BIT4;
		}
	}

	/*
	 * Mark the device areas according to the CPU/architecture.
	 */
	if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) {
		if (!cpu_is_xsc3()) {
			/*
			 * Mark device regions on ARMv6+ as execute-never
			 * to prevent speculative instruction fetches.
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;
		}
		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
			/*
			 * For ARMv7 with TEX remapping,
			 * - shared device is SXCB=1100
			 * - nonshared device is SXCB=0100
			 * - write combine device mem is SXCB=0001
			 * (Uncached Normal memory)
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1);
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
		} else if (cpu_is_xsc3()) {
			/*
			 * For Xscale3,
			 * - shared device is TEXCB=00101
			 * - nonshared device is TEXCB=01000
			 * - write combine device mem is TEXCB=00100
			 * (Inner/Outer Uncacheable in xsc3 parlance)
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		} else {
			/*
			 * For ARMv6 and ARMv7 without TEX remapping,
			 * - shared device is TEXCB=00001
			 * - nonshared device is TEXCB=01000
			 * - write combine device mem is TEXCB=00100
			 * (Uncached Normal in ARMv6 parlance).
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		}
	} else {
		/*
		 * On others, write combining is "Uncached/Buffered"
		 */
		mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
	}

	/*
	 * Now deal with the memory-type mappings
	 */
	cp = &cache_policies[cachepolicy];
	vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;

	/*
	 * Only use write-through for non-SMP systems
	 */
	if (!is_smp() && cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH)
		vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte;

	/*
	 * Enable CPU-specific coherency if supported.
	 * (Only available on XSC3 at the moment.)
	 */
	if (arch_is_coherent() && cpu_is_xsc3()) {
		mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
		mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
		mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
	}
	/*
	 * ARMv6 and above have extended page tables.
	 */
	if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
		/*
		 * Mark cache clean areas and XIP ROM read only
		 * from SVC mode and no access from userspace.
		 */
		mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;

		if (is_smp()) {
			/*
			 * Mark memory with the "shared" attribute
			 * for SMP systems
			 */
			user_pgprot |= L_PTE_SHARED;
			kern_pgprot |= L_PTE_SHARED;
			vecs_pgprot |= L_PTE_SHARED;
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S;
			mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
			mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
		}
	}

	/*
	 * Non-cacheable Normal - intended for memory areas that must
	 * not cause dirty cache line writebacks when used
	 */
	if (cpu_arch >= CPU_ARCH_ARMv6) {
		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
			/* Non-cacheable Normal is XCB = 001 */
			mem_types[MT_MEMORY_NONCACHED].prot_sect |=
				PMD_SECT_BUFFERED;
		} else {
			/* For both ARMv6 and non-TEX-remapping ARMv7 */
			mem_types[MT_MEMORY_NONCACHED].prot_sect |=
				PMD_SECT_TEX(1);
		}
	} else {
		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
	}

	for (i = 0; i < 16; i++) {
		unsigned long v = pgprot_val(protection_map[i]);
		protection_map[i] = __pgprot(v | user_pgprot);
	}

	mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
	mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;

	pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
				 L_PTE_DIRTY | kern_pgprot);

	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
	mem_types[MT_MEMORY].prot_pte |= kern_pgprot;
	mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask;
	mem_types[MT_ROM].prot_sect |= cp->pmd;

	switch (cp->pmd) {
	case PMD_SECT_WT:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
		break;
	case PMD_SECT_WB:
	case PMD_SECT_WBWA:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
		break;
	}
	printk("Memory policy: ECC %sabled, Data cache %s\n",
		ecc_mask ? "en" : "dis", cp->policy);

	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
		struct mem_type *t = &mem_types[i];
		if (t->prot_l1)
			t->prot_l1 |= PMD_DOMAIN(t->domain);
		if (t->prot_sect)
			t->prot_sect |= PMD_DOMAIN(t->domain);
	}
}

#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
			      unsigned long size, pgprot_t vma_prot)
{
	if (!pfn_valid(pfn))
		return pgprot_noncached(vma_prot);
	else if (file->f_flags & O_SYNC)
		return pgprot_writecombine(vma_prot);
	return vma_prot;
}
EXPORT_SYMBOL(phys_mem_access_prot);
#endif
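
/*
 * Note on the #ifdef block above: phys_mem_access_prot() governs userspace
 * mappings of physical memory (e.g. mmap() of /dev/mem).  Physical addresses
 * that are not RAM are always mapped uncached, while RAM opened with O_SYNC
 * gets a write-combining mapping; everything else keeps the vma's protection.
 */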

#define vectors_base()	(vectors_high() ? 0xffff0000 : 0)

static void __init *early_alloc(unsigned long sz)
{
	void *ptr = __va(memblock_alloc(sz, sz));
	memset(ptr, 0, sz);
	return ptr;
}

static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot)
{
	if (pmd_none(*pmd)) {
		pte_t *pte = early_alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE);
		__pmd_populate(pmd, __pa(pte), prot);
	}
	BUG_ON(pmd_bad(*pmd));
	return pte_offset_kernel(pmd, addr);
}

static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
				  unsigned long end, unsigned long pfn,
				  const struct mem_type *type)
{
	pte_t *pte = early_pte_alloc(pmd, addr, type->prot_l1);
	do {
		set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0);
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static void __init alloc_init_section(pud_t *pud, unsigned long addr,
				      unsigned long end, phys_addr_t phys,
				      const struct mem_type *type)
{
	pmd_t *pmd = pmd_offset(pud, addr);

	/*
	 * Try a section mapping - end, addr and phys must all be aligned
	 * to a section boundary.  Note that PMDs refer to the individual
	 * L1 entries, whereas PGDs refer to a group of L1 entries making
	 * up one logical pointer to an L2 table.
	 */
	if (((addr | end | phys) & ~SECTION_MASK) == 0) {
		pmd_t *p = pmd;

		if (addr & SECTION_SIZE)
			pmd++;

		do {
			*pmd = __pmd(phys | type->prot_sect);
			phys += SECTION_SIZE;
		} while (pmd++, addr += SECTION_SIZE, addr != end);

		flush_pmd_entry(p);
	} else {
		/*
		 * No need to loop; pte's aren't interested in the
		 * individual L1 entries.
		 */
		alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
	}
}
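
/*
 * Worked example (illustrative): a 2MB region whose virtual address,
 * physical address and size are all section (1MB) aligned is covered by
 * two section entries written directly into the pmd above; a region with
 * an unaligned start, end or size falls through to alloc_init_pte() and
 * is mapped with 4KB L2 page table entries instead.
 */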

static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
	unsigned long end, unsigned long phys, const struct mem_type *type)
{
	pud_t *pud = pud_offset(pgd, addr);
	unsigned long next;

	do {
		next = pud_addr_end(addr, end);
		alloc_init_section(pud, addr, next, phys, type);
		phys += next - addr;
	} while (pud++, addr = next, addr != end);
}

static void __init create_36bit_mapping(struct map_desc *md,
					const struct mem_type *type)
{
	unsigned long addr, length, end;
	phys_addr_t phys;
	pgd_t *pgd;

	addr = md->virtual;
	phys = __pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length);

	if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
		printk(KERN_ERR "MM: CPU does not support supersection "
		       "mapping for 0x%08llx at 0x%08lx\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	/* N.B.	ARMv6 supersections are only defined to work with domain 0.
	 *	Since domain assignments can in fact be arbitrary, the
	 *	'domain == 0' check below is required to ensure that ARMv6
	 *	supersections are only allocated for domain 0 regardless
	 *	of the actual domain assignments in use.
	 */
	if (type->domain) {
		printk(KERN_ERR "MM: invalid domain in supersection "
		       "mapping for 0x%08llx at 0x%08lx\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
		printk(KERN_ERR "MM: cannot create mapping for 0x%08llx"
		       " at 0x%08lx invalid alignment\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	/*
	 * Shift bits [35:32] of address into bits [23:20] of PMD
	 * (See ARMv6 spec).
	 */
	phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);

	pgd = pgd_offset_k(addr);
	end = addr + length;
	do {
		pud_t *pud = pud_offset(pgd, addr);
		pmd_t *pmd = pmd_offset(pud, addr);
		int i;

		for (i = 0; i < 16; i++)
			*pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER);

		addr += SUPERSECTION_SIZE;
		phys += SUPERSECTION_SIZE;
		pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
	} while (addr != end);
}

/*
 * Create the page directory entries and any necessary
 * page tables for the mapping specified by `md'.  We
 * are able to cope here with varying sizes and address
 * offsets, and we take full advantage of sections and
 * supersections.
 */
static void __init create_mapping(struct map_desc *md)
{
	unsigned long addr, length, end;
	phys_addr_t phys;
	const struct mem_type *type;
	pgd_t *pgd;

	if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
		printk(KERN_WARNING "BUG: not creating mapping for 0x%08llx"
		       " at 0x%08lx in user region\n",
		       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
		return;
	}

	if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
	    md->virtual >= PAGE_OFFSET && md->virtual < VMALLOC_END) {
		printk(KERN_WARNING "BUG: mapping for 0x%08llx"
		       " at 0x%08lx overlaps vmalloc space\n",
		       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
	}

	type = &mem_types[md->type];

	/*
	 * Catch 36-bit addresses
	 */
	if (md->pfn >= 0x100000) {
		create_36bit_mapping(md, type);
		return;
	}

	addr = md->virtual & PAGE_MASK;
	phys = __pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));

	if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
		printk(KERN_WARNING "BUG: map for 0x%08llx at 0x%08lx can not "
		       "be mapped using pages, ignoring.\n",
		       (long long)__pfn_to_phys(md->pfn), addr);
		return;
	}

	pgd = pgd_offset_k(addr);
	end = addr + length;
	do {
		unsigned long next = pgd_addr_end(addr, end);

		alloc_init_pud(pgd, addr, next, phys, type);

		phys += next - addr;
		addr = next;
	} while (pgd++, addr != end);
}

/*
 * Create the architecture specific mappings
 */
void __init iotable_init(struct map_desc *io_desc, int nr)
{
	int i;

	for (i = 0; i < nr; i++)
		create_mapping(io_desc + i);
}
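
/*
 * Illustrative iotable_init() usage from a machine's map_io() callback; the
 * names and addresses below are made up for the example:
 *
 *	static struct map_desc board_io_desc[] __initdata = {
 *		{
 *			.virtual	= 0xf8000000,
 *			.pfn		= __phys_to_pfn(0x10000000),
 *			.length		= SZ_1M,
 *			.type		= MT_DEVICE,
 *		},
 *	};
 *
 *	static void __init board_map_io(void)
 *	{
 *		iotable_init(board_io_desc, ARRAY_SIZE(board_io_desc));
 *	}
 */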

static void * __initdata vmalloc_min = (void *)(VMALLOC_END - SZ_128M);

/*
 * vmalloc=size forces the vmalloc area to be exactly 'size'
 * bytes. This can be used to increase (or decrease) the vmalloc
 * area - the default is 128MB.
 */
static int __init early_vmalloc(char *arg)
{
	unsigned long vmalloc_reserve = memparse(arg, NULL);

	if (vmalloc_reserve < SZ_16M) {
		vmalloc_reserve = SZ_16M;
		printk(KERN_WARNING
			"vmalloc area too small, limiting to %luMB\n",
			vmalloc_reserve >> 20);
	}

	if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
		vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
		printk(KERN_WARNING
			"vmalloc area is too big, limiting to %luMB\n",
			vmalloc_reserve >> 20);
	}

	vmalloc_min = (void *)(VMALLOC_END - vmalloc_reserve);
	return 0;
}
early_param("vmalloc", early_vmalloc);
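
/*
 * Example (illustrative): booting with "vmalloc=256M" moves vmalloc_min
 * down so that 256MB of address space below VMALLOC_END is reserved for
 * vmalloc/ioremap, at the cost of a smaller directly-mapped lowmem area.
 */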

static phys_addr_t lowmem_limit __initdata = 0;

void __init sanity_check_meminfo(void)
{
	int i, j, highmem = 0;

	for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
		struct membank *bank = &meminfo.bank[j];
		*bank = meminfo.bank[i];

#ifdef CONFIG_HIGHMEM
		if (__va(bank->start) >= vmalloc_min ||
		    __va(bank->start) < (void *)PAGE_OFFSET)
			highmem = 1;

		bank->highmem = highmem;

		/*
		 * Split memory banks which partially overlap the vmalloc
		 * area; this greatly simplifies things later.
		 */
		if (__va(bank->start) < vmalloc_min &&
		    bank->size > vmalloc_min - __va(bank->start)) {
			if (meminfo.nr_banks >= NR_BANKS) {
				printk(KERN_CRIT "NR_BANKS too low, "
						 "ignoring high memory\n");
			} else {
				memmove(bank + 1, bank,
					(meminfo.nr_banks - i) * sizeof(*bank));
				meminfo.nr_banks++;
				i++;
				bank[1].size -= vmalloc_min - __va(bank->start);
				bank[1].start = __pa(vmalloc_min - 1) + 1;
				bank[1].highmem = highmem = 1;
				j++;
			}
			bank->size = vmalloc_min - __va(bank->start);
		}
#else
		bank->highmem = highmem;

		/*
		 * Check whether this memory bank would entirely overlap
		 * the vmalloc area.
		 */
		if (__va(bank->start) >= vmalloc_min ||
		    __va(bank->start) < (void *)PAGE_OFFSET) {
			printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
			       "(vmalloc region overlap).\n",
			       (unsigned long long)bank->start,
			       (unsigned long long)bank->start + bank->size - 1);
			continue;
		}

		/*
		 * Check whether this memory bank would partially overlap
		 * the vmalloc area.
		 */
		if (__va(bank->start + bank->size) > vmalloc_min ||
		    __va(bank->start + bank->size) < __va(bank->start)) {
			unsigned long newsize = vmalloc_min - __va(bank->start);
			printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx "
			       "to -%.8llx (vmalloc region overlap).\n",
			       (unsigned long long)bank->start,
			       (unsigned long long)bank->start + bank->size - 1,
			       (unsigned long long)bank->start + newsize - 1);
			bank->size = newsize;
		}
#endif
		if (!bank->highmem && bank->start + bank->size > lowmem_limit)
			lowmem_limit = bank->start + bank->size;

		j++;
	}
#ifdef CONFIG_HIGHMEM
	if (highmem) {
		const char *reason = NULL;

		if (cache_is_vipt_aliasing()) {
			/*
			 * Interactions between kmap and other mappings
			 * make highmem support with aliasing VIPT caches
			 * rather difficult.
			 */
			reason = "with VIPT aliasing cache";
		}
		if (reason) {
			printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
				reason);
			while (j > 0 && meminfo.bank[j - 1].highmem)
				j--;
		}
	}
#endif
	meminfo.nr_banks = j;
	memblock_set_current_limit(lowmem_limit);
}
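
/*
 * Example (illustrative, assuming the usual 3G/1G split and the default
 * 128MB vmalloc area): a single RAM bank large enough to reach the start
 * of the vmalloc area is truncated at that boundary, or, with
 * CONFIG_HIGHMEM, split so that the portion above the boundary becomes a
 * highmem bank.
 */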

static inline void prepare_page_table(void)
{
	unsigned long addr;
	phys_addr_t end;

	/*
	 * Clear out all the mappings below the kernel image.
	 */
	for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

#ifdef CONFIG_XIP_KERNEL
	/* The XIP kernel is mapped in the module area -- skip over it */
	addr = ((unsigned long)_etext + PMD_SIZE - 1) & PMD_MASK;
#endif
	for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

	/*
	 * Find the end of the first block of lowmem.
	 */
	end = memblock.memory.regions[0].base + memblock.memory.regions[0].size;
	if (end >= lowmem_limit)
		end = lowmem_limit;

	/*
	 * Clear out all the kernel space mappings, except for the first
	 * memory bank, up to the end of the vmalloc region.
	 */
	for (addr = __phys_to_virt(end);
	     addr < VMALLOC_END; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));
}

#define SWAPPER_PG_DIR_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))

/*
 * Reserve the special regions of memory
 */
void __init arm_mm_memblock_reserve(void)
{
	/*
	 * Reserve the page tables.  These are already in use,
	 * and can only be in node 0.
	 */
	memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE);

#ifdef CONFIG_SA1111
	/*
	 * Because of the SA1111 DMA bug, we want to preserve our
	 * precious DMA-able memory...
	 */
	memblock_reserve(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET);
#endif
}

/*
 * Set up the device mappings.  Since we clear out the page tables for all
 * mappings above VMALLOC_END, we will remove any debug device mappings.
 * This means you have to be careful how you debug this function, or any
 * called function: you cannot use any function or debugging method which
 * may touch any device, otherwise the kernel _will_ crash.
 */
static void __init devicemaps_init(struct machine_desc *mdesc)
{
	struct map_desc map;
	unsigned long addr;

	/*
	 * Allocate the vector page early.
	 */
	vectors_page = early_alloc(PAGE_SIZE);

	for (addr = VMALLOC_END; addr; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

	/*
	 * Map the kernel if it is XIP.
	 * It is always first in the module area.
	 */
#ifdef CONFIG_XIP_KERNEL
	map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
	map.virtual = MODULES_VADDR;
	map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
	map.type = MT_ROM;
	create_mapping(&map);
#endif

	/*
	 * Map the cache flushing regions.
	 */
#ifdef FLUSH_BASE
	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
	map.virtual = FLUSH_BASE;
	map.length = SZ_1M;
	map.type = MT_CACHECLEAN;
	create_mapping(&map);
#endif
#ifdef FLUSH_BASE_MINICACHE
	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
	map.virtual = FLUSH_BASE_MINICACHE;
	map.length = SZ_1M;
	map.type = MT_MINICLEAN;
	create_mapping(&map);
#endif

	/*
	 * Create a mapping for the machine vectors at the high-vectors
	 * location (0xffff0000).  If we aren't using high-vectors, also
	 * create a mapping at the low-vectors virtual address.
	 */
	map.pfn = __phys_to_pfn(virt_to_phys(vectors_page));
	map.virtual = 0xffff0000;
	map.length = PAGE_SIZE;
	map.type = MT_HIGH_VECTORS;
	create_mapping(&map);

	if (!vectors_high()) {
		map.virtual = 0;
		map.type = MT_LOW_VECTORS;
		create_mapping(&map);
	}

	/*
	 * Ask the machine support to map in the statically mapped devices.
	 */
	if (mdesc->map_io)
		mdesc->map_io();

	/*
	 * Finally flush the caches and tlb to ensure that we're in a
	 * consistent state wrt the writebuffer.  This also ensures that
	 * any write-allocated cache lines in the vector page are written
	 * back.  After this point, we can start to touch devices again.
	 */
	local_flush_tlb_all();
	flush_cache_all();
}

static void __init kmap_init(void)
{
#ifdef CONFIG_HIGHMEM
	pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),
		PKMAP_BASE, _PAGE_KERNEL_TABLE);
#endif
}

static void __init map_lowmem(void)
{
	struct memblock_region *reg;

	/* Map all the lowmem memory banks. */
	for_each_memblock(memory, reg) {
		phys_addr_t start = reg->base;
		phys_addr_t end = start + reg->size;
		struct map_desc map;

		if (end > lowmem_limit)
			end = lowmem_limit;
		if (start >= end)
			break;

		map.pfn = __phys_to_pfn(start);
		map.virtual = __phys_to_virt(start);
		map.length = end - start;
		map.type = MT_MEMORY;

		create_mapping(&map);
	}
}

/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps, and sets up the zero page, bad page and bad page tables.
 */
void __init paging_init(struct machine_desc *mdesc)
{
	void *zero_page;

	memblock_set_current_limit(lowmem_limit);

	build_mem_type_table();
	prepare_page_table();
	map_lowmem();
	devicemaps_init(mdesc);
	kmap_init();

	top_pmd = pmd_off_k(0xffff0000);

	/* allocate the zero page. */
	zero_page = early_alloc(PAGE_SIZE);

	bootmem_init();

	empty_zero_page = virt_to_page(zero_page);
	__flush_dcache_page(NULL, empty_zero_page);
}