/*
 *  linux/arch/arm/mm/mmu.c
 *
 *  Copyright (C) 1995-2005 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
#include <linux/fs.h>

#include <asm/cputype.h>
#include <asm/sections.h>
#include <asm/cachetype.h>
#include <asm/setup.h>
#include <asm/sizes.h>
#include <asm/smp_plat.h>
#include <asm/tlb.h>
#include <asm/highmem.h>
#include <asm/traps.h>

#include <asm/mach/arch.h>
#include <asm/mach/map.h>

#include "mm.h"

/*
 * empty_zero_page is a special page that is used for
 * zero-initialized data and COW.
 */
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);

/*
 * The pmd table for the upper-most set of pages.
 */
pmd_t *top_pmd;

#define CPOLICY_UNCACHED	0
#define CPOLICY_BUFFERED	1
#define CPOLICY_WRITETHROUGH	2
#define CPOLICY_WRITEBACK	3
#define CPOLICY_WRITEALLOC	4

static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;

EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);

struct cachepolicy {
	const char	policy[16];
	unsigned int	cr_mask;
	pmdval_t	pmd;
	pteval_t	pte;
};

static struct cachepolicy cache_policies[] __initdata = {
	{
		.policy		= "uncached",
		.cr_mask	= CR_W|CR_C,
		.pmd		= PMD_SECT_UNCACHED,
		.pte		= L_PTE_MT_UNCACHED,
	}, {
		.policy		= "buffered",
		.cr_mask	= CR_C,
		.pmd		= PMD_SECT_BUFFERED,
		.pte		= L_PTE_MT_BUFFERABLE,
	}, {
		.policy		= "writethrough",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WT,
		.pte		= L_PTE_MT_WRITETHROUGH,
	}, {
		.policy		= "writeback",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WB,
		.pte		= L_PTE_MT_WRITEBACK,
	}, {
		.policy		= "writealloc",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WBWA,
		.pte		= L_PTE_MT_WRITEALLOC,
	}
};
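
/*
 * Note: the entries above must stay in the same order as the CPOLICY_*
 * indices; early_cachepolicy() records the matching index in 'cachepolicy'
 * and build_mem_type_table() uses it to index this table.
 */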

/*
 * These are useful for identifying cache coherency
 * problems by allowing the cache or the cache and
 * writebuffer to be turned off.  (Note: the write
 * buffer should not be on and the cache off).
 */
static int __init early_cachepolicy(char *p)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
		int len = strlen(cache_policies[i].policy);

		if (memcmp(p, cache_policies[i].policy, len) == 0) {
			cachepolicy = i;
			cr_alignment &= ~cache_policies[i].cr_mask;
			cr_no_alignment &= ~cache_policies[i].cr_mask;
			break;
		}
	}
	if (i == ARRAY_SIZE(cache_policies))
		printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
	/*
	 * This restriction is partly to do with the way we boot; it is
	 * unpredictable to have memory mapped using two different sets of
	 * memory attributes (shared, type, and cache attribs).  We cannot
	 * change these attributes once the initial assembly has set up the
	 * page tables.
	 */
	if (cpu_architecture() >= CPU_ARCH_ARMv6) {
		printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n");
		cachepolicy = CPOLICY_WRITEBACK;
	}
	flush_cache_all();
	set_cr(cr_alignment);
	return 0;
}
early_param("cachepolicy", early_cachepolicy);

static int __init early_nocache(char *__unused)
{
	char *p = "buffered";
	printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
	early_cachepolicy(p);
	return 0;
}
early_param("nocache", early_nocache);

static int __init early_nowrite(char *__unused)
{
	char *p = "uncached";
	printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
	early_cachepolicy(p);
	return 0;
}
early_param("nowb", early_nowrite);

static int __init early_ecc(char *p)
{
	if (memcmp(p, "on", 2) == 0)
		ecc_mask = PMD_PROTECTION;
	else if (memcmp(p, "off", 3) == 0)
		ecc_mask = 0;
	return 0;
}
early_param("ecc", early_ecc);

static int __init noalign_setup(char *__unused)
{
	cr_alignment &= ~CR_A;
	cr_no_alignment &= ~CR_A;
	set_cr(cr_alignment);
	return 1;
}
__setup("noalign", noalign_setup);

#ifndef CONFIG_SMP
void adjust_cr(unsigned long mask, unsigned long set)
{
	unsigned long flags;

	mask &= ~CR_A;

	set &= mask;

	local_irq_save(flags);

	cr_no_alignment = (cr_no_alignment & ~mask) | set;
	cr_alignment = (cr_alignment & ~mask) | set;

	set_cr((get_cr() & ~mask) | set);

	local_irq_restore(flags);
}
#endif

#define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN
#define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE
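
/*
 * The mem_types[] table below gives the baseline L1 section, L1 page-table
 * and L2 PTE protection bits plus the domain for each mapping type.
 * build_mem_type_table() adjusts these values at boot for the CPU
 * architecture and cache policy actually in use.
 */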
static struct mem_type mem_types[] = {
	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
				  L_PTE_SHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_S,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_CACHED] = {	  /* ioremap_cached */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_WB,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_WC] = {	/* ioremap_wc */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE,
		.domain		= DOMAIN_IO,
	},
	[MT_UNCACHED] = {
		.prot_pte	= PROT_PTE_DEVICE,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
		.domain		= DOMAIN_IO,
	},
	[MT_CACHECLEAN] = {
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MINICLEAN] = {
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_LOW_VECTORS] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_RDONLY,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_USER,
	},
	[MT_HIGH_VECTORS] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_USER | L_PTE_RDONLY,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_USER,
	},
	[MT_MEMORY] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_ROM] = {
		.prot_sect = PMD_TYPE_SECT,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_NONCACHED] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_MT_BUFFERABLE,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_DTCM] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_XN,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_ITCM] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_SO] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_MT_UNCACHED,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S |
				PMD_SECT_UNCACHED | PMD_SECT_XN,
		.domain    = DOMAIN_KERNEL,
	},
};
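
/*
 * Return the mem_type entry for a mapping type, or NULL if the type is
 * out of range so that callers can reject the request.
 */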
const struct mem_type *get_mem_type(unsigned int type)
{
	return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
}
EXPORT_SYMBOL(get_mem_type);

/*
 * Adjust the PMD section entries according to the CPU in use.
 */
static void __init build_mem_type_table(void)
{
	struct cachepolicy *cp;
	unsigned int cr = get_cr();
	pteval_t user_pgprot, kern_pgprot, vecs_pgprot;
	int cpu_arch = cpu_architecture();
	int i;

	if (cpu_arch < CPU_ARCH_ARMv6) {
#if defined(CONFIG_CPU_DCACHE_DISABLE)
		if (cachepolicy > CPOLICY_BUFFERED)
			cachepolicy = CPOLICY_BUFFERED;
#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
		if (cachepolicy > CPOLICY_WRITETHROUGH)
			cachepolicy = CPOLICY_WRITETHROUGH;
#endif
	}
	if (cpu_arch < CPU_ARCH_ARMv5) {
		if (cachepolicy >= CPOLICY_WRITEALLOC)
			cachepolicy = CPOLICY_WRITEBACK;
		ecc_mask = 0;
	}
	if (is_smp())
		cachepolicy = CPOLICY_WRITEALLOC;

	/*
	 * Strip out features not present on earlier architectures.
	 * Pre-ARMv5 CPUs don't have TEX bits.  Pre-ARMv6 CPUs or those
	 * without extended page tables don't have the 'Shared' bit.
	 */
	if (cpu_arch < CPU_ARCH_ARMv5)
		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
			mem_types[i].prot_sect &= ~PMD_SECT_TEX(7);
	if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3())
		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
			mem_types[i].prot_sect &= ~PMD_SECT_S;

	/*
	 * ARMv5 and lower, bit 4 must be set for page tables (was: cache
	 * "update-able on write" bit on ARM610).  However, Xscale and
	 * Xscale3 require this bit to be cleared.
	 */
	if (cpu_is_xscale() || cpu_is_xsc3()) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			mem_types[i].prot_sect &= ~PMD_BIT4;
			mem_types[i].prot_l1 &= ~PMD_BIT4;
		}
	} else if (cpu_arch < CPU_ARCH_ARMv6) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			if (mem_types[i].prot_l1)
				mem_types[i].prot_l1 |= PMD_BIT4;
			if (mem_types[i].prot_sect)
				mem_types[i].prot_sect |= PMD_BIT4;
		}
	}

	/*
	 * Mark the device areas according to the CPU/architecture.
	 */
	if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) {
		if (!cpu_is_xsc3()) {
			/*
			 * Mark device regions on ARMv6+ as execute-never
			 * to prevent speculative instruction fetches.
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;
		}
		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
			/*
			 * For ARMv7 with TEX remapping,
			 * - shared device is SXCB=1100
			 * - nonshared device is SXCB=0100
			 * - write combine device mem is SXCB=0001
			 * (Uncached Normal memory)
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1);
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
		} else if (cpu_is_xsc3()) {
			/*
			 * For Xscale3,
			 * - shared device is TEXCB=00101
			 * - nonshared device is TEXCB=01000
			 * - write combine device mem is TEXCB=00100
			 * (Inner/Outer Uncacheable in xsc3 parlance)
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		} else {
			/*
			 * For ARMv6 and ARMv7 without TEX remapping,
			 * - shared device is TEXCB=00001
			 * - nonshared device is TEXCB=01000
			 * - write combine device mem is TEXCB=00100
			 * (Uncached Normal in ARMv6 parlance).
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		}
	} else {
		/*
		 * On others, write combining is "Uncached/Buffered"
		 */
		mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
	}

	/*
	 * Now deal with the memory-type mappings
	 */
	cp = &cache_policies[cachepolicy];
	vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;

	/*
	 * Only use write-through for non-SMP systems
	 */
	if (!is_smp() && cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH)
		vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte;

	/*
	 * Enable CPU-specific coherency if supported.
	 * (Only available on XSC3 at the moment.)
	 */
	if (arch_is_coherent() && cpu_is_xsc3()) {
		mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
		mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
		mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
	}
	/*
	 * ARMv6 and above have extended page tables.
	 */
	if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
		/*
		 * Mark cache clean areas and XIP ROM read only
		 * from SVC mode and no access from userspace.
		 */
		mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;

		if (is_smp()) {
			/*
			 * Mark memory with the "shared" attribute
			 * for SMP systems
			 */
			user_pgprot |= L_PTE_SHARED;
			kern_pgprot |= L_PTE_SHARED;
			vecs_pgprot |= L_PTE_SHARED;
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S;
			mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
			mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
		}
	}

	/*
	 * Non-cacheable Normal - intended for memory areas that must
	 * not cause dirty cache line writebacks when used
	 */
	if (cpu_arch >= CPU_ARCH_ARMv6) {
		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
			/* Non-cacheable Normal is XCB = 001 */
			mem_types[MT_MEMORY_NONCACHED].prot_sect |=
				PMD_SECT_BUFFERED;
		} else {
			/* For both ARMv6 and non-TEX-remapping ARMv7 */
			mem_types[MT_MEMORY_NONCACHED].prot_sect |=
				PMD_SECT_TEX(1);
		}
	} else {
		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
	}

	for (i = 0; i < 16; i++) {
		unsigned long v = pgprot_val(protection_map[i]);
		protection_map[i] = __pgprot(v | user_pgprot);
	}

	mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
	mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;

	pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
				 L_PTE_DIRTY | kern_pgprot);

	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
	mem_types[MT_MEMORY].prot_pte |= kern_pgprot;
	mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask;
	mem_types[MT_ROM].prot_sect |= cp->pmd;

	switch (cp->pmd) {
	case PMD_SECT_WT:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
		break;
	case PMD_SECT_WB:
	case PMD_SECT_WBWA:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
		break;
	}
	printk("Memory policy: ECC %sabled, Data cache %s\n",
		ecc_mask ? "en" : "dis", cp->policy);

	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
		struct mem_type *t = &mem_types[i];
		if (t->prot_l1)
			t->prot_l1 |= PMD_DOMAIN(t->domain);
		if (t->prot_sect)
			t->prot_sect |= PMD_DOMAIN(t->domain);
	}
}

#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
			      unsigned long size, pgprot_t vma_prot)
{
	if (!pfn_valid(pfn))
		return pgprot_noncached(vma_prot);
	else if (file->f_flags & O_SYNC)
		return pgprot_writecombine(vma_prot);
	return vma_prot;
}
EXPORT_SYMBOL(phys_mem_access_prot);
#endif
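
/*
 * The exception vectors are mapped at 0xffff0000 when the CPU is
 * configured for high vectors, otherwise at virtual address 0.
 */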
#define vectors_base()	(vectors_high() ? 0xffff0000 : 0)

static void __init *early_alloc(unsigned long sz)
{
	void *ptr = __va(memblock_alloc(sz, sz));
	memset(ptr, 0, sz);
	return ptr;
}

static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot)
{
	if (pmd_none(*pmd)) {
		pte_t *pte = early_alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE);
		__pmd_populate(pmd, __pa(pte), prot);
	}
	BUG_ON(pmd_bad(*pmd));
	return pte_offset_kernel(pmd, addr);
}

static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
				  unsigned long end, unsigned long pfn,
				  const struct mem_type *type)
{
	pte_t *pte = early_pte_alloc(pmd, addr, type->prot_l1);
	do {
		set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0);
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static void __init alloc_init_section(pud_t *pud, unsigned long addr,
				      unsigned long end, phys_addr_t phys,
				      const struct mem_type *type)
{
	pmd_t *pmd = pmd_offset(pud, addr);

	/*
	 * Try a section mapping - end, addr and phys must all be aligned
	 * to a section boundary.  Note that PMDs refer to the individual
	 * L1 entries, whereas PGDs refer to a group of L1 entries making
	 * up one logical pointer to an L2 table.
	 */
	if (((addr | end | phys) & ~SECTION_MASK) == 0) {
		pmd_t *p = pmd;

		if (addr & SECTION_SIZE)
			pmd++;

		do {
			*pmd = __pmd(phys | type->prot_sect);
			phys += SECTION_SIZE;
		} while (pmd++, addr += SECTION_SIZE, addr != end);

		flush_pmd_entry(p);
	} else {
		/*
		 * No need to loop; pte's aren't interested in the
		 * individual L1 entries.
		 */
		alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
	}
}

static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
	unsigned long phys, const struct mem_type *type)
{
	pud_t *pud = pud_offset(pgd, addr);
	unsigned long next;

	do {
		next = pud_addr_end(addr, end);
		alloc_init_section(pud, addr, next, phys, type);
		phys += next - addr;
	} while (pud++, addr = next, addr != end);
}
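
/*
 * Create a supersection mapping for a region whose physical address lies
 * above 4GB (36-bit addressing on ARMv6+ and XSC3).  Each 16MB
 * supersection is described by 16 identical first-level entries.
 */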
static void __init create_36bit_mapping(struct map_desc *md,
					const struct mem_type *type)
{
	unsigned long addr, length, end;
	phys_addr_t phys;
	pgd_t *pgd;

	addr = md->virtual;
	phys = __pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length);

	if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
		printk(KERN_ERR "MM: CPU does not support supersection "
		       "mapping for 0x%08llx at 0x%08lx\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	/* N.B.	ARMv6 supersections are only defined to work with domain 0.
	 *	Since domain assignments can in fact be arbitrary, the
	 *	'domain == 0' check below is required to ensure that ARMv6
	 *	supersections are only allocated for domain 0 regardless
	 *	of the actual domain assignments in use.
	 */
	if (type->domain) {
		printk(KERN_ERR "MM: invalid domain in supersection "
		       "mapping for 0x%08llx at 0x%08lx\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
		printk(KERN_ERR "MM: cannot create mapping for 0x%08llx"
		       " at 0x%08lx invalid alignment\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	/*
	 * Shift bits [35:32] of address into bits [23:20] of PMD
	 * (See ARMv6 spec).
	 */
	phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);

	pgd = pgd_offset_k(addr);
	end = addr + length;
	do {
		pud_t *pud = pud_offset(pgd, addr);
		pmd_t *pmd = pmd_offset(pud, addr);
		int i;

		for (i = 0; i < 16; i++)
			*pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER);

		addr += SUPERSECTION_SIZE;
		phys += SUPERSECTION_SIZE;
		pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
	} while (addr != end);
}

/*
 * Create the page directory entries and any necessary
 * page tables for the mapping specified by `md'.  We
 * are able to cope here with varying sizes and address
 * offsets, and we take full advantage of sections and
 * supersections.
 */
static void __init create_mapping(struct map_desc *md)
{
	unsigned long addr, length, end;
	phys_addr_t phys;
	const struct mem_type *type;
	pgd_t *pgd;

	if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
		printk(KERN_WARNING "BUG: not creating mapping for 0x%08llx"
		       " at 0x%08lx in user region\n",
		       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
		return;
	}

	if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
	    md->virtual >= PAGE_OFFSET && md->virtual < VMALLOC_END) {
		printk(KERN_WARNING "BUG: mapping for 0x%08llx"
		       " at 0x%08lx overlaps vmalloc space\n",
		       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
	}

	type = &mem_types[md->type];

	/*
	 * Catch 36-bit addresses
	 */
	if (md->pfn >= 0x100000) {
		create_36bit_mapping(md, type);
		return;
	}

	addr = md->virtual & PAGE_MASK;
	phys = __pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));

	if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
		printk(KERN_WARNING "BUG: map for 0x%08llx at 0x%08lx can not "
		       "be mapped using pages, ignoring.\n",
		       (long long)__pfn_to_phys(md->pfn), addr);
		return;
	}

	pgd = pgd_offset_k(addr);
	end = addr + length;
	do {
		unsigned long next = pgd_addr_end(addr, end);

		alloc_init_pud(pgd, addr, next, phys, type);

		phys += next - addr;
		addr = next;
	} while (pgd++, addr != end);
}

/*
 * Create the architecture specific mappings
 */
void __init iotable_init(struct map_desc *io_desc, int nr)
{
	int i;

	for (i = 0; i < nr; i++)
		create_mapping(io_desc + i);
}
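
/*
 * vmalloc_min marks the lowest virtual address the vmalloc area may use;
 * memory banks starting above the corresponding physical limit are
 * treated as highmem (or ignored when CONFIG_HIGHMEM is not set).
 */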
static void * __initdata vmalloc_min = (void *)(VMALLOC_END - SZ_128M);

/*
 * vmalloc=size forces the vmalloc area to be exactly 'size'
 * bytes. This can be used to increase (or decrease) the vmalloc
 * area - the default is 128m.
 */
static int __init early_vmalloc(char *arg)
{
	unsigned long vmalloc_reserve = memparse(arg, NULL);

	if (vmalloc_reserve < SZ_16M) {
		vmalloc_reserve = SZ_16M;
		printk(KERN_WARNING
			"vmalloc area too small, limiting to %luMB\n",
			vmalloc_reserve >> 20);
	}

	if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
		vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
		printk(KERN_WARNING
			"vmalloc area is too big, limiting to %luMB\n",
			vmalloc_reserve >> 20);
	}

	vmalloc_min = (void *)(VMALLOC_END - vmalloc_reserve);
	return 0;
}
early_param("vmalloc", early_vmalloc);

static phys_addr_t lowmem_limit __initdata = 0;

void __init sanity_check_meminfo(void)
{
	int i, j, highmem = 0;

	for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
		struct membank *bank = &meminfo.bank[j];
		*bank = meminfo.bank[i];

#ifdef CONFIG_HIGHMEM
		if (__va(bank->start) >= vmalloc_min ||
		    __va(bank->start) < (void *)PAGE_OFFSET)
			highmem = 1;

		bank->highmem = highmem;

		/*
		 * Split those memory banks which are partially overlapping
		 * the vmalloc area greatly simplifying things later.
		 */
		if (__va(bank->start) < vmalloc_min &&
		    bank->size > vmalloc_min - __va(bank->start)) {
			if (meminfo.nr_banks >= NR_BANKS) {
				printk(KERN_CRIT "NR_BANKS too low, "
						 "ignoring high memory\n");
			} else {
				memmove(bank + 1, bank,
					(meminfo.nr_banks - i) * sizeof(*bank));
				meminfo.nr_banks++;
				i++;
				bank[1].size -= vmalloc_min - __va(bank->start);
				bank[1].start = __pa(vmalloc_min - 1) + 1;
				bank[1].highmem = highmem = 1;
				j++;
			}
			bank->size = vmalloc_min - __va(bank->start);
		}
#else
		bank->highmem = highmem;

		/*
		 * Check whether this memory bank would entirely overlap
		 * the vmalloc area.
		 */
		if (__va(bank->start) >= vmalloc_min ||
		    __va(bank->start) < (void *)PAGE_OFFSET) {
			printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
			       "(vmalloc region overlap).\n",
			       (unsigned long long)bank->start,
			       (unsigned long long)bank->start + bank->size - 1);
			continue;
		}

		/*
		 * Check whether this memory bank would partially overlap
		 * the vmalloc area.
		 */
		if (__va(bank->start + bank->size) > vmalloc_min ||
		    __va(bank->start + bank->size) < __va(bank->start)) {
			unsigned long newsize = vmalloc_min - __va(bank->start);
			printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx "
			       "to -%.8llx (vmalloc region overlap).\n",
			       (unsigned long long)bank->start,
			       (unsigned long long)bank->start + bank->size - 1,
			       (unsigned long long)bank->start + newsize - 1);
			bank->size = newsize;
		}
#endif
		if (!bank->highmem && bank->start + bank->size > lowmem_limit)
			lowmem_limit = bank->start + bank->size;

		j++;
	}
#ifdef CONFIG_HIGHMEM
	if (highmem) {
		const char *reason = NULL;

		if (cache_is_vipt_aliasing()) {
			/*
			 * Interactions between kmap and other mappings
			 * make highmem support with aliasing VIPT caches
			 * rather difficult.
			 */
			reason = "with VIPT aliasing cache";
		}
		if (reason) {
			printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
				reason);
			while (j > 0 && meminfo.bank[j - 1].highmem)
				j--;
		}
	}
#endif
	meminfo.nr_banks = j;
	memblock_set_current_limit(lowmem_limit);
}

static inline void prepare_page_table(void)
{
	unsigned long addr;
	phys_addr_t end;

	/*
	 * Clear out all the mappings below the kernel image.
	 */
	for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

#ifdef CONFIG_XIP_KERNEL
	/* The XIP kernel is mapped in the module area -- skip over it */
	addr = ((unsigned long)_etext + PMD_SIZE - 1) & PMD_MASK;
#endif
	for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

	/*
	 * Find the end of the first block of lowmem.
	 */
	end = memblock.memory.regions[0].base + memblock.memory.regions[0].size;
	if (end >= lowmem_limit)
		end = lowmem_limit;

	/*
	 * Clear out all the kernel space mappings, except for the first
	 * memory bank, up to the end of the vmalloc region.
	 */
	for (addr = __phys_to_virt(end);
	     addr < VMALLOC_END; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));
}

#define SWAPPER_PG_DIR_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))

/*
 * Reserve the special regions of memory
 */
void __init arm_mm_memblock_reserve(void)
{
	/*
	 * Reserve the page tables.  These are already in use,
	 * and can only be in node 0.
	 */
	memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE);

#ifdef CONFIG_SA1111
	/*
	 * Because of the SA1111 DMA bug, we want to preserve our
	 * precious DMA-able memory...
	 */
	memblock_reserve(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET);
#endif
}

/*
 * Set up the device mappings.  Since we clear out the page tables for all
 * mappings above VMALLOC_END, we will remove any debug device mappings.
 * This means you have to be careful how you debug this function, or any
 * called function.  This means you can't use any function or debugging
 * method which may touch any device, otherwise the kernel _will_ crash.
 */
static void __init devicemaps_init(struct machine_desc *mdesc)
{
	struct map_desc map;
	unsigned long addr;

	/*
	 * Allocate the vector page early.
	 */
	vectors_page = early_alloc(PAGE_SIZE);

	for (addr = VMALLOC_END; addr; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

	/*
	 * Map the kernel if it is XIP.
	 * It is always first in the module area.
	 */
#ifdef CONFIG_XIP_KERNEL
	map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
	map.virtual = MODULES_VADDR;
	map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
	map.type = MT_ROM;
	create_mapping(&map);
#endif

	/*
	 * Map the cache flushing regions.
	 */
#ifdef FLUSH_BASE
	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
	map.virtual = FLUSH_BASE;
	map.length = SZ_1M;
	map.type = MT_CACHECLEAN;
	create_mapping(&map);
#endif
#ifdef FLUSH_BASE_MINICACHE
	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
	map.virtual = FLUSH_BASE_MINICACHE;
	map.length = SZ_1M;
	map.type = MT_MINICLEAN;
	create_mapping(&map);
#endif

	/*
	 * Create a mapping for the machine vectors at the high-vectors
	 * location (0xffff0000).  If we aren't using high-vectors, also
	 * create a mapping at the low-vectors virtual address.
	 */
	map.pfn = __phys_to_pfn(virt_to_phys(vectors_page));
	map.virtual = 0xffff0000;
	map.length = PAGE_SIZE;
	map.type = MT_HIGH_VECTORS;
	create_mapping(&map);

	if (!vectors_high()) {
		map.virtual = 0;
		map.type = MT_LOW_VECTORS;
		create_mapping(&map);
	}

	/*
	 * Ask the machine support to map in the statically mapped devices.
	 */
	if (mdesc->map_io)
		mdesc->map_io();

	/*
	 * Finally flush the caches and tlb to ensure that we're in a
	 * consistent state wrt the writebuffer.  This also ensures that
	 * any write-allocated cache lines in the vector page are written
	 * back.  After this point, we can start to touch devices again.
	 */
	local_flush_tlb_all();
	flush_cache_all();
}

static void __init kmap_init(void)
{
#ifdef CONFIG_HIGHMEM
	pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),
		PKMAP_BASE, _PAGE_KERNEL_TABLE);
#endif
}

static void __init map_lowmem(void)
{
	struct memblock_region *reg;

	/* Map all the lowmem memory banks. */
	for_each_memblock(memory, reg) {
		phys_addr_t start = reg->base;
		phys_addr_t end = start + reg->size;
		struct map_desc map;

		if (end > lowmem_limit)
			end = lowmem_limit;
		if (start >= end)
			break;

		map.pfn = __phys_to_pfn(start);
		map.virtual = __phys_to_virt(start);
		map.length = end - start;
		map.type = MT_MEMORY;

		create_mapping(&map);
	}
}

/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps, and sets up the zero page, bad page and bad page tables.
 */
void __init paging_init(struct machine_desc *mdesc)
{
	void *zero_page;

	memblock_set_current_limit(lowmem_limit);

	build_mem_type_table();
	prepare_page_table();
	map_lowmem();
	devicemaps_init(mdesc);
	kmap_init();

	top_pmd = pmd_off_k(0xffff0000);

	/* allocate the zero page. */
	zero_page = early_alloc(PAGE_SIZE);

	bootmem_init();

	empty_zero_page = virt_to_page(zero_page);
	__flush_dcache_page(NULL, empty_zero_page);
}