/*
 * Copyright IBM Corp. 2006
 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */

#include <linux/bootmem.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>

static DEFINE_MUTEX(vmem_mutex);

struct memory_segment {
	struct list_head list;
	unsigned long start;
	unsigned long size;
};

static LIST_HEAD(mem_segs);

static void __ref *vmem_alloc_pages(unsigned int order)
{
	if (slab_is_available())
		return (void *)__get_free_pages(GFP_KERNEL, order);
	return alloc_bootmem_pages((1 << order) * PAGE_SIZE);
}

static inline pud_t *vmem_pud_alloc(void)
{
	pud_t *pud = NULL;

#ifdef CONFIG_64BIT
	pud = vmem_alloc_pages(2);
	if (!pud)
		return NULL;
	clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4);
#endif
	return pud;
}

static inline pmd_t *vmem_pmd_alloc(void)
{
	pmd_t *pmd = NULL;

#ifdef CONFIG_64BIT
	pmd = vmem_alloc_pages(2);
	if (!pmd)
		return NULL;
	clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4);
#endif
	return pmd;
}

static pte_t __ref *vmem_pte_alloc(unsigned long address)
{
	pte_t *pte;

	if (slab_is_available())
		pte = (pte_t *) page_table_alloc(&init_mm, address);
	else
		pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
	if (!pte)
		return NULL;
	clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY,
		    PTRS_PER_PTE * sizeof(pte_t));
	return pte;
}

/*
 * Add a physical memory range to the 1:1 mapping.
 */
static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
{
	unsigned long end = start + size;
	unsigned long address = start;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;
	pte_t pte;
	int ret = -ENOMEM;

	while (address < end) {
		pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
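		/* Walk the kernel page table, allocating missing upper levels. */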
		pg_dir = pgd_offset_k(address);
		if (pgd_none(*pg_dir)) {
			pu_dir = vmem_pud_alloc();
			if (!pu_dir)
				goto out;
			pgd_populate(&init_mm, pg_dir, pu_dir);
		}
		pu_dir = pud_offset(pg_dir, address);
#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
		if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
			pte_val(pte) |= _REGION3_ENTRY_LARGE;
			pte_val(pte) |= _REGION_ENTRY_TYPE_R3;
			pud_val(*pu_dir) = pte_val(pte);
			address += PUD_SIZE;
			continue;
		}
#endif
		if (pud_none(*pu_dir)) {
			pm_dir = vmem_pmd_alloc();
			if (!pm_dir)
				goto out;
			pud_populate(&init_mm, pu_dir, pm_dir);
		}
		pm_dir = pmd_offset(pu_dir, address);
#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
		if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
		    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
			pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
			pmd_val(*pm_dir) = pte_val(pte);
			address += PMD_SIZE;
			continue;
		}
#endif
		if (pmd_none(*pm_dir)) {
			pt_dir = vmem_pte_alloc(address);
			if (!pt_dir)
				goto out;
			pmd_populate(&init_mm, pm_dir, pt_dir);
		}

		pt_dir = pte_offset_kernel(pm_dir, address);
		*pt_dir = pte;
		address += PAGE_SIZE;
	}
	ret = 0;
out:
	flush_tlb_kernel_range(start, end);
	return ret;
}

/*
 * Remove a physical memory range from the 1:1 mapping.
 * Currently only invalidates page table entries.
 */
static void vmem_remove_range(unsigned long start, unsigned long size)
{
	unsigned long end = start + size;
	unsigned long address = start;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;
	pte_t pte;

	pte_val(pte) = _PAGE_TYPE_EMPTY;
	while (address < end) {
		pg_dir = pgd_offset_k(address);
		if (pgd_none(*pg_dir)) {
			address += PGDIR_SIZE;
			continue;
		}
		pu_dir = pud_offset(pg_dir, address);
		if (pud_none(*pu_dir)) {
			address += PUD_SIZE;
			continue;
		}
		if (pud_large(*pu_dir)) {
			pud_clear(pu_dir);
			address += PUD_SIZE;
			continue;
		}
		pm_dir = pmd_offset(pu_dir, address);
		if (pmd_none(*pm_dir)) {
			address += PMD_SIZE;
			continue;
		}
		if (pmd_large(*pm_dir)) {
			pmd_clear(pm_dir);
			address += PMD_SIZE;
			continue;
		}
		pt_dir = pte_offset_kernel(pm_dir, address);
		*pt_dir = pte;
		address += PAGE_SIZE;
	}
	flush_tlb_kernel_range(start, end);
}

/*
 * Add a backed mem_map array to the virtual mem_map array.
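 * This gets called once per memory section when CONFIG_SPARSEMEM_VMEMMAP is
 * enabled; it allocates the backing pages and any missing page table levels
 * for the section's part of the virtual mem_map.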
 */
int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
{
	unsigned long address, start_addr, end_addr;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;
	pte_t pte;
	int ret = -ENOMEM;

	start_addr = (unsigned long) start;
	end_addr = (unsigned long) (start + nr);

	for (address = start_addr; address < end_addr;) {
		pg_dir = pgd_offset_k(address);
		if (pgd_none(*pg_dir)) {
			pu_dir = vmem_pud_alloc();
			if (!pu_dir)
				goto out;
			pgd_populate(&init_mm, pg_dir, pu_dir);
		}

		pu_dir = pud_offset(pg_dir, address);
		if (pud_none(*pu_dir)) {
			pm_dir = vmem_pmd_alloc();
			if (!pm_dir)
				goto out;
			pud_populate(&init_mm, pu_dir, pm_dir);
		}

		pm_dir = pmd_offset(pu_dir, address);
		if (pmd_none(*pm_dir)) {
#ifdef CONFIG_64BIT
			/* Use 1MB frames for vmemmap if available. We always
			 * use large frames even if they are only partially
			 * used.
			 * Otherwise we would also end up with page tables,
			 * since vmemmap_populate gets called for each
			 * section separately. */
			if (MACHINE_HAS_EDAT1) {
				void *new_page;

				new_page = vmemmap_alloc_block(PMD_SIZE, node);
				if (!new_page)
					goto out;
				pte = mk_pte_phys(__pa(new_page), PAGE_RW);
				pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
				pmd_val(*pm_dir) = pte_val(pte);
				address = (address + PMD_SIZE) & PMD_MASK;
				continue;
			}
#endif
			pt_dir = vmem_pte_alloc(address);
			if (!pt_dir)
				goto out;
			pmd_populate(&init_mm, pm_dir, pt_dir);
		} else if (pmd_large(*pm_dir)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}

		pt_dir = pte_offset_kernel(pm_dir, address);
		if (pte_none(*pt_dir)) {
			unsigned long new_page;

			new_page = __pa(vmem_alloc_pages(0));
			if (!new_page)
				goto out;
			pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL);
			*pt_dir = pte;
		}
		address += PAGE_SIZE;
	}
	memset(start, 0, nr * sizeof(struct page));
	ret = 0;
out:
	flush_tlb_kernel_range(start_addr, end_addr);
	return ret;
}

/*
 * Add memory segment to the segment list if it doesn't overlap with
 * an already present segment.
 */
static int insert_memory_segment(struct memory_segment *seg)
{
	struct memory_segment *tmp;

	if (seg->start + seg->size > VMEM_MAX_PHYS ||
	    seg->start + seg->size < seg->start)
		return -ERANGE;

	list_for_each_entry(tmp, &mem_segs, list) {
		if (seg->start >= tmp->start + tmp->size)
			continue;
		if (seg->start + seg->size <= tmp->start)
			continue;
		return -ENOSPC;
	}
	list_add(&seg->list, &mem_segs);
	return 0;
}

/*
 * Remove memory segment from the segment list.
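 * The caller must hold vmem_mutex.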
 */
static void remove_memory_segment(struct memory_segment *seg)
{
	list_del(&seg->list);
}

static void __remove_shared_memory(struct memory_segment *seg)
{
	remove_memory_segment(seg);
	vmem_remove_range(seg->start, seg->size);
}

int vmem_remove_mapping(unsigned long start, unsigned long size)
{
	struct memory_segment *seg;
	int ret;

	mutex_lock(&vmem_mutex);

	ret = -ENOENT;
	list_for_each_entry(seg, &mem_segs, list) {
		if (seg->start == start && seg->size == size)
			break;
	}

	if (seg->start != start || seg->size != size)
		goto out;

	ret = 0;
	__remove_shared_memory(seg);
	kfree(seg);
out:
	mutex_unlock(&vmem_mutex);
	return ret;
}

int vmem_add_mapping(unsigned long start, unsigned long size)
{
	struct memory_segment *seg;
	int ret;

	mutex_lock(&vmem_mutex);
	ret = -ENOMEM;
	seg = kzalloc(sizeof(*seg), GFP_KERNEL);
	if (!seg)
		goto out;
	seg->start = start;
	seg->size = size;

	ret = insert_memory_segment(seg);
	if (ret)
		goto out_free;

	ret = vmem_add_mem(start, size, 0);
	if (ret)
		goto out_remove;
	goto out;

out_remove:
	__remove_shared_memory(seg);
out_free:
	kfree(seg);
out:
	mutex_unlock(&vmem_mutex);
	return ret;
}

/*
 * Map the whole physical memory to virtual memory (identity mapping).
 * We reserve enough space in the vmalloc area for vmemmap to hotplug
 * additional memory segments.
 */
void __init vmem_map_init(void)
{
	unsigned long ro_start, ro_end;
	unsigned long start, end;
	int i;

	ro_start = PFN_ALIGN((unsigned long)&_stext);
	ro_end = (unsigned long)&_eshared & PAGE_MASK;
	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
		if (memory_chunk[i].type == CHUNK_CRASHK ||
		    memory_chunk[i].type == CHUNK_OLDMEM)
			continue;
		start = memory_chunk[i].addr;
		end = memory_chunk[i].addr + memory_chunk[i].size;
		if (start >= ro_end || end <= ro_start)
			vmem_add_mem(start, end - start, 0);
		else if (start >= ro_start && end <= ro_end)
			vmem_add_mem(start, end - start, 1);
		else if (start >= ro_start) {
			vmem_add_mem(start, ro_end - start, 1);
			vmem_add_mem(ro_end, end - ro_end, 0);
		} else if (end < ro_end) {
			vmem_add_mem(start, ro_start - start, 0);
			vmem_add_mem(ro_start, end - ro_start, 1);
		} else {
			vmem_add_mem(start, ro_start - start, 0);
			vmem_add_mem(ro_start, ro_end - ro_start, 1);
			vmem_add_mem(ro_end, end - ro_end, 0);
		}
	}
}

/*
 * Convert memory chunk array to a memory segment list so there is a single
 * list that contains both r/w memory and shared memory segments.
 */
static int __init vmem_convert_memory_chunk(void)
{
	struct memory_segment *seg;
	int i;

	mutex_lock(&vmem_mutex);
	for (i = 0; i < MEMORY_CHUNKS; i++) {
		if (!memory_chunk[i].size)
			continue;
		if (memory_chunk[i].type == CHUNK_CRASHK ||
		    memory_chunk[i].type == CHUNK_OLDMEM)
			continue;
		seg = kzalloc(sizeof(*seg), GFP_KERNEL);
		if (!seg)
			panic("Out of memory...\n");
		seg->start = memory_chunk[i].addr;
		seg->size = memory_chunk[i].size;
		insert_memory_segment(seg);
	}
	mutex_unlock(&vmem_mutex);
	return 0;
}

core_initcall(vmem_convert_memory_chunk);
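
/*
 * Usage sketch (illustrative): a caller, e.g. a segment or memory hotplug
 * driver, is expected to pair the two external entry points.  seg_start and
 * seg_size below are placeholders for the caller's segment range:
 *
 *	rc = vmem_add_mapping(seg_start, seg_size);
 *	if (rc)
 *		return rc;
 *	...
 *	vmem_remove_mapping(seg_start, seg_size);
 */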