// SPDX-License-Identifier: GPL-2.0
/*
 * sparse memory mappings.
 */
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>

#include "internal.h"
#include <asm/dma.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>

/*
 * Permanent SPARSEMEM data:
 *
 * 1) mem_section	- memory sections, mem_map's for valid memory
 */
#ifdef CONFIG_SPARSEMEM_EXTREME
struct mem_section *mem_section[NR_SECTION_ROOTS]
	____cacheline_internodealigned_in_smp;
#else
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
	____cacheline_internodealigned_in_smp;
#endif
EXPORT_SYMBOL(mem_section);

#ifdef NODE_NOT_IN_PAGE_FLAGS
/*
 * If we did not store the node number in the page then we have to
 * do a lookup in the section_to_node_table in order to find which
 * node the page belongs to.
 */
#if MAX_NUMNODES <= 256
static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
#else
static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
#endif

int page_to_nid(const struct page *page)
{
	return section_to_node_table[page_to_section(page)];
}
EXPORT_SYMBOL(page_to_nid);

static void set_section_nid(unsigned long section_nr, int nid)
{
	section_to_node_table[section_nr] = nid;
}
#else /* !NODE_NOT_IN_PAGE_FLAGS */
static inline void set_section_nid(unsigned long section_nr, int nid)
{
}
#endif

#ifdef CONFIG_SPARSEMEM_EXTREME
static noinline struct mem_section __ref *sparse_index_alloc(int nid)
{
	struct mem_section *section = NULL;
	unsigned long array_size = SECTIONS_PER_ROOT *
				   sizeof(struct mem_section);

	if (slab_is_available())
		section = kzalloc_node(array_size, GFP_KERNEL, nid);
	else
		section = memblock_virt_alloc_node(array_size, nid);

	return section;
}

static int __meminit sparse_index_init(unsigned long section_nr, int nid)
{
	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
	struct mem_section *section;

	if (mem_section[root])
		return -EEXIST;

	section = sparse_index_alloc(nid);
	if (!section)
		return -ENOMEM;

	mem_section[root] = section;

	return 0;
}
#else /* !SPARSEMEM_EXTREME */
static inline int sparse_index_init(unsigned long section_nr, int nid)
{
	return 0;
}
#endif

#ifdef CONFIG_SPARSEMEM_EXTREME
int __section_nr(struct mem_section *ms)
{
	unsigned long root_nr;
	struct mem_section *root;

	for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
		root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
		if (!root)
			continue;

		if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
			break;
	}

	VM_BUG_ON(root_nr == NR_SECTION_ROOTS);

	return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
}
#else
int __section_nr(struct mem_section *ms)
{
	return (int)(ms - mem_section[0]);
}
#endif

/*
 * During early boot, before section_mem_map is used for an actual
 * mem_map, we use section_mem_map to store the section's NUMA
 * node. This keeps us from having to use another data structure. The
 * node information is cleared just before we store the real mem_map.
 */
static inline unsigned long sparse_encode_early_nid(int nid)
{
	return (nid << SECTION_NID_SHIFT);
}

static inline int sparse_early_nid(struct mem_section *section)
{
	return (section->section_mem_map >> SECTION_NID_SHIFT);
}
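/*
 * Encoding sketch (illustrative only, not part of the build): with
 * SECTION_NID_SHIFT == 3 (its value on kernels of this vintage),
 * recording node 2 for a section and reading it back looks like
 *
 *	ms->section_mem_map = sparse_encode_early_nid(2);	-> 0x10
 *	nid = sparse_early_nid(ms);				-> 2
 *
 * The bits below SECTION_NID_SHIFT stay free for the SECTION_* flag
 * bits, which is why the early nid survives memory_present() OR-ing
 * in SECTION_IS_ONLINE and SECTION_MARKED_PRESENT below.
 */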
/* Validate the physical addressing limitations of the model */
void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
						unsigned long *end_pfn)
{
	unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);

	/*
	 * Sanity checks - do not allow an architecture to pass
	 * in larger pfns than the maximum scope of sparsemem:
	 */
	if (*start_pfn > max_sparsemem_pfn) {
		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
			"Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
			*start_pfn, *end_pfn, max_sparsemem_pfn);
		WARN_ON_ONCE(1);
		*start_pfn = max_sparsemem_pfn;
		*end_pfn = max_sparsemem_pfn;
	} else if (*end_pfn > max_sparsemem_pfn) {
		mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
			"End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
			*start_pfn, *end_pfn, max_sparsemem_pfn);
		WARN_ON_ONCE(1);
		*end_pfn = max_sparsemem_pfn;
	}
}

/*
 * There are a number of times that we loop over NR_MEM_SECTIONS,
 * looking for section_present() on each. But, when we have very
 * large physical address spaces, NR_MEM_SECTIONS can also be
 * very large which makes the loops quite long.
 *
 * Keeping track of this gives us an easy way to break out of
 * those loops early.
 */
int __highest_present_section_nr;
static void section_mark_present(struct mem_section *ms)
{
	int section_nr = __section_nr(ms);

	if (section_nr > __highest_present_section_nr)
		__highest_present_section_nr = section_nr;

	ms->section_mem_map |= SECTION_MARKED_PRESENT;
}

static inline int next_present_section_nr(int section_nr)
{
	do {
		section_nr++;
		if (present_section_nr(section_nr))
			return section_nr;
	} while ((section_nr < NR_MEM_SECTIONS) &&
		 (section_nr <= __highest_present_section_nr));

	return -1;
}
#define for_each_present_section_nr(start, section_nr)		\
	for (section_nr = next_present_section_nr(start-1);	\
	     ((section_nr >= 0) &&				\
	      (section_nr < NR_MEM_SECTIONS) &&			\
	      (section_nr <= __highest_present_section_nr));	\
	     section_nr = next_present_section_nr(section_nr))

/* Record a memory area against a node. */
void __init memory_present(int nid, unsigned long start, unsigned long end)
{
	unsigned long pfn;

	start &= PAGE_SECTION_MASK;
	mminit_validate_memmodel_limits(&start, &end);
	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
		unsigned long section = pfn_to_section_nr(pfn);
		struct mem_section *ms;

		sparse_index_init(section, nid);
		set_section_nid(section, nid);

		ms = __nr_to_section(section);
		if (!ms->section_mem_map) {
			ms->section_mem_map = sparse_encode_early_nid(nid) |
							SECTION_IS_ONLINE;
			section_mark_present(ms);
		}
	}
}
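/*
 * Illustrative caller sketch (not part of this file): architectures
 * typically register their memory from early setup code by walking
 * memblock, roughly:
 *
 *	for_each_memblock(memory, reg)
 *		memory_present(memblock_get_region_node(reg),
 *			       memblock_region_memory_base_pfn(reg),
 *			       memblock_region_memory_end_pfn(reg));
 *
 * after which every backing section is marked present and carries its
 * early NUMA node, but has no mem_map yet.
 */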
/*
 * Only used by the i386 NUMA architectures, but relatively
 * generic code.
 */
unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
						     unsigned long end_pfn)
{
	unsigned long pfn;
	unsigned long nr_pages = 0;

	mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
		if (nid != early_pfn_to_nid(pfn))
			continue;

		if (pfn_present(pfn))
			nr_pages += PAGES_PER_SECTION;
	}

	return nr_pages * sizeof(struct page);
}

/*
 * Subtle, we encode the real pfn into the mem_map such that
 * the identity pfn - section_mem_map will return the actual
 * physical page frame number.
 */
static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
{
	return (unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
}

/*
 * Decode mem_map from the coded memmap
 */
struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
{
	/* mask off the extra low bits of information */
	coded_mem_map &= SECTION_MAP_MASK;
	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
}
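/*
 * Round-trip sketch (illustrative only, not part of the build): the
 * encoded value is mem_map biased down by the section's first pfn, so
 * decoding is a single addition, and on a sparsemem kernel
 * pfn_to_page() reduces to a section lookup plus one add:
 *
 *	coded = sparse_encode_mem_map(mem_map, pnum);
 *	sparse_decode_mem_map(coded, pnum) == mem_map
 *	page = (struct page *)(coded & SECTION_MAP_MASK) + pfn
 */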
static int __meminit sparse_init_one_section(struct mem_section *ms,
		unsigned long pnum, struct page *mem_map,
		unsigned long *pageblock_bitmap)
{
	if (!present_section(ms))
		return -EINVAL;

	ms->section_mem_map &= ~SECTION_MAP_MASK;
	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
							SECTION_HAS_MEM_MAP;
	ms->pageblock_flags = pageblock_bitmap;

	return 1;
}

unsigned long usemap_size(void)
{
	return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long);
}

#ifdef CONFIG_MEMORY_HOTPLUG
static unsigned long *__kmalloc_section_usemap(void)
{
	return kmalloc(usemap_size(), GFP_KERNEL);
}
#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_MEMORY_HOTREMOVE
static unsigned long * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
					 unsigned long size)
{
	unsigned long goal, limit;
	unsigned long *p;
	int nid;
	/*
	 * A page may contain usemaps for other sections preventing the
	 * page being freed and making a section unremovable while
	 * other sections referencing the usemap remain active. Similarly,
	 * a pgdat can prevent a section being removed. If section A
	 * contains a pgdat and section B contains the usemap, both
	 * sections become inter-dependent. This allocates usemaps
	 * from the same section as the pgdat where possible to avoid
	 * this problem.
	 */
	goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
	limit = goal + (1UL << PA_SECTION_SHIFT);
	nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
again:
	p = memblock_virt_alloc_try_nid_nopanic(size,
						SMP_CACHE_BYTES, goal, limit,
						nid);
	if (!p && limit) {
		limit = 0;
		goto again;
	}
	return p;
}
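/*
 * Worked example (illustrative, assuming x86_64's PA_SECTION_SHIFT of
 * 27, i.e. 128M sections): a pgdat at physical 0x12345678 gives
 *
 *	goal  = 0x12345678 & (PAGE_SECTION_MASK << PAGE_SHIFT) = 0x10000000
 *	limit = goal + (1UL << PA_SECTION_SHIFT)               = 0x18000000
 *
 * so the usemap is first sought inside the pgdat's own 128M section,
 * and only if that fails is limit dropped to 0 (anywhere on the node).
 */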
static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
{
	unsigned long usemap_snr, pgdat_snr;
	static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
	static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
	struct pglist_data *pgdat = NODE_DATA(nid);
	int usemap_nid;

	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
	if (usemap_snr == pgdat_snr)
		return;

	if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
		/* skip redundant message */
		return;

	old_usemap_snr = usemap_snr;
	old_pgdat_snr = pgdat_snr;

	usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
	if (usemap_nid != nid) {
		pr_info("node %d must be removed before removing section %ld\n",
			nid, usemap_snr);
		return;
	}
	/*
	 * There is a circular dependency. Some platforms allow
	 * un-removable sections because they will just gather other
	 * removable sections for dynamic partitioning, so just note
	 * the un-removable section numbers here.
	 */
	pr_info("Sections %ld and %ld (node %d) have a circular dependency on usemap and pgdat allocations\n",
		usemap_snr, pgdat_snr, nid);
}
#else
static unsigned long * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
					 unsigned long size)
{
	return memblock_virt_alloc_node_nopanic(size, pgdat->node_id);
}

static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
{
}
#endif /* CONFIG_MEMORY_HOTREMOVE */

static void __init sparse_early_usemaps_alloc_node(void *data,
				 unsigned long pnum_begin,
				 unsigned long pnum_end,
				 unsigned long usemap_count, int nodeid)
{
	void *usemap;
	unsigned long pnum;
	unsigned long **usemap_map = (unsigned long **)data;
	int size = usemap_size();

	usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
							  size * usemap_count);
	if (!usemap) {
		pr_warn("%s: allocation failed\n", __func__);
		return;
	}

	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
		if (!present_section_nr(pnum))
			continue;
		usemap_map[pnum] = usemap;
		usemap += size;
		check_usemap_section_nr(nodeid, usemap_map[pnum]);
	}
}

#ifndef CONFIG_SPARSEMEM_VMEMMAP
struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
{
	struct page *map;
	unsigned long size;

	map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
	if (map)
		return map;

	size = PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
	map = memblock_virt_alloc_try_nid(size,
					  PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
					  BOOTMEM_ALLOC_ACCESSIBLE, nid);
	return map;
}
void __init sparse_mem_maps_populate_node(struct page **map_map,
					  unsigned long pnum_begin,
					  unsigned long pnum_end,
					  unsigned long map_count, int nodeid)
{
	void *map;
	unsigned long pnum;
	unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;

	map = alloc_remap(nodeid, size * map_count);
	if (map) {
		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
			if (!present_section_nr(pnum))
				continue;
			map_map[pnum] = map;
			map += size;
		}
		return;
	}

	size = PAGE_ALIGN(size);
	map = memblock_virt_alloc_try_nid(size * map_count,
					  PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
					  BOOTMEM_ALLOC_ACCESSIBLE, nodeid);
	if (map) {
		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
			if (!present_section_nr(pnum))
				continue;
			map_map[pnum] = map;
			map += size;
		}
		return;
	}

	/* fallback */
	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
		struct mem_section *ms;

		if (!present_section_nr(pnum))
			continue;
		map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
		if (map_map[pnum])
			continue;
		ms = __nr_to_section(pnum);
		pr_err("%s: sparsemem memory map backing failed, some memory will not be available\n",
		       __func__);
		ms->section_mem_map = 0;
	}
}
#endif /* !CONFIG_SPARSEMEM_VMEMMAP */

#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
static void __init sparse_early_mem_maps_alloc_node(void *data,
				 unsigned long pnum_begin,
				 unsigned long pnum_end,
				 unsigned long map_count, int nodeid)
{
	struct page **map_map = (struct page **)data;
	sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
					 map_count, nodeid);
}
#else
static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
{
	struct page *map;
	struct mem_section *ms = __nr_to_section(pnum);
	int nid = sparse_early_nid(ms);

	map = sparse_mem_map_populate(pnum, nid);
	if (map)
		return map;

	pr_err("%s: sparsemem memory map backing failed, some memory will not be available\n",
	       __func__);
	ms->section_mem_map = 0;
	return NULL;
}
#endif

void __weak __meminit vmemmap_populate_print_last(void)
{
}

/**
 * alloc_usemap_and_memmap - memory allocation for pageblock flags and vmemmap
 * @alloc_func: function to allocate usemap or memmap
 * @data: usemap_map for pageblock flags or map_map for vmemmap
 */
static void __init alloc_usemap_and_memmap(void (*alloc_func)
					(void *, unsigned long, unsigned long,
					unsigned long, int), void *data)
{
	unsigned long pnum;
	unsigned long map_count;
	int nodeid_begin = 0;
	unsigned long pnum_begin = 0;

	for_each_present_section_nr(0, pnum) {
		struct mem_section *ms;

		ms = __nr_to_section(pnum);
		nodeid_begin = sparse_early_nid(ms);
		pnum_begin = pnum;
		break;
	}
	map_count = 1;
	for_each_present_section_nr(pnum_begin + 1, pnum) {
		struct mem_section *ms;
		int nodeid;

		ms = __nr_to_section(pnum);
		nodeid = sparse_early_nid(ms);
		if (nodeid == nodeid_begin) {
			map_count++;
			continue;
		}
		/* ok, we need to take care of sections from pnum_begin to pnum - 1 */
		alloc_func(data, pnum_begin, pnum,
			   map_count, nodeid_begin);
		/* new start, update count etc. */
		nodeid_begin = nodeid;
		pnum_begin = pnum;
		map_count = 1;
	}
	/* ok, last chunk */
	alloc_func(data, pnum_begin, NR_MEM_SECTIONS,
		   map_count, nodeid_begin);
}
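/*
 * Illustrative walk (not part of the build): with present sections
 * 0-3 on node 0 and 4-5 on node 1, the loop above invokes
 *
 *	alloc_func(data, 0, 4, 4, 0);			- node 0's chunk
 *	alloc_func(data, 4, NR_MEM_SECTIONS, 2, 1);	- final chunk
 *
 * so each node gets one batched allocation covering all of its
 * present sections.
 */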
/*
 * Allocate the accumulated non-linear sections, allocate a mem_map
 * for each and record the physical to section mapping.
 */
void __init sparse_init(void)
{
	unsigned long pnum;
	struct page *map;
	unsigned long *usemap;
	unsigned long **usemap_map;
	int size;
#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
	int size2;
	struct page **map_map;
#endif

	/* see include/linux/mmzone.h 'struct mem_section' definition */
	BUILD_BUG_ON(!is_power_of_2(sizeof(struct mem_section)));

	/* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
	set_pageblock_order();

	/*
	 * Each mem_map is backed by a big page (2M on 64-bit x86), while
	 * a usemap is much smaller than a page (around 24 bytes).
	 * Allocating a 2M mem_map (with 2M alignment) and a usemap in
	 * turn would push each following 2M allocation one further 2M
	 * away, leaving large systems with many holes, so try to
	 * allocate the 2M mem_maps contiguously instead.
	 *
	 * powerpc needs to call sparse_init_one_section right after each
	 * sparse_early_mem_map_alloc, so allocate usemap_map first.
	 */
	size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
	usemap_map = memblock_virt_alloc(size, 0);
	if (!usemap_map)
		panic("can not allocate usemap_map\n");
	alloc_usemap_and_memmap(sparse_early_usemaps_alloc_node,
							(void *)usemap_map);

#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
	size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
	map_map = memblock_virt_alloc(size2, 0);
	if (!map_map)
		panic("can not allocate map_map\n");
	alloc_usemap_and_memmap(sparse_early_mem_maps_alloc_node,
							(void *)map_map);
#endif

	for_each_present_section_nr(0, pnum) {
		usemap = usemap_map[pnum];
		if (!usemap)
			continue;

#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
		map = map_map[pnum];
#else
		map = sparse_early_mem_map_alloc(pnum);
#endif
		if (!map)
			continue;

		sparse_init_one_section(__nr_to_section(pnum), pnum, map,
								usemap);
	}

	vmemmap_populate_print_last();

#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
	memblock_free_early(__pa(map_map), size2);
#endif
	memblock_free_early(__pa(usemap_map), size);
}

#ifdef CONFIG_MEMORY_HOTPLUG

/* Mark all memory sections within the pfn range as online */
void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long pfn;

	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
		unsigned long section_nr = pfn_to_section_nr(pfn);
		struct mem_section *ms;

		/* onlining code should never touch invalid ranges */
		if (WARN_ON(!valid_section_nr(section_nr)))
			continue;

		ms = __nr_to_section(section_nr);
		ms->section_mem_map |= SECTION_IS_ONLINE;
	}
}

#ifdef CONFIG_MEMORY_HOTREMOVE
/* Mark all memory sections within the pfn range as offline */
void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long pfn;

	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
		unsigned long section_nr = pfn_to_section_nr(pfn);
		struct mem_section *ms;

		/*
		 * TODO this needs some double checking. Offlining code makes
		 * sure to check pfn_valid but those checks might be just bogus
		 */
		if (WARN_ON(!valid_section_nr(section_nr)))
			continue;

		ms = __nr_to_section(section_nr);
		ms->section_mem_map &= ~SECTION_IS_ONLINE;
	}
}
#endif
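/*
 * Illustrative callers (hedged; they live in the hotplug code, not in
 * this file): the online path calls online_mem_sections() and the
 * offline path calls offline_mem_sections() over the pfn range being
 * changed, so SECTION_IS_ONLINE stays in sync with what
 * pfn_to_online_page() reports.
 */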
#ifdef CONFIG_SPARSEMEM_VMEMMAP
static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
{
	/* This will make the necessary allocations eventually. */
	return sparse_mem_map_populate(pnum, nid);
}
static void __kfree_section_memmap(struct page *memmap)
{
	unsigned long start = (unsigned long)memmap;
	unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);

	vmemmap_free(start, end);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
static void free_map_bootmem(struct page *memmap)
{
	unsigned long start = (unsigned long)memmap;
	unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);

	vmemmap_free(start, end);
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
#else
static struct page *__kmalloc_section_memmap(void)
{
	struct page *page, *ret;
	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;

	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
	if (page)
		goto got_map_page;

	ret = vmalloc(memmap_size);
	if (ret)
		goto got_map_ptr;

	return NULL;
got_map_page:
	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
got_map_ptr:

	return ret;
}

static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
{
	return __kmalloc_section_memmap();
}

static void __kfree_section_memmap(struct page *memmap)
{
	if (is_vmalloc_addr(memmap))
		vfree(memmap);
	else
		free_pages((unsigned long)memmap,
			   get_order(sizeof(struct page) * PAGES_PER_SECTION));
}

#ifdef CONFIG_MEMORY_HOTREMOVE
static void free_map_bootmem(struct page *memmap)
{
	unsigned long maps_section_nr, removing_section_nr, i;
	unsigned long magic, nr_pages;
	struct page *page = virt_to_page(memmap);

	nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
		>> PAGE_SHIFT;

	for (i = 0; i < nr_pages; i++, page++) {
		magic = (unsigned long) page->freelist;

		BUG_ON(magic == NODE_INFO);

		maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
		removing_section_nr = page_private(page);

		/*
		 * When this function is called, the section being removed
		 * is in a logically offlined state, meaning all of its
		 * pages are isolated from the page allocator. If the
		 * section's memmap sits in that same section, it must not
		 * be freed here: the page allocator could hand it out
		 * again just before the memory is physically removed.
		 */
		if (maps_section_nr != removing_section_nr)
			put_page_bootmem(page);
	}
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
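/*
 * Size sketch (illustrative, assuming x86_64 defaults): a section is
 * 128M, so PAGES_PER_SECTION == 32768; with sizeof(struct page) == 64
 * the per-section memmap is
 *
 *	32768 * 64 = 2M
 *
 * which is why __kmalloc_section_memmap() above first attempts a
 * single order-9 page allocation before falling back to vmalloc().
 */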
/*
 * returns the number of sections whose mem_maps were properly
 * set. If this is <= 0, the passed-in map was not consumed and
 * must be freed.
 */
int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn)
{
	unsigned long section_nr = pfn_to_section_nr(start_pfn);
	struct mem_section *ms;
	struct page *memmap;
	unsigned long *usemap;
	unsigned long flags;
	int ret;

	/*
	 * No locking here: sparse_index_init() handles its own
	 * serialization, and it may also kmalloc.
	 */
	ret = sparse_index_init(section_nr, pgdat->node_id);
	if (ret < 0 && ret != -EEXIST)
		return ret;
	memmap = kmalloc_section_memmap(section_nr, pgdat->node_id);
	if (!memmap)
		return -ENOMEM;
	usemap = __kmalloc_section_usemap();
	if (!usemap) {
		__kfree_section_memmap(memmap);
		return -ENOMEM;
	}

	pgdat_resize_lock(pgdat, &flags);

	ms = __pfn_to_section(start_pfn);
	if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
		ret = -EEXIST;
		goto out;
	}

	memset(memmap, 0, sizeof(struct page) * PAGES_PER_SECTION);

	section_mark_present(ms);

	ret = sparse_init_one_section(ms, section_nr, memmap, usemap);

out:
	pgdat_resize_unlock(pgdat, &flags);
	if (ret <= 0) {
		kfree(usemap);
		__kfree_section_memmap(memmap);
	}
	return ret;
}

#ifdef CONFIG_MEMORY_HOTREMOVE
#ifdef CONFIG_MEMORY_FAILURE
static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
{
	int i;

	if (!memmap)
		return;

	for (i = 0; i < nr_pages; i++) {
		if (PageHWPoison(&memmap[i])) {
			atomic_long_sub(1, &num_poisoned_pages);
			ClearPageHWPoison(&memmap[i]);
		}
	}
}
#else
static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
{
}
#endif

static void free_section_usemap(struct page *memmap, unsigned long *usemap)
{
	struct page *usemap_page;

	if (!usemap)
		return;

	usemap_page = virt_to_page(usemap);
	/*
	 * Check to see if allocation came from hot-plug-add
	 */
	if (PageSlab(usemap_page) || PageCompound(usemap_page)) {
		kfree(usemap);
		if (memmap)
			__kfree_section_memmap(memmap);
		return;
	}

	/*
	 * The usemap came from bootmem. This is packed with other usemaps
	 * on the section which has pgdat at boot time. Just keep it as is now.
	 */

	if (memmap)
		free_map_bootmem(memmap);
}

void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
		unsigned long map_offset)
{
	struct page *memmap = NULL;
	unsigned long *usemap = NULL, flags;
	struct pglist_data *pgdat = zone->zone_pgdat;

	pgdat_resize_lock(pgdat, &flags);
	if (ms->section_mem_map) {
		usemap = ms->pageblock_flags;
		memmap = sparse_decode_mem_map(ms->section_mem_map,
						__section_nr(ms));
		ms->section_mem_map = 0;
		ms->pageblock_flags = NULL;
	}
	pgdat_resize_unlock(pgdat, &flags);

	clear_hwpoisoned_pages(memmap + map_offset,
			       PAGES_PER_SECTION - map_offset);
	free_section_usemap(memmap, usemap);
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
#endif /* CONFIG_MEMORY_HOTPLUG */
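/*
 * Illustrative call flow (a hedged sketch of this kernel vintage, not
 * a definitive reference): the memory hotplug add path ends up doing
 * roughly
 *
 *	sparse_add_one_section(NODE_DATA(nid), start_pfn);
 *
 * once per section from __add_section(), while the remove path calls
 * sparse_remove_one_section(zone, ms, map_offset) after the range has
 * been offlined and isolated.
 */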