// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/memblock.h>
#include <linux/page_ext.h>
#include <linux/memory.h>
#include <linux/vmalloc.h>
#include <linux/kmemleak.h>
#include <linux/page_owner.h>
#include <linux/page_idle.h>

/*
 * struct page extension
 *
 * This is the feature to manage memory for extended data per page.
 *
 * Until now, we must modify struct page itself to store extra data per page.
 * This requires rebuilding the kernel and it is a really time consuming
 * process. And, sometimes, rebuild is impossible due to third party module
 * dependency. At last, enlarging struct page could cause unwanted system
 * behaviour change.
 *
 * This feature is intended to overcome above mentioned problems. This feature
 * allocates memory for extended data per page in certain place rather than
 * the struct page itself. This memory can be accessed by the accessor
 * functions provided by this code. During the boot process, it checks whether
 * allocation of huge chunk of memory is needed or not. If not, it avoids
 * allocating memory at all. With this advantage, we can include this feature
 * into the kernel in default and can avoid rebuild and solve related problems.
 *
 * To help these things to work well, there are two callbacks for clients. One
 * is the need callback which is mandatory if user wants to avoid useless
 * memory allocation at boot-time. The other is optional, init callback, which
 * is used to do proper initialization after memory is allocated.
 *
 * The need callback is used to decide whether extended memory allocation is
 * needed or not. Sometimes users want to deactivate some features in this
 * boot and extra memory would be unnecessary. In this case, to avoid
 * allocating huge chunk of memory, each client represents their need of
 * extra memory through the need callback. If one of the need callbacks
 * returns true, it means that someone needs extra memory so that
 * page extension core should allocate memory for page extension. If
 * none of need callbacks return true, memory isn't needed at all in this boot
 * and page extension core can skip allocating memory. As a result,
 * none of memory is wasted.
 *
 * When need callback returns true, page_ext checks if there is a request for
 * extra memory through size in struct page_ext_operations. If it is non-zero,
 * extra space is allocated for each page_ext entry and offset is returned to
 * user through offset in struct page_ext_operations.
 *
 * The init callback is used to do proper initialization after page extension
 * is completely initialized. In sparse memory system, extra memory is
 * allocated some time later than memmap is allocated. In other words, lifetime
 * of memory for page extension isn't the same as memmap for struct page.
 * Therefore, clients can't store extra data until page extension is
 * initialized, even if pages are allocated and used freely. This could
 * cause inadequate state of extra data per page, so, to prevent it, client
 * can utilize this callback to initialize the state of it correctly.
 */

/*
 * Registered clients. Each entry's need()/init() callbacks and size/offset
 * fields drive how much extra per-page memory is reserved (see comment above).
 */
static struct page_ext_operations *page_ext_ops[] = {
#ifdef CONFIG_DEBUG_PAGEALLOC
	&debug_guardpage_ops,
#endif
#ifdef CONFIG_PAGE_OWNER
	&page_owner_ops,
#endif
#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
	&page_idle_ops,
#endif
};

/* Total bytes allocated for page_ext tables, reported at boot. */
static unsigned long total_usage;
/* Sum of all clients' requested extra space per page_ext entry. */
static unsigned long extra_mem;

/*
 * Ask every registered client whether it needs page_ext this boot.
 * As a side effect, assigns each needing client its offset into the
 * per-page entry and accumulates the total extra size in extra_mem.
 * Returns true if at least one client needs page_ext memory.
 */
static bool __init invoke_need_callbacks(void)
{
	int i;
	int entries = ARRAY_SIZE(page_ext_ops);
	bool need = false;

	for (i = 0; i < entries; i++) {
		if (page_ext_ops[i]->need && page_ext_ops[i]->need()) {
			page_ext_ops[i]->offset = sizeof(struct page_ext) +
						extra_mem;
			extra_mem += page_ext_ops[i]->size;
			need = true;
		}
	}

	return need;
}

/*
 * Run every registered client's optional init() callback, after the
 * page_ext tables have been fully allocated.
 */
static void __init invoke_init_callbacks(void)
{
	int i;
	int entries = ARRAY_SIZE(page_ext_ops);

	for (i = 0; i < entries; i++) {
		if (page_ext_ops[i]->init)
			page_ext_ops[i]->init();
	}
}

/* Size of one page_ext entry: the base struct plus all clients' extra space. */
static unsigned long get_entry_size(void)
{
	return sizeof(struct page_ext) + extra_mem;
}

/*
 * Return the page_ext entry at @index in the array starting at @base.
 * Plain pointer arithmetic can't be used because each entry carries a
 * runtime-sized extra_mem tail.
 */
static inline struct page_ext *get_entry(void *base, unsigned long index)
{
	return base + get_entry_size() * index;
}

#if !defined(CONFIG_SPARSEMEM)


void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
{
	pgdat->node_page_ext = NULL;
}

/*
 * Flatmem variant: look up the page_ext entry for @page via its node's
 * node_page_ext array, indexed by pfn offset from the (rounded-down)
 * node start pfn. Returns NULL before the arrays exist.
 */
struct page_ext *lookup_page_ext(const struct page *page)
{
	unsigned long pfn = page_to_pfn(page);
	unsigned long index;
	struct page_ext *base;

	base = NODE_DATA(page_to_nid(page))->node_page_ext;
	/*
	 * The sanity checks the page allocator does upon freeing a
	 * page can reach here before the page_ext arrays are
	 * allocated when feeding a range of pages to the allocator
	 * for the first time during bootup or memory hotplug.
	 */
	if (unlikely(!base))
		return NULL;
	index = pfn - round_down(node_start_pfn(page_to_nid(page)),
					MAX_ORDER_NR_PAGES);
	return get_entry(base, index);
}

/*
 * Allocate the node-wide page_ext table for @nid from memblock.
 * Returns 0 on success (or if the node spans no pages), -ENOMEM on failure.
 */
static int __init alloc_node_page_ext(int nid)
{
	struct page_ext *base;
	unsigned long table_size;
	unsigned long nr_pages;

	nr_pages = NODE_DATA(nid)->node_spanned_pages;
	if (!nr_pages)
		return 0;

	/*
	 * Need extra space if node range is not aligned with
	 * MAX_ORDER_NR_PAGES. When page allocator's buddy algorithm
	 * checks buddy's status, range could be out of exact node range.
	 */
	if (!IS_ALIGNED(node_start_pfn(nid), MAX_ORDER_NR_PAGES) ||
		!IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES))
		nr_pages += MAX_ORDER_NR_PAGES;

	table_size = get_entry_size() * nr_pages;

	base = memblock_alloc_try_nid_nopanic(
			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
			MEMBLOCK_ALLOC_ACCESSIBLE, nid);
	if (!base)
		return -ENOMEM;
	NODE_DATA(nid)->node_page_ext = base;
	total_usage += table_size;
	return 0;
}

/*
 * Flatmem boot-time entry point: allocate per-node page_ext tables for all
 * online nodes, then run the clients' init callbacks. Panics on allocation
 * failure, matching the sparsemem path's behaviour.
 */
void __init page_ext_init_flatmem(void)
{

	int nid, fail;

	if (!invoke_need_callbacks())
		return;

	for_each_online_node(nid)  {
		fail = alloc_node_page_ext(nid);
		if (fail)
			goto fail;
	}
	pr_info("allocated %ld bytes of page_ext\n", total_usage);
	invoke_init_callbacks();
	return;

fail:
	pr_crit("allocation of page_ext failed.\n");
	panic("Out of memory");
}

#else /* CONFIG_FLAT_NODE_MEM_MAP */
Shutemov struct page_ext *lookup_page_ext(const struct page *page) 199eefa864bSJoonsoo Kim { 200eefa864bSJoonsoo Kim unsigned long pfn = page_to_pfn(page); 201eefa864bSJoonsoo Kim struct mem_section *section = __pfn_to_section(pfn); 202eefa864bSJoonsoo Kim /* 203eefa864bSJoonsoo Kim * The sanity checks the page allocator does upon freeing a 204eefa864bSJoonsoo Kim * page can reach here before the page_ext arrays are 205eefa864bSJoonsoo Kim * allocated when feeding a range of pages to the allocator 206eefa864bSJoonsoo Kim * for the first time during bootup or memory hotplug. 207eefa864bSJoonsoo Kim */ 208eefa864bSJoonsoo Kim if (!section->page_ext) 209eefa864bSJoonsoo Kim return NULL; 210980ac167SJoonsoo Kim return get_entry(section->page_ext, pfn); 211eefa864bSJoonsoo Kim } 212eefa864bSJoonsoo Kim 213eefa864bSJoonsoo Kim static void *__meminit alloc_page_ext(size_t size, int nid) 214eefa864bSJoonsoo Kim { 215eefa864bSJoonsoo Kim gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN; 216eefa864bSJoonsoo Kim void *addr = NULL; 217eefa864bSJoonsoo Kim 218eefa864bSJoonsoo Kim addr = alloc_pages_exact_nid(nid, size, flags); 219eefa864bSJoonsoo Kim if (addr) { 220eefa864bSJoonsoo Kim kmemleak_alloc(addr, size, 1, flags); 221eefa864bSJoonsoo Kim return addr; 222eefa864bSJoonsoo Kim } 223eefa864bSJoonsoo Kim 224eefa864bSJoonsoo Kim addr = vzalloc_node(size, nid); 225eefa864bSJoonsoo Kim 226eefa864bSJoonsoo Kim return addr; 227eefa864bSJoonsoo Kim } 228eefa864bSJoonsoo Kim 229eefa864bSJoonsoo Kim static int __meminit init_section_page_ext(unsigned long pfn, int nid) 230eefa864bSJoonsoo Kim { 231eefa864bSJoonsoo Kim struct mem_section *section; 232eefa864bSJoonsoo Kim struct page_ext *base; 233eefa864bSJoonsoo Kim unsigned long table_size; 234eefa864bSJoonsoo Kim 235eefa864bSJoonsoo Kim section = __pfn_to_section(pfn); 236eefa864bSJoonsoo Kim 237eefa864bSJoonsoo Kim if (section->page_ext) 238eefa864bSJoonsoo Kim return 0; 239eefa864bSJoonsoo Kim 240980ac167SJoonsoo Kim 
table_size = get_entry_size() * PAGES_PER_SECTION; 241eefa864bSJoonsoo Kim base = alloc_page_ext(table_size, nid); 242eefa864bSJoonsoo Kim 243eefa864bSJoonsoo Kim /* 244eefa864bSJoonsoo Kim * The value stored in section->page_ext is (base - pfn) 245eefa864bSJoonsoo Kim * and it does not point to the memory block allocated above, 246eefa864bSJoonsoo Kim * causing kmemleak false positives. 247eefa864bSJoonsoo Kim */ 248eefa864bSJoonsoo Kim kmemleak_not_leak(base); 249eefa864bSJoonsoo Kim 250eefa864bSJoonsoo Kim if (!base) { 251eefa864bSJoonsoo Kim pr_err("page ext allocation failure\n"); 252eefa864bSJoonsoo Kim return -ENOMEM; 253eefa864bSJoonsoo Kim } 254eefa864bSJoonsoo Kim 255eefa864bSJoonsoo Kim /* 256eefa864bSJoonsoo Kim * The passed "pfn" may not be aligned to SECTION. For the calculation 257eefa864bSJoonsoo Kim * we need to apply a mask. 258eefa864bSJoonsoo Kim */ 259eefa864bSJoonsoo Kim pfn &= PAGE_SECTION_MASK; 260980ac167SJoonsoo Kim section->page_ext = (void *)base - get_entry_size() * pfn; 261eefa864bSJoonsoo Kim total_usage += table_size; 262eefa864bSJoonsoo Kim return 0; 263eefa864bSJoonsoo Kim } 264eefa864bSJoonsoo Kim #ifdef CONFIG_MEMORY_HOTPLUG 265eefa864bSJoonsoo Kim static void free_page_ext(void *addr) 266eefa864bSJoonsoo Kim { 267eefa864bSJoonsoo Kim if (is_vmalloc_addr(addr)) { 268eefa864bSJoonsoo Kim vfree(addr); 269eefa864bSJoonsoo Kim } else { 270eefa864bSJoonsoo Kim struct page *page = virt_to_page(addr); 271eefa864bSJoonsoo Kim size_t table_size; 272eefa864bSJoonsoo Kim 273980ac167SJoonsoo Kim table_size = get_entry_size() * PAGES_PER_SECTION; 274eefa864bSJoonsoo Kim 275eefa864bSJoonsoo Kim BUG_ON(PageReserved(page)); 276eefa864bSJoonsoo Kim free_pages_exact(addr, table_size); 277eefa864bSJoonsoo Kim } 278eefa864bSJoonsoo Kim } 279eefa864bSJoonsoo Kim 280eefa864bSJoonsoo Kim static void __free_page_ext(unsigned long pfn) 281eefa864bSJoonsoo Kim { 282eefa864bSJoonsoo Kim struct mem_section *ms; 283eefa864bSJoonsoo Kim struct page_ext *base; 
284eefa864bSJoonsoo Kim 285eefa864bSJoonsoo Kim ms = __pfn_to_section(pfn); 286eefa864bSJoonsoo Kim if (!ms || !ms->page_ext) 287eefa864bSJoonsoo Kim return; 288980ac167SJoonsoo Kim base = get_entry(ms->page_ext, pfn); 289eefa864bSJoonsoo Kim free_page_ext(base); 290eefa864bSJoonsoo Kim ms->page_ext = NULL; 291eefa864bSJoonsoo Kim } 292eefa864bSJoonsoo Kim 293eefa864bSJoonsoo Kim static int __meminit online_page_ext(unsigned long start_pfn, 294eefa864bSJoonsoo Kim unsigned long nr_pages, 295eefa864bSJoonsoo Kim int nid) 296eefa864bSJoonsoo Kim { 297eefa864bSJoonsoo Kim unsigned long start, end, pfn; 298eefa864bSJoonsoo Kim int fail = 0; 299eefa864bSJoonsoo Kim 300eefa864bSJoonsoo Kim start = SECTION_ALIGN_DOWN(start_pfn); 301eefa864bSJoonsoo Kim end = SECTION_ALIGN_UP(start_pfn + nr_pages); 302eefa864bSJoonsoo Kim 303eefa864bSJoonsoo Kim if (nid == -1) { 304eefa864bSJoonsoo Kim /* 305eefa864bSJoonsoo Kim * In this case, "nid" already exists and contains valid memory. 306eefa864bSJoonsoo Kim * "start_pfn" passed to us is a pfn which is an arg for 307eefa864bSJoonsoo Kim * online__pages(), and start_pfn should exist. 
308eefa864bSJoonsoo Kim */ 309eefa864bSJoonsoo Kim nid = pfn_to_nid(start_pfn); 310eefa864bSJoonsoo Kim VM_BUG_ON(!node_state(nid, N_ONLINE)); 311eefa864bSJoonsoo Kim } 312eefa864bSJoonsoo Kim 313eefa864bSJoonsoo Kim for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) { 314eefa864bSJoonsoo Kim if (!pfn_present(pfn)) 315eefa864bSJoonsoo Kim continue; 316eefa864bSJoonsoo Kim fail = init_section_page_ext(pfn, nid); 317eefa864bSJoonsoo Kim } 318eefa864bSJoonsoo Kim if (!fail) 319eefa864bSJoonsoo Kim return 0; 320eefa864bSJoonsoo Kim 321eefa864bSJoonsoo Kim /* rollback */ 322eefa864bSJoonsoo Kim for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) 323eefa864bSJoonsoo Kim __free_page_ext(pfn); 324eefa864bSJoonsoo Kim 325eefa864bSJoonsoo Kim return -ENOMEM; 326eefa864bSJoonsoo Kim } 327eefa864bSJoonsoo Kim 328eefa864bSJoonsoo Kim static int __meminit offline_page_ext(unsigned long start_pfn, 329eefa864bSJoonsoo Kim unsigned long nr_pages, int nid) 330eefa864bSJoonsoo Kim { 331eefa864bSJoonsoo Kim unsigned long start, end, pfn; 332eefa864bSJoonsoo Kim 333eefa864bSJoonsoo Kim start = SECTION_ALIGN_DOWN(start_pfn); 334eefa864bSJoonsoo Kim end = SECTION_ALIGN_UP(start_pfn + nr_pages); 335eefa864bSJoonsoo Kim 336eefa864bSJoonsoo Kim for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) 337eefa864bSJoonsoo Kim __free_page_ext(pfn); 338eefa864bSJoonsoo Kim return 0; 339eefa864bSJoonsoo Kim 340eefa864bSJoonsoo Kim } 341eefa864bSJoonsoo Kim 342eefa864bSJoonsoo Kim static int __meminit page_ext_callback(struct notifier_block *self, 343eefa864bSJoonsoo Kim unsigned long action, void *arg) 344eefa864bSJoonsoo Kim { 345eefa864bSJoonsoo Kim struct memory_notify *mn = arg; 346eefa864bSJoonsoo Kim int ret = 0; 347eefa864bSJoonsoo Kim 348eefa864bSJoonsoo Kim switch (action) { 349eefa864bSJoonsoo Kim case MEM_GOING_ONLINE: 350eefa864bSJoonsoo Kim ret = online_page_ext(mn->start_pfn, 351eefa864bSJoonsoo Kim mn->nr_pages, mn->status_change_nid); 352eefa864bSJoonsoo Kim break; 
353eefa864bSJoonsoo Kim case MEM_OFFLINE: 354eefa864bSJoonsoo Kim offline_page_ext(mn->start_pfn, 355eefa864bSJoonsoo Kim mn->nr_pages, mn->status_change_nid); 356eefa864bSJoonsoo Kim break; 357eefa864bSJoonsoo Kim case MEM_CANCEL_ONLINE: 358eefa864bSJoonsoo Kim offline_page_ext(mn->start_pfn, 359eefa864bSJoonsoo Kim mn->nr_pages, mn->status_change_nid); 360eefa864bSJoonsoo Kim break; 361eefa864bSJoonsoo Kim case MEM_GOING_OFFLINE: 362eefa864bSJoonsoo Kim break; 363eefa864bSJoonsoo Kim case MEM_ONLINE: 364eefa864bSJoonsoo Kim case MEM_CANCEL_OFFLINE: 365eefa864bSJoonsoo Kim break; 366eefa864bSJoonsoo Kim } 367eefa864bSJoonsoo Kim 368eefa864bSJoonsoo Kim return notifier_from_errno(ret); 369eefa864bSJoonsoo Kim } 370eefa864bSJoonsoo Kim 371eefa864bSJoonsoo Kim #endif 372eefa864bSJoonsoo Kim 373eefa864bSJoonsoo Kim void __init page_ext_init(void) 374eefa864bSJoonsoo Kim { 375eefa864bSJoonsoo Kim unsigned long pfn; 376eefa864bSJoonsoo Kim int nid; 377eefa864bSJoonsoo Kim 378eefa864bSJoonsoo Kim if (!invoke_need_callbacks()) 379eefa864bSJoonsoo Kim return; 380eefa864bSJoonsoo Kim 381eefa864bSJoonsoo Kim for_each_node_state(nid, N_MEMORY) { 382eefa864bSJoonsoo Kim unsigned long start_pfn, end_pfn; 383eefa864bSJoonsoo Kim 384eefa864bSJoonsoo Kim start_pfn = node_start_pfn(nid); 385eefa864bSJoonsoo Kim end_pfn = node_end_pfn(nid); 386eefa864bSJoonsoo Kim /* 387eefa864bSJoonsoo Kim * start_pfn and end_pfn may not be aligned to SECTION and the 388eefa864bSJoonsoo Kim * page->flags of out of node pages are not initialized. So we 389eefa864bSJoonsoo Kim * scan [start_pfn, the biggest section's pfn < end_pfn) here. 390eefa864bSJoonsoo Kim */ 391eefa864bSJoonsoo Kim for (pfn = start_pfn; pfn < end_pfn; 392eefa864bSJoonsoo Kim pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) { 393eefa864bSJoonsoo Kim 394eefa864bSJoonsoo Kim if (!pfn_valid(pfn)) 395eefa864bSJoonsoo Kim continue; 396eefa864bSJoonsoo Kim /* 397eefa864bSJoonsoo Kim * Nodes's pfns can be overlapping. 
398eefa864bSJoonsoo Kim * We know some arch can have a nodes layout such as 399eefa864bSJoonsoo Kim * -------------pfn--------------> 400eefa864bSJoonsoo Kim * N0 | N1 | N2 | N0 | N1 | N2|.... 401fe53ca54SYang Shi * 402fe53ca54SYang Shi * Take into account DEFERRED_STRUCT_PAGE_INIT. 403eefa864bSJoonsoo Kim */ 404fe53ca54SYang Shi if (early_pfn_to_nid(pfn) != nid) 405eefa864bSJoonsoo Kim continue; 406eefa864bSJoonsoo Kim if (init_section_page_ext(pfn, nid)) 407eefa864bSJoonsoo Kim goto oom; 4080fc542b7SVlastimil Babka cond_resched(); 409eefa864bSJoonsoo Kim } 410eefa864bSJoonsoo Kim } 411eefa864bSJoonsoo Kim hotplug_memory_notifier(page_ext_callback, 0); 412eefa864bSJoonsoo Kim pr_info("allocated %ld bytes of page_ext\n", total_usage); 413eefa864bSJoonsoo Kim invoke_init_callbacks(); 414eefa864bSJoonsoo Kim return; 415eefa864bSJoonsoo Kim 416eefa864bSJoonsoo Kim oom: 417eefa864bSJoonsoo Kim panic("Out of memory"); 418eefa864bSJoonsoo Kim } 419eefa864bSJoonsoo Kim 420eefa864bSJoonsoo Kim void __meminit pgdat_page_ext_init(struct pglist_data *pgdat) 421eefa864bSJoonsoo Kim { 422eefa864bSJoonsoo Kim } 423eefa864bSJoonsoo Kim 424eefa864bSJoonsoo Kim #endif 425