1eefa864bSJoonsoo Kim #include <linux/mm.h> 2eefa864bSJoonsoo Kim #include <linux/mmzone.h> 3eefa864bSJoonsoo Kim #include <linux/bootmem.h> 4eefa864bSJoonsoo Kim #include <linux/page_ext.h> 5eefa864bSJoonsoo Kim #include <linux/memory.h> 6eefa864bSJoonsoo Kim #include <linux/vmalloc.h> 7eefa864bSJoonsoo Kim #include <linux/kmemleak.h> 848c96a36SJoonsoo Kim #include <linux/page_owner.h> 933c3fc71SVladimir Davydov #include <linux/page_idle.h> 10eefa864bSJoonsoo Kim 11eefa864bSJoonsoo Kim /* 12eefa864bSJoonsoo Kim * struct page extension 13eefa864bSJoonsoo Kim * 14eefa864bSJoonsoo Kim * This is the feature to manage memory for extended data per page. 15eefa864bSJoonsoo Kim * 16eefa864bSJoonsoo Kim * Until now, we must modify struct page itself to store extra data per page. 17eefa864bSJoonsoo Kim * This requires rebuilding the kernel and it is really time consuming process. 18eefa864bSJoonsoo Kim * And, sometimes, rebuild is impossible due to third party module dependency. 19eefa864bSJoonsoo Kim * At last, enlarging struct page could cause un-wanted system behaviour change. 20eefa864bSJoonsoo Kim * 21eefa864bSJoonsoo Kim * This feature is intended to overcome above mentioned problems. This feature 22eefa864bSJoonsoo Kim * allocates memory for extended data per page in certain place rather than 23eefa864bSJoonsoo Kim * the struct page itself. This memory can be accessed by the accessor 24eefa864bSJoonsoo Kim * functions provided by this code. During the boot process, it checks whether 25eefa864bSJoonsoo Kim * allocation of huge chunk of memory is needed or not. If not, it avoids 26eefa864bSJoonsoo Kim * allocating memory at all. With this advantage, we can include this feature 27eefa864bSJoonsoo Kim * into the kernel in default and can avoid rebuild and solve related problems. 28eefa864bSJoonsoo Kim * 29eefa864bSJoonsoo Kim * To help these things to work well, there are two callbacks for clients. One 30eefa864bSJoonsoo Kim * is the need callback which is mandatory if user wants to avoid useless 31eefa864bSJoonsoo Kim * memory allocation at boot-time. The other is optional, init callback, which 32eefa864bSJoonsoo Kim * is used to do proper initialization after memory is allocated. 33eefa864bSJoonsoo Kim * 34eefa864bSJoonsoo Kim * The need callback is used to decide whether extended memory allocation is 35eefa864bSJoonsoo Kim * needed or not. Sometimes users want to deactivate some features in this 36eefa864bSJoonsoo Kim * boot and extra memory would be unneccessary. In this case, to avoid 37eefa864bSJoonsoo Kim * allocating huge chunk of memory, each clients represent their need of 38eefa864bSJoonsoo Kim * extra memory through the need callback. If one of the need callbacks 39eefa864bSJoonsoo Kim * returns true, it means that someone needs extra memory so that 40eefa864bSJoonsoo Kim * page extension core should allocates memory for page extension. If 41eefa864bSJoonsoo Kim * none of need callbacks return true, memory isn't needed at all in this boot 42eefa864bSJoonsoo Kim * and page extension core can skip to allocate memory. As result, 43eefa864bSJoonsoo Kim * none of memory is wasted. 44eefa864bSJoonsoo Kim * 45980ac167SJoonsoo Kim * When need callback returns true, page_ext checks if there is a request for 46980ac167SJoonsoo Kim * extra memory through size in struct page_ext_operations. If it is non-zero, 47980ac167SJoonsoo Kim * extra space is allocated for each page_ext entry and offset is returned to 48980ac167SJoonsoo Kim * user through offset in struct page_ext_operations. 49980ac167SJoonsoo Kim * 50eefa864bSJoonsoo Kim * The init callback is used to do proper initialization after page extension 51eefa864bSJoonsoo Kim * is completely initialized. In sparse memory system, extra memory is 52eefa864bSJoonsoo Kim * allocated some time later than memmap is allocated. In other words, lifetime 53eefa864bSJoonsoo Kim * of memory for page extension isn't same with memmap for struct page. 54eefa864bSJoonsoo Kim * Therefore, clients can't store extra data until page extension is 55eefa864bSJoonsoo Kim * initialized, even if pages are allocated and used freely. This could 56eefa864bSJoonsoo Kim * cause inadequate state of extra data per page, so, to prevent it, client 57eefa864bSJoonsoo Kim * can utilize this callback to initialize the state of it correctly. 58eefa864bSJoonsoo Kim */ 59eefa864bSJoonsoo Kim 60eefa864bSJoonsoo Kim static struct page_ext_operations *page_ext_ops[] = { 61e30825f1SJoonsoo Kim &debug_guardpage_ops, 6248c96a36SJoonsoo Kim #ifdef CONFIG_PAGE_OWNER 6348c96a36SJoonsoo Kim &page_owner_ops, 6448c96a36SJoonsoo Kim #endif 6533c3fc71SVladimir Davydov #if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT) 6633c3fc71SVladimir Davydov &page_idle_ops, 6733c3fc71SVladimir Davydov #endif 68eefa864bSJoonsoo Kim }; 69eefa864bSJoonsoo Kim 70eefa864bSJoonsoo Kim static unsigned long total_usage; 71980ac167SJoonsoo Kim static unsigned long extra_mem; 72eefa864bSJoonsoo Kim 73eefa864bSJoonsoo Kim static bool __init invoke_need_callbacks(void) 74eefa864bSJoonsoo Kim { 75eefa864bSJoonsoo Kim int i; 76eefa864bSJoonsoo Kim int entries = ARRAY_SIZE(page_ext_ops); 77980ac167SJoonsoo Kim bool need = false; 78eefa864bSJoonsoo Kim 79eefa864bSJoonsoo Kim for (i = 0; i < entries; i++) { 80980ac167SJoonsoo Kim if (page_ext_ops[i]->need && page_ext_ops[i]->need()) { 81980ac167SJoonsoo Kim page_ext_ops[i]->offset = sizeof(struct page_ext) + 82980ac167SJoonsoo Kim extra_mem; 83980ac167SJoonsoo Kim extra_mem += page_ext_ops[i]->size; 84980ac167SJoonsoo Kim need = true; 85980ac167SJoonsoo Kim } 86eefa864bSJoonsoo Kim } 87eefa864bSJoonsoo Kim 88980ac167SJoonsoo Kim return need; 89eefa864bSJoonsoo Kim } 90eefa864bSJoonsoo Kim 91eefa864bSJoonsoo Kim static void __init invoke_init_callbacks(void) 92eefa864bSJoonsoo Kim { 93eefa864bSJoonsoo Kim int i; 94eefa864bSJoonsoo Kim int entries = ARRAY_SIZE(page_ext_ops); 95eefa864bSJoonsoo Kim 96eefa864bSJoonsoo Kim for (i = 0; i < entries; i++) { 97eefa864bSJoonsoo Kim if (page_ext_ops[i]->init) 98eefa864bSJoonsoo Kim page_ext_ops[i]->init(); 99eefa864bSJoonsoo Kim } 100eefa864bSJoonsoo Kim } 101eefa864bSJoonsoo Kim 102980ac167SJoonsoo Kim static unsigned long get_entry_size(void) 103980ac167SJoonsoo Kim { 104980ac167SJoonsoo Kim return sizeof(struct page_ext) + extra_mem; 105980ac167SJoonsoo Kim } 106980ac167SJoonsoo Kim 107980ac167SJoonsoo Kim static inline struct page_ext *get_entry(void *base, unsigned long index) 108980ac167SJoonsoo Kim { 109980ac167SJoonsoo Kim return base + get_entry_size() * index; 110980ac167SJoonsoo Kim } 111980ac167SJoonsoo Kim 112eefa864bSJoonsoo Kim #if !defined(CONFIG_SPARSEMEM) 113eefa864bSJoonsoo Kim 114eefa864bSJoonsoo Kim 115eefa864bSJoonsoo Kim void __meminit pgdat_page_ext_init(struct pglist_data *pgdat) 116eefa864bSJoonsoo Kim { 117eefa864bSJoonsoo Kim pgdat->node_page_ext = NULL; 118eefa864bSJoonsoo Kim } 119eefa864bSJoonsoo Kim 120eefa864bSJoonsoo Kim struct page_ext *lookup_page_ext(struct page *page) 121eefa864bSJoonsoo Kim { 122eefa864bSJoonsoo Kim unsigned long pfn = page_to_pfn(page); 1230b06bb3fSJoonsoo Kim unsigned long index; 124eefa864bSJoonsoo Kim struct page_ext *base; 125eefa864bSJoonsoo Kim 126eefa864bSJoonsoo Kim base = NODE_DATA(page_to_nid(page))->node_page_ext; 127bd33ef36SVinayak Menon #if defined(CONFIG_DEBUG_VM) 128eefa864bSJoonsoo Kim /* 129eefa864bSJoonsoo Kim * The sanity checks the page allocator does upon freeing a 130eefa864bSJoonsoo Kim * page can reach here before the page_ext arrays are 131eefa864bSJoonsoo Kim * allocated when feeding a range of pages to the allocator 132eefa864bSJoonsoo Kim * for the first time during bootup or memory hotplug. 133eefa864bSJoonsoo Kim */ 134eefa864bSJoonsoo Kim if (unlikely(!base)) 135eefa864bSJoonsoo Kim return NULL; 136eefa864bSJoonsoo Kim #endif 1370b06bb3fSJoonsoo Kim index = pfn - round_down(node_start_pfn(page_to_nid(page)), 138eefa864bSJoonsoo Kim MAX_ORDER_NR_PAGES); 139980ac167SJoonsoo Kim return get_entry(base, index); 140eefa864bSJoonsoo Kim } 141eefa864bSJoonsoo Kim 142eefa864bSJoonsoo Kim static int __init alloc_node_page_ext(int nid) 143eefa864bSJoonsoo Kim { 144eefa864bSJoonsoo Kim struct page_ext *base; 145eefa864bSJoonsoo Kim unsigned long table_size; 146eefa864bSJoonsoo Kim unsigned long nr_pages; 147eefa864bSJoonsoo Kim 148eefa864bSJoonsoo Kim nr_pages = NODE_DATA(nid)->node_spanned_pages; 149eefa864bSJoonsoo Kim if (!nr_pages) 150eefa864bSJoonsoo Kim return 0; 151eefa864bSJoonsoo Kim 152eefa864bSJoonsoo Kim /* 153eefa864bSJoonsoo Kim * Need extra space if node range is not aligned with 154eefa864bSJoonsoo Kim * MAX_ORDER_NR_PAGES. When page allocator's buddy algorithm 155eefa864bSJoonsoo Kim * checks buddy's status, range could be out of exact node range. 156eefa864bSJoonsoo Kim */ 157eefa864bSJoonsoo Kim if (!IS_ALIGNED(node_start_pfn(nid), MAX_ORDER_NR_PAGES) || 158eefa864bSJoonsoo Kim !IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES)) 159eefa864bSJoonsoo Kim nr_pages += MAX_ORDER_NR_PAGES; 160eefa864bSJoonsoo Kim 161980ac167SJoonsoo Kim table_size = get_entry_size() * nr_pages; 162eefa864bSJoonsoo Kim 163eefa864bSJoonsoo Kim base = memblock_virt_alloc_try_nid_nopanic( 164eefa864bSJoonsoo Kim table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), 165eefa864bSJoonsoo Kim BOOTMEM_ALLOC_ACCESSIBLE, nid); 166eefa864bSJoonsoo Kim if (!base) 167eefa864bSJoonsoo Kim return -ENOMEM; 168eefa864bSJoonsoo Kim NODE_DATA(nid)->node_page_ext = base; 169eefa864bSJoonsoo Kim total_usage += table_size; 170eefa864bSJoonsoo Kim return 0; 171eefa864bSJoonsoo Kim } 172eefa864bSJoonsoo Kim 173eefa864bSJoonsoo Kim void __init page_ext_init_flatmem(void) 174eefa864bSJoonsoo Kim { 175eefa864bSJoonsoo Kim 176eefa864bSJoonsoo Kim int nid, fail; 177eefa864bSJoonsoo Kim 178eefa864bSJoonsoo Kim if (!invoke_need_callbacks()) 179eefa864bSJoonsoo Kim return; 180eefa864bSJoonsoo Kim 181eefa864bSJoonsoo Kim for_each_online_node(nid) { 182eefa864bSJoonsoo Kim fail = alloc_node_page_ext(nid); 183eefa864bSJoonsoo Kim if (fail) 184eefa864bSJoonsoo Kim goto fail; 185eefa864bSJoonsoo Kim } 186eefa864bSJoonsoo Kim pr_info("allocated %ld bytes of page_ext\n", total_usage); 187eefa864bSJoonsoo Kim invoke_init_callbacks(); 188eefa864bSJoonsoo Kim return; 189eefa864bSJoonsoo Kim 190eefa864bSJoonsoo Kim fail: 191eefa864bSJoonsoo Kim pr_crit("allocation of page_ext failed.\n"); 192eefa864bSJoonsoo Kim panic("Out of memory"); 193eefa864bSJoonsoo Kim } 194eefa864bSJoonsoo Kim 195eefa864bSJoonsoo Kim #else /* CONFIG_FLAT_NODE_MEM_MAP */ 196eefa864bSJoonsoo Kim 197eefa864bSJoonsoo Kim struct page_ext *lookup_page_ext(struct page *page) 198eefa864bSJoonsoo Kim { 199eefa864bSJoonsoo Kim unsigned long pfn = page_to_pfn(page); 200eefa864bSJoonsoo Kim struct mem_section *section = __pfn_to_section(pfn); 201bd33ef36SVinayak Menon #if defined(CONFIG_DEBUG_VM) 202eefa864bSJoonsoo Kim /* 203eefa864bSJoonsoo Kim * The sanity checks the page allocator does upon freeing a 204eefa864bSJoonsoo Kim * page can reach here before the page_ext arrays are 205eefa864bSJoonsoo Kim * allocated when feeding a range of pages to the allocator 206eefa864bSJoonsoo Kim * for the first time during bootup or memory hotplug. 207eefa864bSJoonsoo Kim */ 208eefa864bSJoonsoo Kim if (!section->page_ext) 209eefa864bSJoonsoo Kim return NULL; 210eefa864bSJoonsoo Kim #endif 211980ac167SJoonsoo Kim return get_entry(section->page_ext, pfn); 212eefa864bSJoonsoo Kim } 213eefa864bSJoonsoo Kim 214eefa864bSJoonsoo Kim static void *__meminit alloc_page_ext(size_t size, int nid) 215eefa864bSJoonsoo Kim { 216eefa864bSJoonsoo Kim gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN; 217eefa864bSJoonsoo Kim void *addr = NULL; 218eefa864bSJoonsoo Kim 219eefa864bSJoonsoo Kim addr = alloc_pages_exact_nid(nid, size, flags); 220eefa864bSJoonsoo Kim if (addr) { 221eefa864bSJoonsoo Kim kmemleak_alloc(addr, size, 1, flags); 222eefa864bSJoonsoo Kim return addr; 223eefa864bSJoonsoo Kim } 224eefa864bSJoonsoo Kim 225eefa864bSJoonsoo Kim addr = vzalloc_node(size, nid); 226eefa864bSJoonsoo Kim 227eefa864bSJoonsoo Kim return addr; 228eefa864bSJoonsoo Kim } 229eefa864bSJoonsoo Kim 230eefa864bSJoonsoo Kim static int __meminit init_section_page_ext(unsigned long pfn, int nid) 231eefa864bSJoonsoo Kim { 232eefa864bSJoonsoo Kim struct mem_section *section; 233eefa864bSJoonsoo Kim struct page_ext *base; 234eefa864bSJoonsoo Kim unsigned long table_size; 235eefa864bSJoonsoo Kim 236eefa864bSJoonsoo Kim section = __pfn_to_section(pfn); 237eefa864bSJoonsoo Kim 238eefa864bSJoonsoo Kim if (section->page_ext) 239eefa864bSJoonsoo Kim return 0; 240eefa864bSJoonsoo Kim 241980ac167SJoonsoo Kim table_size = get_entry_size() * PAGES_PER_SECTION; 242eefa864bSJoonsoo Kim base = alloc_page_ext(table_size, nid); 243eefa864bSJoonsoo Kim 244eefa864bSJoonsoo Kim /* 245eefa864bSJoonsoo Kim * The value stored in section->page_ext is (base - pfn) 246eefa864bSJoonsoo Kim * and it does not point to the memory block allocated above, 247eefa864bSJoonsoo Kim * causing kmemleak false positives. 248eefa864bSJoonsoo Kim */ 249eefa864bSJoonsoo Kim kmemleak_not_leak(base); 250eefa864bSJoonsoo Kim 251eefa864bSJoonsoo Kim if (!base) { 252eefa864bSJoonsoo Kim pr_err("page ext allocation failure\n"); 253eefa864bSJoonsoo Kim return -ENOMEM; 254eefa864bSJoonsoo Kim } 255eefa864bSJoonsoo Kim 256eefa864bSJoonsoo Kim /* 257eefa864bSJoonsoo Kim * The passed "pfn" may not be aligned to SECTION. For the calculation 258eefa864bSJoonsoo Kim * we need to apply a mask. 259eefa864bSJoonsoo Kim */ 260eefa864bSJoonsoo Kim pfn &= PAGE_SECTION_MASK; 261980ac167SJoonsoo Kim section->page_ext = (void *)base - get_entry_size() * pfn; 262eefa864bSJoonsoo Kim total_usage += table_size; 263eefa864bSJoonsoo Kim return 0; 264eefa864bSJoonsoo Kim } 265eefa864bSJoonsoo Kim #ifdef CONFIG_MEMORY_HOTPLUG 266eefa864bSJoonsoo Kim static void free_page_ext(void *addr) 267eefa864bSJoonsoo Kim { 268eefa864bSJoonsoo Kim if (is_vmalloc_addr(addr)) { 269eefa864bSJoonsoo Kim vfree(addr); 270eefa864bSJoonsoo Kim } else { 271eefa864bSJoonsoo Kim struct page *page = virt_to_page(addr); 272eefa864bSJoonsoo Kim size_t table_size; 273eefa864bSJoonsoo Kim 274980ac167SJoonsoo Kim table_size = get_entry_size() * PAGES_PER_SECTION; 275eefa864bSJoonsoo Kim 276eefa864bSJoonsoo Kim BUG_ON(PageReserved(page)); 277eefa864bSJoonsoo Kim free_pages_exact(addr, table_size); 278eefa864bSJoonsoo Kim } 279eefa864bSJoonsoo Kim } 280eefa864bSJoonsoo Kim 281eefa864bSJoonsoo Kim static void __free_page_ext(unsigned long pfn) 282eefa864bSJoonsoo Kim { 283eefa864bSJoonsoo Kim struct mem_section *ms; 284eefa864bSJoonsoo Kim struct page_ext *base; 285eefa864bSJoonsoo Kim 286eefa864bSJoonsoo Kim ms = __pfn_to_section(pfn); 287eefa864bSJoonsoo Kim if (!ms || !ms->page_ext) 288eefa864bSJoonsoo Kim return; 289980ac167SJoonsoo Kim base = get_entry(ms->page_ext, pfn); 290eefa864bSJoonsoo Kim free_page_ext(base); 291eefa864bSJoonsoo Kim ms->page_ext = NULL; 292eefa864bSJoonsoo Kim } 293eefa864bSJoonsoo Kim 294eefa864bSJoonsoo Kim static int __meminit online_page_ext(unsigned long start_pfn, 295eefa864bSJoonsoo Kim unsigned long nr_pages, 296eefa864bSJoonsoo Kim int nid) 297eefa864bSJoonsoo Kim { 298eefa864bSJoonsoo Kim unsigned long start, end, pfn; 299eefa864bSJoonsoo Kim int fail = 0; 300eefa864bSJoonsoo Kim 301eefa864bSJoonsoo Kim start = SECTION_ALIGN_DOWN(start_pfn); 302eefa864bSJoonsoo Kim end = SECTION_ALIGN_UP(start_pfn + nr_pages); 303eefa864bSJoonsoo Kim 304eefa864bSJoonsoo Kim if (nid == -1) { 305eefa864bSJoonsoo Kim /* 306eefa864bSJoonsoo Kim * In this case, "nid" already exists and contains valid memory. 307eefa864bSJoonsoo Kim * "start_pfn" passed to us is a pfn which is an arg for 308eefa864bSJoonsoo Kim * online__pages(), and start_pfn should exist. 309eefa864bSJoonsoo Kim */ 310eefa864bSJoonsoo Kim nid = pfn_to_nid(start_pfn); 311eefa864bSJoonsoo Kim VM_BUG_ON(!node_state(nid, N_ONLINE)); 312eefa864bSJoonsoo Kim } 313eefa864bSJoonsoo Kim 314eefa864bSJoonsoo Kim for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) { 315eefa864bSJoonsoo Kim if (!pfn_present(pfn)) 316eefa864bSJoonsoo Kim continue; 317eefa864bSJoonsoo Kim fail = init_section_page_ext(pfn, nid); 318eefa864bSJoonsoo Kim } 319eefa864bSJoonsoo Kim if (!fail) 320eefa864bSJoonsoo Kim return 0; 321eefa864bSJoonsoo Kim 322eefa864bSJoonsoo Kim /* rollback */ 323eefa864bSJoonsoo Kim for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) 324eefa864bSJoonsoo Kim __free_page_ext(pfn); 325eefa864bSJoonsoo Kim 326eefa864bSJoonsoo Kim return -ENOMEM; 327eefa864bSJoonsoo Kim } 328eefa864bSJoonsoo Kim 329eefa864bSJoonsoo Kim static int __meminit offline_page_ext(unsigned long start_pfn, 330eefa864bSJoonsoo Kim unsigned long nr_pages, int nid) 331eefa864bSJoonsoo Kim { 332eefa864bSJoonsoo Kim unsigned long start, end, pfn; 333eefa864bSJoonsoo Kim 334eefa864bSJoonsoo Kim start = SECTION_ALIGN_DOWN(start_pfn); 335eefa864bSJoonsoo Kim end = SECTION_ALIGN_UP(start_pfn + nr_pages); 336eefa864bSJoonsoo Kim 337eefa864bSJoonsoo Kim for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) 338eefa864bSJoonsoo Kim __free_page_ext(pfn); 339eefa864bSJoonsoo Kim return 0; 340eefa864bSJoonsoo Kim 341eefa864bSJoonsoo Kim } 342eefa864bSJoonsoo Kim 343eefa864bSJoonsoo Kim static int __meminit page_ext_callback(struct notifier_block *self, 344eefa864bSJoonsoo Kim unsigned long action, void *arg) 345eefa864bSJoonsoo Kim { 346eefa864bSJoonsoo Kim struct memory_notify *mn = arg; 347eefa864bSJoonsoo Kim int ret = 0; 348eefa864bSJoonsoo Kim 349eefa864bSJoonsoo Kim switch (action) { 350eefa864bSJoonsoo Kim case MEM_GOING_ONLINE: 351eefa864bSJoonsoo Kim ret = online_page_ext(mn->start_pfn, 352eefa864bSJoonsoo Kim mn->nr_pages, mn->status_change_nid); 353eefa864bSJoonsoo Kim break; 354eefa864bSJoonsoo Kim case MEM_OFFLINE: 355eefa864bSJoonsoo Kim offline_page_ext(mn->start_pfn, 356eefa864bSJoonsoo Kim mn->nr_pages, mn->status_change_nid); 357eefa864bSJoonsoo Kim break; 358eefa864bSJoonsoo Kim case MEM_CANCEL_ONLINE: 359eefa864bSJoonsoo Kim offline_page_ext(mn->start_pfn, 360eefa864bSJoonsoo Kim mn->nr_pages, mn->status_change_nid); 361eefa864bSJoonsoo Kim break; 362eefa864bSJoonsoo Kim case MEM_GOING_OFFLINE: 363eefa864bSJoonsoo Kim break; 364eefa864bSJoonsoo Kim case MEM_ONLINE: 365eefa864bSJoonsoo Kim case MEM_CANCEL_OFFLINE: 366eefa864bSJoonsoo Kim break; 367eefa864bSJoonsoo Kim } 368eefa864bSJoonsoo Kim 369eefa864bSJoonsoo Kim return notifier_from_errno(ret); 370eefa864bSJoonsoo Kim } 371eefa864bSJoonsoo Kim 372eefa864bSJoonsoo Kim #endif 373eefa864bSJoonsoo Kim 374eefa864bSJoonsoo Kim void __init page_ext_init(void) 375eefa864bSJoonsoo Kim { 376eefa864bSJoonsoo Kim unsigned long pfn; 377eefa864bSJoonsoo Kim int nid; 378eefa864bSJoonsoo Kim 379eefa864bSJoonsoo Kim if (!invoke_need_callbacks()) 380eefa864bSJoonsoo Kim return; 381eefa864bSJoonsoo Kim 382eefa864bSJoonsoo Kim for_each_node_state(nid, N_MEMORY) { 383eefa864bSJoonsoo Kim unsigned long start_pfn, end_pfn; 384eefa864bSJoonsoo Kim 385eefa864bSJoonsoo Kim start_pfn = node_start_pfn(nid); 386eefa864bSJoonsoo Kim end_pfn = node_end_pfn(nid); 387eefa864bSJoonsoo Kim /* 388eefa864bSJoonsoo Kim * start_pfn and end_pfn may not be aligned to SECTION and the 389eefa864bSJoonsoo Kim * page->flags of out of node pages are not initialized. So we 390eefa864bSJoonsoo Kim * scan [start_pfn, the biggest section's pfn < end_pfn) here. 391eefa864bSJoonsoo Kim */ 392eefa864bSJoonsoo Kim for (pfn = start_pfn; pfn < end_pfn; 393eefa864bSJoonsoo Kim pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) { 394eefa864bSJoonsoo Kim 395eefa864bSJoonsoo Kim if (!pfn_valid(pfn)) 396eefa864bSJoonsoo Kim continue; 397eefa864bSJoonsoo Kim /* 398eefa864bSJoonsoo Kim * Nodes's pfns can be overlapping. 399eefa864bSJoonsoo Kim * We know some arch can have a nodes layout such as 400eefa864bSJoonsoo Kim * -------------pfn--------------> 401eefa864bSJoonsoo Kim * N0 | N1 | N2 | N0 | N1 | N2|.... 402fe53ca54SYang Shi * 403fe53ca54SYang Shi * Take into account DEFERRED_STRUCT_PAGE_INIT. 404eefa864bSJoonsoo Kim */ 405fe53ca54SYang Shi if (early_pfn_to_nid(pfn) != nid) 406eefa864bSJoonsoo Kim continue; 407eefa864bSJoonsoo Kim if (init_section_page_ext(pfn, nid)) 408eefa864bSJoonsoo Kim goto oom; 409*0fc542b7SVlastimil Babka cond_resched(); 410eefa864bSJoonsoo Kim } 411eefa864bSJoonsoo Kim } 412eefa864bSJoonsoo Kim hotplug_memory_notifier(page_ext_callback, 0); 413eefa864bSJoonsoo Kim pr_info("allocated %ld bytes of page_ext\n", total_usage); 414eefa864bSJoonsoo Kim invoke_init_callbacks(); 415eefa864bSJoonsoo Kim return; 416eefa864bSJoonsoo Kim 417eefa864bSJoonsoo Kim oom: 418eefa864bSJoonsoo Kim panic("Out of memory"); 419eefa864bSJoonsoo Kim } 420eefa864bSJoonsoo Kim 421eefa864bSJoonsoo Kim void __meminit pgdat_page_ext_init(struct pglist_data *pgdat) 422eefa864bSJoonsoo Kim { 423eefa864bSJoonsoo Kim } 424eefa864bSJoonsoo Kim 425eefa864bSJoonsoo Kim #endif 426