Lines Matching +full:high +full:- +full:end
1 // SPDX-License-Identifier: GPL-2.0-only
39 #include <linux/fault-inject.h>
63 /* Free Page Internal flags: for internal, non-pcp variants of free_pages(). */
72 * reporting it and marking it "reported" - it only skips notifying
81 * page shuffling (relevant code - e.g., memory onlining - is expected to
84 * Note: No code should rely on this flag for correctness - it's purely
94 /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
107 /* UP spin_trylock always succeeds so disable IRQs to prevent re-entrancy. */
118 * interfered with and a high priority task cannot preempt the allocator.
129 * Generic helper to look up and lock a per-cpu variable with an embedded spinlock.
137 spin_lock(&_ret->member); \
146 if (!spin_trylock(&_ret->member)) { \
155 spin_unlock(&ptr->member); \
222 * 1G machine -> (16M dma, 800M-16M normal, 1G-800M high)
223 * 1G machine -> (16M dma, 784M normal, 224M high)
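The second line above restates the first with the arithmetic carried out: 800M - 16M = 784M of normal memory and 1G - 800M = 224M of highmem.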
228 * TBD: should special case ZONE_DMA32 machines here - in those we normally
276 int user_min_free_kbytes = -1;
301 * During boot we initialize deferred pages on-demand, as needed, but once
342 return page_zone(page)->pageblock_flags; in get_pageblock_bitmap()
349 pfn &= (PAGES_PER_SECTION-1); in pfn_to_bitidx()
351 pfn = pfn - pageblock_start_pfn(page_zone(page)->zone_start_pfn); in pfn_to_bitidx()
379 *bitidx &= (BITS_PER_LONG - 1); in get_pfnblock_bitmap_bitidx()
385 * __get_pfnblock_flags_mask - Return the requested group of flags for
412 * get_pfnblock_bit - Check if a standalone bit of a pageblock is set
434 * get_pfnblock_migratetype - Return the migratetype of a pageblock
459 * __set_pfnblock_flags_mask - Set the requested group of flags for
484 * set_pfnblock_bit - Set a standalone bit of a pageblock
504 * clear_pfnblock_bit - Clear a standalone bit of a pageblock
524 * set_pageblock_migratetype - Set the migratetype of a pageblock
586 start_pfn = zone->zone_start_pfn; in page_outside_zone_boundaries()
587 sp = zone->spanned_pages; in page_outside_zone_boundaries()
592 pr_err("page 0x%lx outside node %d zone %s [ 0x%lx - 0x%lx ]\n", in page_outside_zone_boundaries()
593 pfn, zone_to_nid(zone), zone->name, in page_outside_zone_boundaries()
645 current->comm, page_to_pfn(page)); in bad_page()
702 * Higher-order pages are called "compound pages". They are structured thusly:
707 * in bit 0 of page->compound_head. The rest of bits is pointer to head page.
709 * The first tail page's ->compound_order holds the order of allocation.
710 * This usage means that zero-order pages may not be compound.
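The head-page encoding described above can be shown with a minimal sketch, loosely modelled on the kernel's compound_head() helper; the function name and stripped-down logic here are illustrative, not the in-tree definition (assumes kernel context and struct page from <linux/mm_types.h>):

	/*
	 * If bit 0 of page->compound_head is set, the remaining bits point
	 * at the head page; otherwise the page is its own head.
	 */
	static inline struct page *sketch_compound_head(struct page *page)
	{
		unsigned long head = READ_ONCE(page->compound_head);

		if (head & 1)
			return (struct page *)(head - 1);
		return page;
	}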
734 struct capture_control *capc = current->capture_control; in task_capc()
737 !(current->flags & PF_KTHREAD) && in task_capc()
738 !capc->page && in task_capc()
739 capc->cc->zone == zone ? capc : NULL; in task_capc()
746 if (!capc || order != capc->cc->order) in compaction_capture()
758 * and vice-versa but no more than normal fallback logic which can in compaction_capture()
759 * have trouble finding a high-order free page. in compaction_capture()
762 capc->cc->migratetype != MIGRATE_MOVABLE) in compaction_capture()
765 if (migratetype != capc->cc->migratetype) in compaction_capture()
766 trace_mm_page_alloc_extfrag(page, capc->cc->order, order, in compaction_capture()
767 capc->cc->migratetype, migratetype); in compaction_capture()
769 capc->page = page; in compaction_capture()
790 lockdep_assert_held(&zone->lock); in account_freepages()
800 WRITE_ONCE(zone->nr_free_highatomic, in account_freepages()
801 zone->nr_free_highatomic + nr_pages); in account_freepages()
809 struct free_area *area = &zone->free_area[order]; in __add_to_free_list()
817 list_add_tail(&page->buddy_list, &area->free_list[migratetype]); in __add_to_free_list()
819 list_add(&page->buddy_list, &area->free_list[migratetype]); in __add_to_free_list()
820 area->nr_free++; in __add_to_free_list()
828 * of the list - so the moved pages won't immediately be considered for
834 struct free_area *area = &zone->free_area[order]; in move_to_free_list()
842 list_move_tail(&page->buddy_list, &area->free_list[new_mt]); in move_to_free_list()
844 account_freepages(zone, -nr_pages, old_mt); in move_to_free_list()
850 nr_pages = -nr_pages; in move_to_free_list()
868 list_del(&page->buddy_list); in __del_page_from_free_list()
871 zone->free_area[order].nr_free--; in __del_page_from_free_list()
874 __mod_zone_page_state(zone, NR_FREE_PAGES_BLOCKS, -nr_pages); in __del_page_from_free_list()
881 account_freepages(zone, -(1 << order), migratetype); in del_page_from_free_list()
887 return list_first_entry_or_null(&area->free_list[migratetype], in get_page_from_free_area()
893 * of the next-higher order is free. If it is, it's possible
897 * as a 2-level higher order page
906 if (order >= MAX_PAGE_ORDER - 1) in buddy_merge_likely()
910 higher_page = page + (higher_page_pfn - pfn); in buddy_merge_likely()
919 * The concept of a buddy system is to maintain direct-mapped table
924 * At a high level, all that happens here is marking the table entry
937 * -- nyc
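The pairing the comment alludes to can be made concrete with a small sketch: the buddy of the block starting at a given pfn, at a given order, is found by flipping the bit that selects the pair (this mirrors the idea behind the kernel's __find_buddy_pfn(); the helper name below is illustrative):

	/* pfn of the buddy block at the given order */
	static inline unsigned long buddy_pfn_of(unsigned long pfn, unsigned int order)
	{
		return pfn ^ (1UL << order);
	}

For example, at order 3 the blocks starting at pfn 0 and pfn 8 are buddies; if both are free and of order 3, they merge into one order-4 block starting at pfn 0.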
952 VM_BUG_ON_PAGE(page->flags.f & PAGE_FLAGS_CHECK_AT_PREP, page); in __free_one_page()
954 VM_BUG_ON(migratetype == -1); in __free_one_page()
955 VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page); in __free_one_page()
964 account_freepages(zone, -(1 << order), migratetype); in __free_one_page()
999 * expand() down the line puts the sub-blocks in __free_one_page()
1006 page = page + (combined_pfn - pfn); in __free_one_page()
1036 if (unlikely(atomic_read(&page->_mapcount) != -1)) in page_expected_state()
1039 if (unlikely((unsigned long)page->mapping | in page_expected_state()
1042 page->memcg_data | in page_expected_state()
1045 (page->flags.f & check_flags))) in page_expected_state()
1055 if (unlikely(atomic_read(&page->_mapcount) != -1)) in page_bad_reason()
1057 if (unlikely(page->mapping != NULL)) in page_bad_reason()
1058 bad_reason = "non-NULL mapping"; in page_bad_reason()
1061 if (unlikely(page->flags.f & flags)) { in page_bad_reason()
1068 if (unlikely(page->memcg_data)) in page_bad_reason()
1097 * We rely on page->lru.next never having bit 0 set, unless the page in free_tail_page_prepare()
1098 * is PageTail(). Let's make sure that's true even for poisoned ->lru. in free_tail_page_prepare()
1106 switch (page - head_page) { in free_tail_page_prepare()
1108 /* the first tail page: these may be in place of ->mapping */ in free_tail_page_prepare()
1114 unlikely(atomic_read(&folio->_nr_pages_mapped))) { in free_tail_page_prepare()
1119 if (unlikely(folio->_mm_id_mapcount[0] != -1)) { in free_tail_page_prepare()
1123 if (unlikely(folio->_mm_id_mapcount[1] != -1)) { in free_tail_page_prepare()
1129 if (unlikely(atomic_read(&folio->_entire_mapcount) + 1)) { in free_tail_page_prepare()
1133 if (unlikely(atomic_read(&folio->_pincount))) { in free_tail_page_prepare()
1140 /* the second tail page: deferred_list overlaps ->mapping */ in free_tail_page_prepare()
1141 if (unlikely(!list_empty(&folio->_deferred_list))) { in free_tail_page_prepare()
1146 if (unlikely(atomic_read(&folio->_entire_mapcount) + 1)) { in free_tail_page_prepare()
1150 if (unlikely(atomic_read(&folio->_pincount))) { in free_tail_page_prepare()
1157 /* the third tail page: hugetlb specifics overlap ->mappings */ in free_tail_page_prepare()
1162 if (page->mapping != TAIL_MAPPING) { in free_tail_page_prepare()
1178 page->mapping = NULL; in free_tail_page_prepare()
1187 * Tag-based KASAN modes skip pages freed via deferred memory initialization
1189 * 2. For tag-based KASAN modes: the page has a match-all KASAN tag, indicating
1192 * Pages will have match-all tags in the following circumstances:
1209 * on-demand allocation and then freed again before the deferred pages
1255 alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr); in __pgalloc_tag_add()
1292 this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr); in pgalloc_tag_sub_pages()
1331 zone_stat_mod_folio(folio, NR_MLOCK, -nr_pages); in free_pages_prepare()
1354 * avoid checking PageCompound for order-0 pages. in free_pages_prepare()
1362 folio->_nr_pages = 0; in free_pages_prepare()
1374 (page + i)->flags.f &= ~PAGE_FLAGS_CHECK_AT_PREP; in free_pages_prepare()
1378 mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1); in free_pages_prepare()
1379 folio->mapping = NULL; in free_pages_prepare()
1383 page->page_type = UINT_MAX; in free_pages_prepare()
1393 page->flags.f &= ~PAGE_FLAGS_CHECK_AT_PREP; in free_pages_prepare()
1412 * With hardware tag-based KASAN, memory tags must be set before the in free_pages_prepare()
1454 count = min(pcp->count, count); in free_pcppages_bulk()
1457 pindex = pindex - 1; in free_pcppages_bulk()
1459 spin_lock_irqsave(&zone->lock, flags); in free_pcppages_bulk()
1465 /* Remove pages from lists in a round-robin fashion. */ in free_pcppages_bulk()
1467 if (++pindex > NR_PCP_LISTS - 1) in free_pcppages_bulk()
1469 list = &pcp->lists[pindex]; in free_pcppages_bulk()
1483 list_del(&page->pcp_list); in free_pcppages_bulk()
1484 count -= nr_pages; in free_pcppages_bulk()
1485 pcp->count -= nr_pages; in free_pcppages_bulk()
1492 spin_unlock_irqrestore(&zone->lock, flags); in free_pcppages_bulk()
1495 /* Split a multi-block free page into its individual pageblocks. */
1499 unsigned long end = pfn + (1 << order); in split_large_buddy() local
1513 if (pfn == end) in split_large_buddy()
1523 page->private = order; in add_page_to_zone_llist()
1525 llist_add(&page->pcp_llist, &zone->trylock_free_pages); in add_page_to_zone_llist()
1536 if (!spin_trylock_irqsave(&zone->lock, flags)) { in free_one_page()
1541 spin_lock_irqsave(&zone->lock, flags); in free_one_page()
1545 llhead = &zone->trylock_free_pages; in free_one_page()
1552 unsigned int p_order = p->private; in free_one_page()
1559 spin_unlock_irqrestore(&zone->lock, flags); in free_one_page()
1605 atomic_long_add(nr_pages, &page_zone(page)->managed_pages); in __free_pages_core()
1637 * Note: the function may return non-NULL struct page even for a page block
1640 * will fall into 2 sub-sections, and the end pfn of the pageblock may be hole
1653 end_pfn--; in __pageblock_pfn_to_page()
1686 * -- nyc
1689 int high, int migratetype) in expand() argument
1691 unsigned int size = 1 << high; in expand()
1694 while (high > low) { in expand()
1695 high--; in expand()
1705 if (set_page_guard(zone, &page[size], high)) in expand()
1708 __add_to_free_list(&page[size], zone, high, migratetype, false); in expand()
1709 set_buddy_order(&page[size], high); in expand()
1718 int high, int migratetype) in page_del_and_expand() argument
1720 int nr_pages = 1 << high; in page_del_and_expand()
1722 __del_page_from_free_list(page, zone, high, migratetype); in page_del_and_expand()
1723 nr_pages -= expand(zone, page, low, high, migratetype); in page_del_and_expand()
1724 account_freepages(zone, -nr_pages, migratetype); in page_del_and_expand()
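A worked example of the split performed above, with illustrative numbers: asking page_del_and_expand() for low = 2 out of a high = 5 block starts with nr_pages = 1 << 5 = 32; expand() puts buddies of order 4, 3 and 2 (16 + 8 + 4 = 28 pages) back on the free lists and returns 28, so only the 32 - 28 = 4 pages actually handed to the caller are subtracted from the free-page accounting.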
1774 /* Skip, if hardware tag-based KASAN is not enabled. */ in should_skip_kasan_unpoison()
1779 * With hardware tag-based KASAN enabled, skip if this has been in should_skip_kasan_unpoison()
1787 /* Don't skip, if hardware tag-based KASAN is not enabled. */ in should_skip_init()
1791 /* For hardware tag-based KASAN, skip if requested. */ in should_skip_init()
1884 area = &(zone->free_area[current_order]); in __rmqueue_smallest()
1907 static int fallbacks[MIGRATE_PCPTYPES][MIGRATE_PCPTYPES - 1] = {
1936 VM_WARN_ON(start_pfn & (pageblock_nr_pages - 1)); in __move_freepages_block()
1965 unsigned long pfn, start, end; in prep_move_freepages_block() local
1969 end = pageblock_end_pfn(pfn); in prep_move_freepages_block()
1980 if (!zone_spans_pfn(zone, end - 1)) in prep_move_freepages_block()
1988 for (pfn = start; pfn < end;) { in prep_move_freepages_block()
2018 return -1; in move_freepages_block()
2032 * If start_pfn is not an order-0 PageBuddy, next PageBuddy containing in find_large_buddy()
2034 * the order with __ffs(start_pfn). If start_pfn is order-0 PageBuddy, in find_large_buddy()
2067 * __move_freepages_block_isolate - move free pages in block for page isolation
2120 /* Use MIGRATETYPE_MASK to get non-isolate migratetype */ in __move_freepages_block_isolate()
2152 int nr_pageblocks = 1 << (start_order - pageblock_order); in change_pageblock_range()
2154 while (nr_pageblocks--) { in change_pageblock_range()
2175 max_boost = mult_frac(zone->_watermark[WMARK_HIGH], in boost_watermark()
2179 * high watermark may be uninitialised if fragmentation occurs in boost_watermark()
2191 zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages, in boost_watermark()
2247 * we would do this whole-block claiming. This would help to reduce
2256 return -2; in find_suitable_fallback()
2258 if (area->nr_free == 0) in find_suitable_fallback()
2259 return -1; in find_suitable_fallback()
2261 for (i = 0; i < MIGRATE_PCPTYPES - 1 ; i++) { in find_suitable_fallback()
2268 return -1; in find_suitable_fallback()
2303 set_bit(ZONE_BOOSTED_WATERMARK, &zone->flags); in try_to_claim_block()
2320 * to MOVABLE pageblock, consider all non-movable pages as in try_to_claim_block()
2323 * exact migratetype of non-movable pages. in try_to_claim_block()
2327 - (free_pages + movable_pages); in try_to_claim_block()
2335 if (free_pages + alike_pages >= (1 << (pageblock_order-1)) || in try_to_claim_block()
2377 --current_order) { in __rmqueue_claim()
2378 area = &(zone->free_area[current_order]); in __rmqueue_claim()
2383 if (fallback_mt == -1) in __rmqueue_claim()
2387 if (fallback_mt == -2) in __rmqueue_claim()
2417 area = &(zone->free_area[current_order]); in __rmqueue_steal()
2420 if (fallback_mt == -1) in __rmqueue_steal()
2442 * Call me with the zone->lock already held.
2470 * a loop with the zone->lock held, meaning the freelists are in __rmqueue()
2523 if (!spin_trylock_irqsave(&zone->lock, flags)) in rmqueue_bulk()
2526 spin_lock_irqsave(&zone->lock, flags); in rmqueue_bulk()
2544 list_add_tail(&page->pcp_list, list); in rmqueue_bulk()
2546 spin_unlock_irqrestore(&zone->lock, flags); in rmqueue_bulk()
2552 * Called from the vmstat counter updater to decay the PCP high.
2560 high_min = READ_ONCE(pcp->high_min); in decay_pcp_high()
2561 batch = READ_ONCE(pcp->batch); in decay_pcp_high()
2563 * Decrease pcp->high periodically to try to free possible in decay_pcp_high()
2565 * control latency. This caps pcp->high decrement too. in decay_pcp_high()
2567 if (pcp->high > high_min) { in decay_pcp_high()
2568 pcp->high = max3(pcp->count - (batch << CONFIG_PCP_BATCH_SCALE_MAX), in decay_pcp_high()
2569 pcp->high - (pcp->high >> 3), high_min); in decay_pcp_high()
2570 if (pcp->high > high_min) in decay_pcp_high()
2574 to_drain = pcp->count - pcp->high; in decay_pcp_high()
2576 spin_lock(&pcp->lock); in decay_pcp_high()
2578 spin_unlock(&pcp->lock); in decay_pcp_high()
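A worked example of the decay above, assuming high_min = 64 and that the scaled-batch term (pcp->count minus batch << CONFIG_PCP_BATCH_SCALE_MAX) does not dominate: with pcp->high = 800 and pcp->count = 900, the new high becomes max(800 - 800/8, 64) = 700, so to_drain = 900 - 700 = 200 pages are freed back to the buddy allocator under pcp->lock.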
2595 batch = READ_ONCE(pcp->batch); in drain_zone_pages()
2596 to_drain = min(pcp->count, batch); in drain_zone_pages()
2598 spin_lock(&pcp->lock); in drain_zone_pages()
2600 spin_unlock(&pcp->lock); in drain_zone_pages()
2610 struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); in drain_pages_zone()
2614 spin_lock(&pcp->lock); in drain_pages_zone()
2615 count = pcp->count; in drain_pages_zone()
2618 pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX); in drain_pages_zone()
2621 count -= to_drain; in drain_pages_zone()
2623 spin_unlock(&pcp->lock); in drain_pages_zone()
2640 * Spill all of this CPU's per-cpu pages back into the buddy allocator.
2657 * not empty. The check for non-emptiness can however race with a free to
2658 * pcplist that has not yet increased the pcp->count from 0 to 1. Callers
2701 pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); in __drain_all_pages()
2702 if (pcp->count) in __drain_all_pages()
2706 pcp = per_cpu_ptr(z->per_cpu_pageset, cpu); in __drain_all_pages()
2707 if (pcp->count) { in __drain_all_pages()
2731 * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
2733 * When zone parameter is non-NULL, spill just the single zone's pages.
2740 static int nr_pcp_free(struct per_cpu_pages *pcp, int batch, int high, bool free_high) in nr_pcp_free() argument
2744 /* Free as much as possible if batch freeing high-order pages. */ in nr_pcp_free()
2746 return min(pcp->count, batch << CONFIG_PCP_BATCH_SCALE_MAX); in nr_pcp_free()
2749 if (unlikely(high < batch)) in nr_pcp_free()
2752 /* Leave at least pcp->batch pages on the list */ in nr_pcp_free()
2754 max_nr_free = high - batch; in nr_pcp_free()
2760 batch = clamp_t(int, pcp->free_count, min_nr_free, max_nr_free); in nr_pcp_free()
2768 int high, high_min, high_max; in nr_pcp_high() local
2770 high_min = READ_ONCE(pcp->high_min); in nr_pcp_high()
2771 high_max = READ_ONCE(pcp->high_max); in nr_pcp_high()
2772 high = pcp->high = clamp(pcp->high, high_min, high_max); in nr_pcp_high()
2774 if (unlikely(!high)) in nr_pcp_high()
2778 pcp->high = max(high - (batch << CONFIG_PCP_BATCH_SCALE_MAX), in nr_pcp_high()
2787 if (test_bit(ZONE_RECLAIM_ACTIVE, &zone->flags)) { in nr_pcp_high()
2788 int free_count = max_t(int, pcp->free_count, batch); in nr_pcp_high()
2790 pcp->high = max(high - free_count, high_min); in nr_pcp_high()
2791 return min(batch << 2, pcp->high); in nr_pcp_high()
2795 return high; in nr_pcp_high()
2797 if (test_bit(ZONE_BELOW_HIGH, &zone->flags)) { in nr_pcp_high()
2798 int free_count = max_t(int, pcp->free_count, batch); in nr_pcp_high()
2800 pcp->high = max(high - free_count, high_min); in nr_pcp_high()
2801 high = max(pcp->count, high_min); in nr_pcp_high()
2802 } else if (pcp->count >= high) { in nr_pcp_high()
2803 int need_high = pcp->free_count + batch; in nr_pcp_high()
2805 /* pcp->high should be large enough to hold batch freed pages */ in nr_pcp_high()
2806 if (pcp->high < need_high) in nr_pcp_high()
2807 pcp->high = clamp(need_high, high_min, high_max); in nr_pcp_high()
2810 return high; in nr_pcp_high()
2817 int high, batch; in free_frozen_page_commit() local
2826 pcp->alloc_factor >>= 1; in free_frozen_page_commit()
2829 list_add(&page->pcp_list, &pcp->lists[pindex]); in free_frozen_page_commit()
2830 pcp->count += 1 << order; in free_frozen_page_commit()
2832 batch = READ_ONCE(pcp->batch); in free_frozen_page_commit()
2834 * As high-order pages other than THP's stored on PCP can contribute in free_frozen_page_commit()
2840 free_high = (pcp->free_count >= (batch + pcp->high_min / 2) && in free_frozen_page_commit()
2841 (pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) && in free_frozen_page_commit()
2842 (!(pcp->flags & PCPF_FREE_HIGH_BATCH) || in free_frozen_page_commit()
2843 pcp->count >= batch)); in free_frozen_page_commit()
2844 pcp->flags |= PCPF_PREV_FREE_HIGH_ORDER; in free_frozen_page_commit()
2845 } else if (pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) { in free_frozen_page_commit()
2846 pcp->flags &= ~PCPF_PREV_FREE_HIGH_ORDER; in free_frozen_page_commit()
2848 if (pcp->free_count < (batch << CONFIG_PCP_BATCH_SCALE_MAX)) in free_frozen_page_commit()
2849 pcp->free_count += (1 << order); in free_frozen_page_commit()
2853 * Do not attempt to take a zone lock. Let pcp->count get in free_frozen_page_commit()
2854 * over high mark temporarily. in free_frozen_page_commit()
2859 high = nr_pcp_high(pcp, zone, batch, free_high); in free_frozen_page_commit()
2860 if (pcp->count < high) in free_frozen_page_commit()
2863 free_pcppages_bulk(zone, nr_pcp_free(pcp, batch, high, free_high), in free_frozen_page_commit()
2865 if (test_bit(ZONE_BELOW_HIGH, &zone->flags) && in free_frozen_page_commit()
2868 struct pglist_data *pgdat = zone->zone_pgdat; in free_frozen_page_commit()
2869 clear_bit(ZONE_BELOW_HIGH, &zone->flags); in free_frozen_page_commit()
2878 if (atomic_read(&pgdat->kswapd_failures) >= MAX_RECLAIM_RETRIES && in free_frozen_page_commit()
2879 next_memory_node(pgdat->node_id) < MAX_NUMNODES) in free_frozen_page_commit()
2880 atomic_set(&pgdat->kswapd_failures, 0); in free_frozen_page_commit()
2927 pcp = pcp_spin_trylock(zone->per_cpu_pageset); in __free_frozen_pages()
2953 for (i = 0, j = 0; i < folios->nr; i++) { in free_unref_folios()
2954 struct folio *folio = folios->folios[i]; in free_unref_folios()
2958 if (!free_pages_prepare(&folio->page, order)) in free_unref_folios()
2965 free_one_page(folio_zone(folio), &folio->page, in free_unref_folios()
2969 folio->private = (void *)(unsigned long)order; in free_unref_folios()
2971 folios->folios[j] = folio; in free_unref_folios()
2974 folios->nr = j; in free_unref_folios()
2976 for (i = 0; i < folios->nr; i++) { in free_unref_folios()
2977 struct folio *folio = folios->folios[i]; in free_unref_folios()
2980 unsigned int order = (unsigned long)folio->private; in free_unref_folios()
2983 folio->private = NULL; in free_unref_folios()
2984 migratetype = get_pfnblock_migratetype(&folio->page, pfn); in free_unref_folios()
3001 free_one_page(zone, &folio->page, pfn, in free_unref_folios()
3011 pcp = pcp_spin_trylock(zone->per_cpu_pageset); in free_unref_folios()
3014 free_one_page(zone, &folio->page, pfn, in free_unref_folios()
3022 * Non-isolated types over MIGRATE_PCPTYPES get added in free_unref_folios()
3028 trace_mm_page_free_batched(&folio->page); in free_unref_folios()
3029 free_frozen_page_commit(zone, pcp, &folio->page, migratetype, in free_unref_folios()
3041 * split_page takes a non-compound higher-order page, and splits it into
3042 * n (1<<order) sub-pages: page[0..n]
3043 * Each sub-page must be freed individually.
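A brief usage sketch of the pattern described above (the GFP flags, the order and the elided error handling are illustrative choices, not taken from this file):

	struct page *page = alloc_pages(GFP_KERNEL, 2);	/* one non-compound order-2 page */
	int i;

	if (page) {
		split_page(page, 2);		/* now four independent order-0 pages */
		for (i = 0; i < 4; i++)
			__free_page(page + i);	/* each sub-page freed individually */
	}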
3072 * emulate a high-order watermark check with a raised order-0 in __isolate_free_page()
3073 * watermark, because we already know our high-order page in __isolate_free_page()
3076 watermark = zone->_watermark[WMARK_MIN] + (1UL << order); in __isolate_free_page()
3087 if (order >= pageblock_order - 1) { in __isolate_free_page()
3088 struct page *endpage = page + (1 << order) - 1; in __isolate_free_page()
3105 * __putback_isolated_page - Return a now-isolated page back where we got it
3118 lockdep_assert_held(&zone->lock); in __putback_isolated_page()
3162 if (!spin_trylock_irqsave(&zone->lock, flags)) in rmqueue_buddy()
3165 spin_lock_irqsave(&zone->lock, flags); in rmqueue_buddy()
3176 * order-0 (atomic) allocs access to HIGHATOMIC in rmqueue_buddy()
3178 * high-order atomic allocation in the future. in rmqueue_buddy()
3184 spin_unlock_irqrestore(&zone->lock, flags); in rmqueue_buddy()
3188 spin_unlock_irqrestore(&zone->lock, flags); in rmqueue_buddy()
3199 int high, base_batch, batch, max_nr_alloc; in nr_pcp_alloc() local
3202 base_batch = READ_ONCE(pcp->batch); in nr_pcp_alloc()
3203 high_min = READ_ONCE(pcp->high_min); in nr_pcp_alloc()
3204 high_max = READ_ONCE(pcp->high_max); in nr_pcp_alloc()
3205 high = pcp->high = clamp(pcp->high, high_min, high_max); in nr_pcp_alloc()
3208 if (unlikely(high < base_batch)) in nr_pcp_alloc()
3214 batch = (base_batch << pcp->alloc_factor); in nr_pcp_alloc()
3217 * If we had larger pcp->high, we could avoid to allocate from in nr_pcp_alloc()
3220 if (high_min != high_max && !test_bit(ZONE_BELOW_HIGH, &zone->flags)) in nr_pcp_alloc()
3221 high = pcp->high = min(high + batch, high_max); in nr_pcp_alloc()
3224 max_nr_alloc = max(high - pcp->count - base_batch, base_batch); in nr_pcp_alloc()
3227 * subsequent allocation of order-0 pages without any freeing. in nr_pcp_alloc()
3230 pcp->alloc_factor < CONFIG_PCP_BATCH_SCALE_MAX) in nr_pcp_alloc()
3231 pcp->alloc_factor++; in nr_pcp_alloc()
3247 /* Remove page from the per-cpu list, caller must protect the list */
3266 pcp->count += alloced << order; in __rmqueue_pcplist()
3272 list_del(&page->pcp_list); in __rmqueue_pcplist()
3273 pcp->count -= 1 << order; in __rmqueue_pcplist()
3279 /* Lock and remove page from the per-cpu list */
3291 pcp = pcp_spin_trylock(zone->per_cpu_pageset); in rmqueue_pcplist()
3302 pcp->free_count >>= 1; in rmqueue_pcplist()
3303 list = &pcp->lists[order_to_pindex(migratetype, order)]; in rmqueue_pcplist()
3316 * Use pcplists for THP or "cheap" high-order allocations.
3347 unlikely(test_bit(ZONE_BOOSTED_WATERMARK, &zone->flags))) { in rmqueue()
3348 clear_bit(ZONE_BOOSTED_WATERMARK, &zone->flags); in rmqueue()
3358 * exclusive use of high-order atomic allocations if there are no
3371 * Check is race-prone but harmless. in reserve_highatomic_pageblock()
3376 if (zone->nr_reserved_highatomic >= max_managed) in reserve_highatomic_pageblock()
3379 spin_lock_irqsave(&zone->lock, flags); in reserve_highatomic_pageblock()
3382 if (zone->nr_reserved_highatomic >= max_managed) in reserve_highatomic_pageblock()
3392 if (move_freepages_block(zone, page, mt, MIGRATE_HIGHATOMIC) == -1) in reserve_highatomic_pageblock()
3394 zone->nr_reserved_highatomic += pageblock_nr_pages; in reserve_highatomic_pageblock()
3397 zone->nr_reserved_highatomic += 1 << order; in reserve_highatomic_pageblock()
3401 spin_unlock_irqrestore(&zone->lock, flags); in reserve_highatomic_pageblock()
3406 * potentially hurts the reliability of high-order allocations when under
3416 struct zonelist *zonelist = ac->zonelist; in unreserve_highatomic_pageblock()
3424 for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx, in unreserve_highatomic_pageblock()
3425 ac->nodemask) { in unreserve_highatomic_pageblock()
3428 * is really high. in unreserve_highatomic_pageblock()
3430 if (!force && zone->nr_reserved_highatomic <= in unreserve_highatomic_pageblock()
3434 spin_lock_irqsave(&zone->lock, flags); in unreserve_highatomic_pageblock()
3436 struct free_area *area = &(zone->free_area[order]); in unreserve_highatomic_pageblock()
3446 * locking could inadvertently allow a per-cpu in unreserve_highatomic_pageblock()
3451 if (WARN_ON_ONCE(size > zone->nr_reserved_highatomic)) in unreserve_highatomic_pageblock()
3452 size = zone->nr_reserved_highatomic; in unreserve_highatomic_pageblock()
3453 zone->nr_reserved_highatomic -= size; in unreserve_highatomic_pageblock()
3456 * Convert to ac->migratetype and avoid the normal in unreserve_highatomic_pageblock()
3467 ac->migratetype); in unreserve_highatomic_pageblock()
3471 ac->migratetype); in unreserve_highatomic_pageblock()
3473 ac->migratetype); in unreserve_highatomic_pageblock()
3480 WARN_ON_ONCE(ret == -1); in unreserve_highatomic_pageblock()
3482 spin_unlock_irqrestore(&zone->lock, flags); in unreserve_highatomic_pageblock()
3486 spin_unlock_irqrestore(&zone->lock, flags); in unreserve_highatomic_pageblock()
3495 long unusable_free = (1 << order) - 1; in __zone_watermark_unusable_free()
3502 unusable_free += READ_ONCE(z->nr_free_highatomic); in __zone_watermark_unusable_free()
3514 * Return true if free base pages are above 'mark'. For high-order checks it
3515 * will return true if the order-0 watermark is reached and there is at least
3526 /* free_pages may go negative - that's OK */ in __zone_watermark_ok()
3527 free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags); in __zone_watermark_ok()
3535 min -= min / 2; in __zone_watermark_ok()
3538 * Non-blocking allocations (e.g. GFP_ATOMIC) can in __zone_watermark_ok()
3540 * non-blocking allocations requests such as GFP_NOWAIT in __zone_watermark_ok()
3545 min -= min / 4; in __zone_watermark_ok()
3552 * makes during the free path will be small and short-lived. in __zone_watermark_ok()
3555 min -= min / 2; in __zone_watermark_ok()
3559 * Check watermarks for an order-0 allocation request. If these in __zone_watermark_ok()
3560 * are not met, then a high-order request also cannot go ahead in __zone_watermark_ok()
3563 if (free_pages <= min + z->lowmem_reserve[highest_zoneidx]) in __zone_watermark_ok()
3566 /* If this is an order-0 request then the watermark is fine */ in __zone_watermark_ok()
3570 /* For a high-order request, check at least one suitable page is free */ in __zone_watermark_ok()
3572 struct free_area *area = &z->free_area[o]; in __zone_watermark_ok()
3575 if (!area->nr_free) in __zone_watermark_ok()
3613 * Fast check for order-0 only. If this fails then the reserves in zone_watermark_fast()
3623 /* reserved may over estimate high-atomic reserves. */ in zone_watermark_fast()
3624 usable_free -= min(usable_free, reserved); in zone_watermark_fast()
3625 if (usable_free > mark + z->lowmem_reserve[highest_zoneidx]) in zone_watermark_fast()
3634 * Ignore watermark boosting for __GFP_HIGH order-0 allocations in zone_watermark_fast()
3639 if (unlikely(!order && (alloc_flags & ALLOC_MIN_RESERVE) && z->watermark_boost in zone_watermark_fast()
3641 mark = z->_watermark[WMARK_MIN]; in zone_watermark_fast()
3697 * the pointer is within zone->zone_pgdat->node_zones[]. Also assume in alloc_flags_nofragment()
3700 BUILD_BUG_ON(ZONE_NORMAL - ZONE_DMA32 != 1); in alloc_flags_nofragment()
3701 if (nr_online_nodes > 1 && !populated_zone(--zone)) in alloc_flags_nofragment()
3742 z = ac->preferred_zoneref; in get_page_from_freelist()
3743 for_next_zone_zonelist_nodemask(zone, z, ac->highest_zoneidx, in get_page_from_freelist()
3744 ac->nodemask) { in get_page_from_freelist()
3758 * lowmem reserves and high watermark so that kswapd in get_page_from_freelist()
3763 * exceed the per-node dirty limit in the slowpath in get_page_from_freelist()
3769 * dirty-throttling and the flusher threads. in get_page_from_freelist()
3771 if (ac->spread_dirty_pages) { in get_page_from_freelist()
3772 if (last_pgdat != zone->zone_pgdat) { in get_page_from_freelist()
3773 last_pgdat = zone->zone_pgdat; in get_page_from_freelist()
3774 last_pgdat_dirty_ok = node_dirty_ok(zone->zone_pgdat); in get_page_from_freelist()
3782 zone != zonelist_zone(ac->preferred_zoneref)) { in get_page_from_freelist()
3790 local_nid = zonelist_node_idx(ac->preferred_zoneref); in get_page_from_freelist()
3802 * "node_reclaim_mode"-like behavior in this case. in get_page_from_freelist()
3805 !waitqueue_active(&zone->zone_pgdat->kswapd_wait)) { in get_page_from_freelist()
3813 * Detect whether the number of free pages is below high in get_page_from_freelist()
3814 * watermark. If so, we will decrease pcp->high and free in get_page_from_freelist()
3819 if (test_bit(ZONE_BELOW_HIGH, &zone->flags)) in get_page_from_freelist()
3824 ac->highest_zoneidx, alloc_flags, in get_page_from_freelist()
3828 set_bit(ZONE_BELOW_HIGH, &zone->flags); in get_page_from_freelist()
3833 ac->highest_zoneidx, alloc_flags, in get_page_from_freelist()
3854 !zone_allows_reclaim(zonelist_zone(ac->preferred_zoneref), zone)) in get_page_from_freelist()
3857 ret = node_reclaim(zone->zone_pgdat, gfp_mask, order); in get_page_from_freelist()
3868 ac->highest_zoneidx, alloc_flags)) in get_page_from_freelist()
3876 page = rmqueue(zonelist_zone(ac->preferred_zoneref), zone, order, in get_page_from_freelist()
3877 gfp_mask, alloc_flags, ac->migratetype); in get_page_from_freelist()
3882 * If this is a high-order atomic allocation then check in get_page_from_freelist()
3933 (current->flags & (PF_MEMALLOC | PF_EXITING))) in warn_alloc_show_mem()
3956 current->comm, &vaf, gfp_mask, &gfp_mask, in warn_alloc()
3990 .zonelist = ac->zonelist, in __alloc_pages_may_oom()
3991 .nodemask = ac->nodemask, in __alloc_pages_may_oom()
4011 * Go through the zonelist yet one more time, keep very high watermark in __alloc_pages_may_oom()
4024 if (current->flags & PF_DUMPCORE) in __alloc_pages_may_oom()
4040 if (ac->highest_zoneidx < ZONE_NORMAL) in __alloc_pages_may_oom()
4060 * Help non-failing allocations by giving them access to memory in __alloc_pages_may_oom()
4079 /* Try memory compaction for high-order allocations before reclaim */
4122 zone->compact_blockskip_flush = false; in __alloc_pages_direct_compact()
4158 * Compaction was skipped due to a lack of free order-0 in should_compact_retry()
4196 (*compact_priority)--; in should_compact_retry()
4229 * Let's give them a good hope and keep retrying while the order-0 in should_compact_retry()
4232 for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, in should_compact_retry()
4233 ac->highest_zoneidx, ac->nodemask) { in should_compact_retry()
4235 ac->highest_zoneidx, alloc_flags)) in should_compact_retry()
4253 if (current->flags & PF_MEMALLOC) in __need_reclaim()
4340 progress = try_to_free_pages(ac->zonelist, order, gfp_mask, in __perform_reclaim()
4341 ac->nodemask); in __perform_reclaim()
4371 * pages are pinned on the per-cpu lists or in high alloc reserves. in __alloc_pages_direct_reclaim()
4392 enum zone_type highest_zoneidx = ac->highest_zoneidx; in wake_all_kswapds()
4400 for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, highest_zoneidx, in wake_all_kswapds()
4401 ac->nodemask) { in wake_all_kswapds()
4404 if (last_pgdat == zone->zone_pgdat) in wake_all_kswapds()
4407 last_pgdat = zone->zone_pgdat; in wake_all_kswapds()
4446 * Ignore cpuset mems for non-blocking __GFP_HIGH (probably in gfp_to_alloc_flags()
4488 if (in_serving_softirq() && (current->flags & PF_MEMALLOC)) in __gfp_pfmemalloc_flags()
4491 if (current->flags & PF_MEMALLOC) in __gfp_pfmemalloc_flags()
4526 * their order will become available due to high fragmentation so in should_reclaim_retry()
4544 for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, in should_reclaim_retry()
4545 ac->highest_zoneidx, ac->nodemask) { in should_reclaim_retry()
4564 ac->highest_zoneidx, alloc_flags, available); in should_reclaim_retry()
4580 if (current->flags & PF_WQ_WORKER) in should_reclaim_retry()
4601 * This assumes that for all allocations, ac->nodemask can come only in check_retry_cpuset()
4606 if (cpusets_enabled() && ac->nodemask && in check_retry_cpuset()
4607 !cpuset_nodemask_valid_mems_allowed(ac->nodemask)) { in check_retry_cpuset()
4608 ac->nodemask = NULL; in check_retry_cpuset()
4647 * allocate greater than order-1 page units with __GFP_NOFAIL. in __alloc_pages_slowpath()
4660 WARN_ON_ONCE(current->flags & PF_MEMALLOC); in __alloc_pages_slowpath()
4681 * there was a cpuset modification and we are retrying - otherwise we in __alloc_pages_slowpath()
4682 * could end up iterating over non-eligible zones endlessly. in __alloc_pages_slowpath()
4684 ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, in __alloc_pages_slowpath()
4685 ac->highest_zoneidx, ac->nodemask); in __alloc_pages_slowpath()
4686 if (!zonelist_zone(ac->preferred_zoneref)) in __alloc_pages_slowpath()
4691 * any suitable zone to satisfy the request - e.g. non-movable in __alloc_pages_slowpath()
4695 struct zoneref *z = first_zones_zonelist(ac->zonelist, in __alloc_pages_slowpath()
4696 ac->highest_zoneidx, in __alloc_pages_slowpath()
4715 * that we have enough base pages and don't need to reclaim. For non- in __alloc_pages_slowpath()
4716 * movable high-order allocations, do that as well, as compaction will in __alloc_pages_slowpath()
4724 (order > 0 && ac->migratetype != MIGRATE_MOVABLE)) in __alloc_pages_slowpath()
4746 * - potentially very expensive because zones are far in __alloc_pages_slowpath()
4748 * bursty high order allocations, in __alloc_pages_slowpath()
4749 * - not guaranteed to help because isolate_freepages() in __alloc_pages_slowpath()
4752 * - unlikely to make entire pageblocks free on its in __alloc_pages_slowpath()
4788 * ignored. These allocations are high priority and system rather than in __alloc_pages_slowpath()
4792 ac->nodemask = NULL; in __alloc_pages_slowpath()
4793 ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, in __alloc_pages_slowpath()
4794 ac->highest_zoneidx, ac->nodemask); in __alloc_pages_slowpath()
4807 if (current->flags & PF_MEMALLOC) in __alloc_pages_slowpath()
4827 * Do not retry costly high order allocations unless they are in __alloc_pages_slowpath()
4839 * It doesn't make any sense to retry for the compaction if the order-0 in __alloc_pages_slowpath()
4904 * Help non-failing allocations by giving some access to memory in __alloc_pages_slowpath()
4905 * reserves normally used for high priority non-blocking in __alloc_pages_slowpath()
4918 warn_alloc(gfp_mask, ac->nodemask, in __alloc_pages_slowpath()
4929 ac->highest_zoneidx = gfp_zone(gfp_mask); in prepare_alloc_pages()
4930 ac->zonelist = node_zonelist(preferred_nid, gfp_mask); in prepare_alloc_pages()
4931 ac->nodemask = nodemask; in prepare_alloc_pages()
4932 ac->migratetype = gfp_migratetype(gfp_mask); in prepare_alloc_pages()
4940 if (in_task() && !ac->nodemask) in prepare_alloc_pages()
4941 ac->nodemask = &cpuset_current_mems_allowed; in prepare_alloc_pages()
4959 ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE); in prepare_alloc_pages()
4966 ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, in prepare_alloc_pages()
4967 ac->highest_zoneidx, ac->nodemask); in prepare_alloc_pages()
4973 * __alloc_pages_bulk - Allocate a number of order-0 pages to an array
5015 if (unlikely(nr_pages - nr_populated == 0)) in alloc_pages_bulk_noprof()
5023 if (nr_pages - nr_populated == 1) in alloc_pages_bulk_noprof()
5088 pcp = pcp_spin_trylock(zone->per_cpu_pageset); in alloc_pages_bulk_noprof()
5093 pcp_list = &pcp->lists[order_to_pindex(ac.migratetype, 0)]; in alloc_pages_bulk_noprof()
5187 * &cpuset_current_mems_allowed to optimize the fast-path attempt. in __alloc_frozen_pages_noprof()
5231 * you need to access high mem.
5261 pgalloc_tag_sub_pages(tag, (1 << order) - 1); in ___free_pages()
5262 while (order-- > 0) { in ___free_pages()
5264 * The "tail" pages of this non-compound high-order in ___free_pages()
5276 * __free_pages - Free pages allocated with alloc_pages().
5280 * This function can free multi-page allocations that are not compound
5286 * by put_page() which only frees the first page of a non-compound
5311 * free_pages - Free pages allocated with __get_free_pages().
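A minimal sketch of the pairing referred to above (the GFP flags and order are arbitrary for illustration):

	unsigned long addr = __get_free_pages(GFP_KERNEL, 1);	/* two pages, as a kernel virtual address */

	if (addr)
		free_pages(addr, 1);		/* freed with the same order it was allocated with */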
5340 while (page < --last) in make_alloc_exact()
5351 * alloc_pages_exact - allocate an exact number of physically-contiguous pages.
5357 * allocate memory in power-of-two pages.
5379 * alloc_pages_exact_nid - allocate an exact number of physically-contiguous
5405 * free_pages_exact - release memory allocated via alloc_pages_exact()
5414 unsigned long end = addr + PAGE_ALIGN(size); in free_pages_exact() local
5416 while (addr < end) { in free_pages_exact()
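A hedged usage sketch of the alloc_pages_exact()/free_pages_exact() pair documented above (the size and GFP flags are arbitrary for illustration):

	size_t size = 5 * PAGE_SIZE;		/* deliberately not a power-of-two number of pages */
	void *buf = alloc_pages_exact(size, GFP_KERNEL);

	if (buf)
		free_pages_exact(buf, size);	/* pass the same size used at allocation */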
5424 * nr_free_zone_pages - count number of pages beyond high watermark
5428 * high watermark within all zones at or below a given zone index. For each
5431 * nr_free_zone_pages = managed_pages - high_pages
5433 * Return: number of pages beyond high watermark.
5447 unsigned long high = high_wmark_pages(zone); in nr_free_zone_pages() local
5448 if (size > high) in nr_free_zone_pages()
5449 sum += size - high; in nr_free_zone_pages()
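For example (illustrative numbers), a zone with 1,000,000 managed pages and a high watermark of 12,000 pages contributes 1,000,000 - 12,000 = 988,000 pages to this sum.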
5456 * nr_free_buffer_pages - count number of pages beyond high watermark
5458 * nr_free_buffer_pages() counts the number of pages which are beyond the high
5461 * Return: number of pages beyond high watermark within ZONE_DMA and
5472 zoneref->zone = zone; in zoneref_set_zone()
5473 zoneref->zone_idx = zone_idx(zone); in zoneref_set_zone()
5488 zone_type--; in build_zonerefs_node()
5489 zone = pgdat->node_zones + zone_type; in build_zonerefs_node()
5511 return -EINVAL; in __parse_numa_zonelist_order()
5532 * find_next_best_node - find the next node that should appear in a given node's fallback list
5596 * This results in maximum locality--normal zone overflows into local
5597 * DMA zone, if any--but risks exhausting DMA zone.
5605 zonerefs = pgdat->node_zonelists[ZONELIST_FALLBACK]._zonerefs; in build_zonelists_in_node_order()
5615 zonerefs->zone = NULL; in build_zonelists_in_node_order()
5616 zonerefs->zone_idx = 0; in build_zonelists_in_node_order()
5627 zonerefs = pgdat->node_zonelists[ZONELIST_NOFALLBACK]._zonerefs; in build_thisnode_zonelists()
5630 zonerefs->zone = NULL; in build_thisnode_zonelists()
5631 zonerefs->zone_idx = 0; in build_thisnode_zonelists()
5641 /* NUMA-aware ordering of nodes */ in build_zonelists()
5642 local_node = pgdat->node_id; in build_zonelists()
5650 * distance group to make it round-robin. in build_zonelists()
5695 zonerefs = pgdat->node_zonelists[ZONELIST_FALLBACK]._zonerefs; in build_zonelists()
5699 zonerefs->zone = NULL; in build_zonelists()
5700 zonerefs->zone_idx = 0; in build_zonelists()
5741 * trying to hold port->lock, for in __build_all_zonelists()
5743 * calling kmalloc(GFP_ATOMIC | __GFP_NOWARN) with port->lock held. in __build_all_zonelists()
5753 * building zonelists is fine - no need to touch other nodes. in __build_all_zonelists()
5755 if (self && !node_online(self->node_id)) { in __build_all_zonelists()
5770 * We now know the "local memory node" for each node-- in __build_all_zonelists()
5772 * Set up numa_mem percpu variable for on-line cpus. During in __build_all_zonelists()
5773 * boot, only the boot cpu should be on-line; we'll init the in __build_all_zonelists()
5774 * secondary cpus' numa_mem as they come on-line. During in __build_all_zonelists()
5775 * node/memory hotplug, we'll fixup all on-line cpus. in __build_all_zonelists()
5804 * (a chicken-egg dilemma). in build_all_zonelists_init()
5829 /* Get the number of free pages beyond high watermark in all zones. */ in build_all_zonelists()
5834 * more accurate, but expensive to check per-zone. This check is in build_all_zonelists()
5835 * made on memory-hotadd so a system can start with mobility in build_all_zonelists()
5869 * Clamp the batch to a 2^n - 1 value. Having a power in zone_batchsize()
5874 * batches of pages, one task can end up with a lot in zone_batchsize()
5878 batch = rounddown_pow_of_two(batch + batch/2) - 1; in zone_batchsize()
5894 * fragmented and becoming unavailable for high-order allocations. in zone_batchsize()
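As a worked example of the rounddown_pow_of_two() clamp shown above (illustrative input): an initial estimate of batch = 48 becomes rounddown_pow_of_two(48 + 48/2) - 1 = rounddown_pow_of_two(72) - 1 = 64 - 1 = 63.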
5905 int high; in zone_highsize() local
5911 * By default, the high value of the pcp is based on the zone in zone_highsize()
5918 * If percpu_pagelist_high_fraction is configured, the high in zone_highsize()
5926 * Split the high value across all online CPUs local to the zone. Note in zone_highsize()
5929 * onlined. For memory nodes that have no CPUs, split the high value in zone_highsize()
5936 high = total_pages / nr_split_cpus; in zone_highsize()
5939 * Ensure high is at least batch*4. The multiple is based on the in zone_highsize()
5940 * historical relationship between high and batch. in zone_highsize()
5942 high = max(high, batch << 2); in zone_highsize()
5944 return high; in zone_highsize()
5951 * pcp->high and pcp->batch values are related and generally batch is lower
5952 * than high. They are also related to pcp->count such that count is lower
5953 * than high, and as soon as it reaches high, the pcplist is flushed.
5958 * store tearing. Any new users of pcp->batch, pcp->high_min and pcp->high_max
5960 * fully trust only the pcp->count field on the local CPU with interrupts
5970 WRITE_ONCE(pcp->batch, batch); in pageset_update()
5971 WRITE_ONCE(pcp->high_min, high_min); in pageset_update()
5972 WRITE_ONCE(pcp->high_max, high_max); in pageset_update()
5982 spin_lock_init(&pcp->lock); in per_cpu_pages_init()
5984 INIT_LIST_HEAD(&pcp->lists[pindex]); in per_cpu_pages_init()
5987 * Set batch and high values safe for a boot pageset. A true percpu in per_cpu_pages_init()
5992 pcp->high_min = BOOT_PAGESET_HIGH; in per_cpu_pages_init()
5993 pcp->high_max = BOOT_PAGESET_HIGH; in per_cpu_pages_init()
5994 pcp->batch = BOOT_PAGESET_BATCH; in per_cpu_pages_init()
6004 pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); in __zone_set_pageset_high_and_batch()
6010 * Calculate and set new high and batch values for all per-cpu pagesets of a
6022 * PCP high is tuned manually, disable auto-tuning via in zone_set_pageset_high_and_batch()
6032 if (zone->pageset_high_min == new_high_min && in zone_set_pageset_high_and_batch()
6033 zone->pageset_high_max == new_high_max && in zone_set_pageset_high_and_batch()
6034 zone->pageset_batch == new_batch) in zone_set_pageset_high_and_batch()
6037 zone->pageset_high_min = new_high_min; in zone_set_pageset_high_and_batch()
6038 zone->pageset_high_max = new_high_max; in zone_set_pageset_high_and_batch()
6039 zone->pageset_batch = new_batch; in zone_set_pageset_high_and_batch()
6051 zone->per_cpu_zonestats = alloc_percpu(struct per_cpu_zonestat); in setup_zone_pageset()
6053 zone->per_cpu_pageset = alloc_percpu(struct per_cpu_pages); in setup_zone_pageset()
6058 pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); in setup_zone_pageset()
6059 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); in setup_zone_pageset()
6068 * page high values need to be recalculated.
6082 pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); in zone_pcp_update_cacheinfo()
6085 * If data cache slice of CPU is large enough, "pcp->batch" in zone_pcp_update_cacheinfo()
6087 * consecutive high-order pages freeing without allocation. in zone_pcp_update_cacheinfo()
6089 * cache-hot pages sharing. in zone_pcp_update_cacheinfo()
6091 spin_lock(&pcp->lock); in zone_pcp_update_cacheinfo()
6092 if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch) in zone_pcp_update_cacheinfo()
6093 pcp->flags |= PCPF_FREE_HIGH_BATCH; in zone_pcp_update_cacheinfo()
6095 pcp->flags &= ~PCPF_FREE_HIGH_BATCH; in zone_pcp_update_cacheinfo()
6096 spin_unlock(&pcp->lock); in zone_pcp_update_cacheinfo()
6124 * Otherwise, they will end up skewing the stats of in setup_per_cpu_pageset()
6129 memset(pzstats->vm_numa_event, 0, in setup_per_cpu_pageset()
6130 sizeof(pzstats->vm_numa_event)); in setup_per_cpu_pageset()
6135 pgdat->per_cpu_nodestats = in setup_per_cpu_pageset()
6146 zone->per_cpu_pageset = &boot_pageset; in zone_pcp_init()
6147 zone->per_cpu_zonestats = &boot_zonestats; in zone_pcp_init()
6148 zone->pageset_high_min = BOOT_PAGESET_HIGH; in zone_pcp_init()
6149 zone->pageset_high_max = BOOT_PAGESET_HIGH; in zone_pcp_init()
6150 zone->pageset_batch = BOOT_PAGESET_BATCH; in zone_pcp_init()
6153 pr_debug(" %s zone: %lu pages, LIFO batch:%u\n", zone->name, in zone_pcp_init()
6154 zone->present_pages, zone_batchsize(zone)); in zone_pcp_init()
6161 atomic_long_add(count, &page_zone(page)->managed_pages); in adjust_managed_page_count()
6167 unsigned long free_reserved_area(void *start, void *end, int poison, const char *s) in free_reserved_area() argument
6173 end = (void *)((unsigned long)end & PAGE_MASK); in free_reserved_area()
6174 for (pos = start; pos < end; pos += PAGE_SIZE, pages++) { in free_reserved_area()
6187 * Perform a kasan-unchecked memset() since this memory in free_reserved_area()
6265 * calculate_totalreserve_pages - called when sysctl_lowmem_reserve_ratio
6276 pgdat->totalreserve_pages = 0; in calculate_totalreserve_pages()
6279 struct zone *zone = pgdat->node_zones + i; in calculate_totalreserve_pages()
6285 max = max(max, zone->lowmem_reserve[j]); in calculate_totalreserve_pages()
6287 /* we treat the high watermark as reserved pages. */ in calculate_totalreserve_pages()
6292 pgdat->totalreserve_pages += max; in calculate_totalreserve_pages()
6302 * setup_per_zone_lowmem_reserve - called whenever
6313 for (i = 0; i < MAX_NR_ZONES - 1; i++) { in setup_per_zone_lowmem_reserve()
6314 struct zone *zone = &pgdat->node_zones[i]; in setup_per_zone_lowmem_reserve()
6320 struct zone *upper_zone = &pgdat->node_zones[j]; in setup_per_zone_lowmem_reserve()
6325 zone->lowmem_reserve[j] = 0; in setup_per_zone_lowmem_reserve()
6327 zone->lowmem_reserve[j] = managed_pages / ratio; in setup_per_zone_lowmem_reserve()
6329 zone->lowmem_reserve[j]); in setup_per_zone_lowmem_reserve()
6340 unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); in __setup_per_zone_wmarks()
6354 spin_lock_irqsave(&zone->lock, flags); in __setup_per_zone_wmarks()
6363 * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN) in __setup_per_zone_wmarks()
6371 zone->_watermark[WMARK_MIN] = min_pages; in __setup_per_zone_wmarks()
6377 zone->_watermark[WMARK_MIN] = tmp; in __setup_per_zone_wmarks()
6389 zone->watermark_boost = 0; in __setup_per_zone_wmarks()
6390 zone->_watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp; in __setup_per_zone_wmarks()
6391 zone->_watermark[WMARK_HIGH] = low_wmark_pages(zone) + tmp; in __setup_per_zone_wmarks()
6392 zone->_watermark[WMARK_PROMO] = high_wmark_pages(zone) + tmp; in __setup_per_zone_wmarks()
6395 spin_unlock_irqrestore(&zone->lock, flags); in __setup_per_zone_wmarks()
6403 * setup_per_zone_wmarks - called when min_free_kbytes changes
6404 * or when memory is hot-{added|removed}
6406 * Ensures that the watermark[min,low,high] values for each zone are set
6420 * and high limits or the limits may be inappropriate. in setup_per_zone_wmarks()
6485 * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so in postcore_initcall()
6527 pgdat->min_unmapped_pages = 0; in setup_min_unmapped_ratio()
6530 zone->zone_pgdat->min_unmapped_pages += (zone_managed_pages(zone) * in setup_min_unmapped_ratio()
6555 pgdat->min_slab_pages = 0; in setup_min_slab_ratio()
6558 zone->zone_pgdat->min_slab_pages += (zone_managed_pages(zone) * in setup_min_slab_ratio()
6578 * lowmem_reserve_ratio_sysctl_handler - just a wrapper around
6603 * percpu_pagelist_high_fraction - changes the pcp->high for each zone on each
6625 ret = -EINVAL; in percpu_pagelist_high_fraction_sysctl_handler()
6725 /* Usage: See admin-guide/dynamic-debug-howto.rst */
6739 /* [start, end) must belong to a single zone. */
6741 unsigned long start, unsigned long end) in __alloc_contig_migrate_range() argument
6749 .nid = zone_to_nid(cc->zone), in __alloc_contig_migrate_range()
6750 .gfp_mask = cc->gfp_mask, in __alloc_contig_migrate_range()
6756 while (pfn < end || !list_empty(&cc->migratepages)) { in __alloc_contig_migrate_range()
6758 ret = -EINTR; in __alloc_contig_migrate_range()
6762 if (list_empty(&cc->migratepages)) { in __alloc_contig_migrate_range()
6763 cc->nr_migratepages = 0; in __alloc_contig_migrate_range()
6764 ret = isolate_migratepages_range(cc, pfn, end); in __alloc_contig_migrate_range()
6765 if (ret && ret != -EAGAIN) in __alloc_contig_migrate_range()
6767 pfn = cc->migrate_pfn; in __alloc_contig_migrate_range()
6770 ret = -EBUSY; in __alloc_contig_migrate_range()
6774 nr_reclaimed = reclaim_clean_pages_from_list(cc->zone, in __alloc_contig_migrate_range()
6775 &cc->migratepages); in __alloc_contig_migrate_range()
6776 cc->nr_migratepages -= nr_reclaimed; in __alloc_contig_migrate_range()
6778 ret = migrate_pages(&cc->migratepages, alloc_migration_target, in __alloc_contig_migrate_range()
6779 NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE, NULL); in __alloc_contig_migrate_range()
6782 * On -ENOMEM, migrate_pages() bails out right away. It is pointless in __alloc_contig_migrate_range()
6785 if (ret == -ENOMEM) in __alloc_contig_migrate_range()
6791 if (!(cc->gfp_mask & __GFP_NOWARN) && ret == -EBUSY) in __alloc_contig_migrate_range()
6792 alloc_contig_dump_pages(&cc->migratepages); in __alloc_contig_migrate_range()
6793 putback_movable_pages(&cc->migratepages); in __alloc_contig_migrate_range()
6817 /* Add all subpages to the order-0 head, in sequence. */ in split_free_pages()
6818 list_del(&page->lru); in split_free_pages()
6844 return -EINVAL; in __alloc_contig_verify_gfp_mask()
6860 * alloc_contig_range() -- tries to allocate given range of pages
6862 * @end: one-past-the-last PFN to allocate
6876 * pages which PFN is in [start, end) are allocated for the caller and
6879 int alloc_contig_range_noprof(unsigned long start, unsigned long end, in alloc_contig_range_noprof() argument
6882 const unsigned int order = ilog2(end - start); in alloc_contig_range_noprof()
6888 .order = -1, in alloc_contig_range_noprof()
6906 return -EINVAL; in alloc_contig_range_noprof()
6910 return -EINVAL; in alloc_contig_range_noprof()
6933 ret = start_isolate_page_range(start, end, mode); in alloc_contig_range_noprof()
6940 * In case of -EBUSY, we'd like to know which page causes problem. in alloc_contig_range_noprof()
6947 * -EBUSY is not accidentally used or returned to caller. in alloc_contig_range_noprof()
6949 ret = __alloc_contig_migrate_range(&cc, start, end); in alloc_contig_range_noprof()
6950 if (ret && ret != -EBUSY) in alloc_contig_range_noprof()
6954 * When in-use hugetlb pages are migrated, they may simply be released in alloc_contig_range_noprof()
6956 * buddy system. After the migration of in-use huge pages is completed, in alloc_contig_range_noprof()
6960 ret = replace_free_hugepage_folios(start, end); in alloc_contig_range_noprof()
6965 * Pages from [start, end) are within a pageblock_nr_pages in alloc_contig_range_noprof()
6967 * more, all pages in [start, end) are free in page allocator. in alloc_contig_range_noprof()
6969 * [start, end) (that is remove them from page allocator). in alloc_contig_range_noprof()
6972 * end of interesting range may be not aligned with pages that in alloc_contig_range_noprof()
6977 * We don't have to hold zone->lock here because the pages are in alloc_contig_range_noprof()
6983 if (test_pages_isolated(outer_start, end, mode)) { in alloc_contig_range_noprof()
6984 ret = -EBUSY; in alloc_contig_range_noprof()
6989 outer_end = isolate_freepages_range(&cc, outer_start, end); in alloc_contig_range_noprof()
6991 ret = -EBUSY; in alloc_contig_range_noprof()
7000 free_contig_range(outer_start, start - outer_start); in alloc_contig_range_noprof()
7001 if (end != outer_end) in alloc_contig_range_noprof()
7002 free_contig_range(end, outer_end - end); in alloc_contig_range_noprof()
7003 } else if (start == outer_start && end == outer_end && is_power_of_2(end - start)) { in alloc_contig_range_noprof()
7010 ret = -EINVAL; in alloc_contig_range_noprof()
7012 start, end, outer_start, outer_end); in alloc_contig_range_noprof()
7015 undo_isolate_page_range(start, end); in alloc_contig_range_noprof()
7055 unsigned long last_pfn = start_pfn + nr_pages - 1; in zone_spans_last_pfn()
7061 * alloc_contig_pages() -- tries to find and allocate contiguous range of pages
7094 spin_lock_irqsave(&zone->lock, flags); in alloc_contig_pages_noprof()
7096 pfn = ALIGN(zone->zone_start_pfn, nr_pages); in alloc_contig_pages_noprof()
7106 spin_unlock_irqrestore(&zone->lock, flags); in alloc_contig_pages_noprof()
7111 spin_lock_irqsave(&zone->lock, flags); in alloc_contig_pages_noprof()
7115 spin_unlock_irqrestore(&zone->lock, flags); in alloc_contig_pages_noprof()
7137 for (; nr_pages--; pfn++) { in free_contig_range()
7148 * Effectively disable pcplists for the zone by setting the high limit to 0
7151 * will be drained, or observe the new high limit and skip the pcplist.
7164 __zone_set_pageset_high_and_batch(zone, zone->pageset_high_min, in zone_pcp_enable()
7165 zone->pageset_high_max, zone->pageset_batch); in zone_pcp_enable()
7174 if (zone->per_cpu_pageset != &boot_pageset) { in zone_pcp_reset()
7176 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); in zone_pcp_reset()
7179 free_percpu(zone->per_cpu_pageset); in zone_pcp_reset()
7180 zone->per_cpu_pageset = &boot_pageset; in zone_pcp_reset()
7181 if (zone->per_cpu_zonestats != &boot_zonestats) { in zone_pcp_reset()
7182 free_percpu(zone->per_cpu_zonestats); in zone_pcp_reset()
7183 zone->per_cpu_zonestats = &boot_zonestats; in zone_pcp_reset()
7193 * Returns the number of managed (non-PageOffline()) pages in the range: the
7208 spin_lock_irqsave(&zone->lock, flags); in __offline_isolated_pages()
7238 spin_unlock_irqrestore(&zone->lock, flags); in __offline_isolated_pages()
7240 return end_pfn - start_pfn - already_offline; in __offline_isolated_pages()
7253 const struct page *head = page - (pfn & ((1 << order) - 1)); in is_free_buddy_page()
7274 * Break down a higher-order page in sub-pages, and keep our target out of
7278 struct page *target, int low, int high, in break_down_buddy_pages() argument
7281 unsigned long size = 1 << high; in break_down_buddy_pages()
7284 while (high > low) { in break_down_buddy_pages()
7285 high--; in break_down_buddy_pages()
7295 if (set_page_guard(zone, current_buddy, high)) in break_down_buddy_pages()
7298 add_to_free_list(current_buddy, zone, high, migratetype, false); in break_down_buddy_pages()
7299 set_buddy_order(current_buddy, high); in break_down_buddy_pages()
7314 spin_lock_irqsave(&zone->lock, flags); in take_page_off_buddy()
7316 struct page *page_head = page - (pfn & ((1 << order) - 1)); in take_page_off_buddy()
7335 spin_unlock_irqrestore(&zone->lock, flags); in take_page_off_buddy()
7348 spin_lock_irqsave(&zone->lock, flags); in put_page_back_buddy()
7359 spin_unlock_irqrestore(&zone->lock, flags); in put_page_back_buddy()
7371 struct zone *zone = &pgdat->node_zones[ZONE_DMA]; in has_managed_dma()
7393 return -EINVAL; in accept_memory_parse()
7408 list_del(&page->lru); in __accept_page()
7409 account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); in __accept_page()
7410 __mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES); in __accept_page()
7412 spin_unlock_irqrestore(&zone->lock, *flags); in __accept_page()
7424 spin_lock_irqsave(&zone->lock, flags); in accept_page()
7426 spin_unlock_irqrestore(&zone->lock, flags); in accept_page()
7430 /* Unlocks zone->lock */ in accept_page()
7439 spin_lock_irqsave(&zone->lock, flags); in try_to_accept_memory_one()
7440 page = list_first_entry_or_null(&zone->unaccepted_pages, in try_to_accept_memory_one()
7443 spin_unlock_irqrestore(&zone->lock, flags); in try_to_accept_memory_one()
7447 /* Unlocks zone->lock */ in try_to_accept_memory_one()
7459 if (list_empty(&zone->unaccepted_pages)) in cond_accept_memory()
7477 to_accept = wmark - in cond_accept_memory()
7478 (zone_page_state(zone, NR_FREE_PAGES) - in cond_accept_memory()
7479 __zone_watermark_unusable_free(zone, order, 0) - in cond_accept_memory()
7486 to_accept -= MAX_ORDER_NR_PAGES; in cond_accept_memory()
7500 spin_lock_irqsave(&zone->lock, flags); in __free_unaccepted()
7501 list_add_tail(&page->lru, &zone->unaccepted_pages); in __free_unaccepted()
7505 spin_unlock_irqrestore(&zone->lock, flags); in __free_unaccepted()
7588 * If it's empty attempt to spin_trylock zone->lock. in alloc_frozen_pages_nolock_noprof()
7604 * alloc_pages_nolock - opportunistic reentrant allocation from any context
7611 * allocator -> tracepoint -> alloc_pages_nolock_noprof).