Lines Matching +full:overrun +full:- +full:throttle +full:- +full:ms
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* memcontrol.c - Memory Controller
28 #include <linux/cgroup-defs.h>
39 #include <linux/page-flags.h>
40 #include <linux/backing-dev.h>
70 #include "memcontrol-v1.h"
101 (current->flags & PF_EXITING); in task_is_dying()
109 return &memcg->vmpressure; in memcg_to_vmpressure()
139 * objcg->nr_charged_bytes can't have an arbitrary byte value. in obj_cgroup_release()
143 * 1) CPU0: objcg == stock->cached_objcg in obj_cgroup_release()
148 * objcg->nr_charged_bytes = PAGE_SIZE - 92 in obj_cgroup_release()
150 * 92 bytes are added to stock->nr_bytes in obj_cgroup_release()
152 * 92 bytes are added to objcg->nr_charged_bytes in obj_cgroup_release()
157 nr_bytes = atomic_read(&objcg->nr_charged_bytes); in obj_cgroup_release()
158 WARN_ON_ONCE(nr_bytes & (PAGE_SIZE - 1)); in obj_cgroup_release()
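The race sketched in the comment above only ever leaves whole pages in objcg->nr_charged_bytes once both sides have flushed: the sub-page remainder charged on one CPU and its complement released on the other add up to exactly PAGE_SIZE, which is why the WARN_ON_ONCE above only tolerates page-sized multiples. A minimal userspace sketch of that arithmetic (PAGE_SIZE hard-coded to 4096 here purely for illustration):

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL   /* illustrative; the kernel uses the real page size */

int main(void)
{
	unsigned long alloc = 92;                           /* small object charged on CPU1 */
	unsigned long nr_charged_bytes = PAGE_SIZE - alloc; /* leftover flushed to the objcg */

	/* CPU0 later releases the object; its 92 bytes are flushed as well. */
	nr_charged_bytes += alloc;

	/* Whole pages only, which is what the WARN_ON_ONCE above asserts. */
	assert((nr_charged_bytes & (PAGE_SIZE - 1)) == 0);
	printf("nr_charged_bytes = %lu (%lu page)\n",
	       nr_charged_bytes, nr_charged_bytes / PAGE_SIZE);
	return 0;
}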
165 list_del(&objcg->list); in obj_cgroup_release()
181 ret = percpu_ref_init(&objcg->refcnt, obj_cgroup_release, 0, in obj_cgroup_alloc()
187 INIT_LIST_HEAD(&objcg->list); in obj_cgroup_alloc()
196 objcg = rcu_replace_pointer(memcg->objcg, NULL, true); in memcg_reparent_objcgs()
201 list_add(&objcg->list, &memcg->objcg_list); in memcg_reparent_objcgs()
203 list_for_each_entry(iter, &memcg->objcg_list, list) in memcg_reparent_objcgs()
204 WRITE_ONCE(iter->memcg, parent); in memcg_reparent_objcgs()
206 list_splice(&memcg->objcg_list, &parent->objcg_list); in memcg_reparent_objcgs()
210 percpu_ref_kill(&objcg->refcnt); in memcg_reparent_objcgs()
226 * mem_cgroup_css_from_folio - css of the memcg associated with a folio
243 return &memcg->css; in mem_cgroup_css_from_folio()
247 * page_cgroup_ino - return inode number of the memcg a page is charged to
268 while (memcg && !(memcg->css.flags & CSS_ONLINE)) in page_cgroup_ino()
271 ino = cgroup_ino(memcg->css.cgroup); in page_cgroup_ino()
363 /* Non-hierarchical (CPU aggregated) state */
384 x = READ_ONCE(pn->lruvec_stats->state[i]); in lruvec_page_state()
407 x = READ_ONCE(pn->lruvec_stats->state_local[i]); in lruvec_page_state_local()
501 /* Non-hierarchical (CPU aggregated) page state & events */
559 return atomic64_read(&vmstats->stats_updates) > in memcg_vmstats_needs_flush()
572 cgroup_rstat_updated(memcg->css.cgroup, cpu); in memcg_rstat_updated()
573 statc = this_cpu_ptr(memcg->vmstats_percpu); in memcg_rstat_updated()
574 for (; statc; statc = statc->parent) { in memcg_rstat_updated()
575 stats_updates = READ_ONCE(statc->stats_updates) + abs(val); in memcg_rstat_updated()
576 WRITE_ONCE(statc->stats_updates, stats_updates); in memcg_rstat_updated()
581 * If @memcg is already flush-able, increasing stats_updates is in memcg_rstat_updated()
584 if (!memcg_vmstats_needs_flush(statc->vmstats)) in memcg_rstat_updated()
586 &statc->vmstats->stats_updates); in memcg_rstat_updated()
587 WRITE_ONCE(statc->stats_updates, 0); in memcg_rstat_updated()
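The loop above implements a batched "dirty counter": each CPU accumulates the absolute magnitude of its updates privately, walks up the parent chain, and folds a batch into the shared atomic only once it crosses a threshold and the cgroup is not already flush-worthy. A simplified single-level sketch of that pattern follows; BATCH, FLUSH_THRESHOLD and the plain C types are assumptions standing in for MEMCG_CHARGE_BATCH and the kernel's structures, not the actual definitions.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define BATCH           64    /* assumed stand-in for MEMCG_CHARGE_BATCH */
#define FLUSH_THRESHOLD 1024  /* assumed stand-in for the needs-flush cutoff */

struct shared_stats {
	_Atomic long stats_updates;      /* shared, read by the flusher */
};

struct percpu_stats {
	long stats_updates;              /* private to one CPU */
	struct shared_stats *shared;
	struct percpu_stats *parent;     /* walk up the hierarchy */
};

static int needs_flush(struct shared_stats *s)
{
	return atomic_load(&s->stats_updates) > FLUSH_THRESHOLD;
}

static void stat_updated(struct percpu_stats *statc, long val)
{
	for (; statc; statc = statc->parent) {
		statc->stats_updates += labs(val);
		if (statc->stats_updates < BATCH)
			continue;
		/* Fold the batch into the shared counter only if it is not
		 * already over the flush threshold; then reset the batch. */
		if (!needs_flush(statc->shared))
			atomic_fetch_add(&statc->shared->stats_updates,
					 statc->stats_updates);
		statc->stats_updates = 0;
	}
}

int main(void)
{
	struct shared_stats root = { 0 };
	struct percpu_stats cpu0 = { .stats_updates = 0, .shared = &root, .parent = NULL };

	for (int i = 0; i < 200; i++)
		stat_updated(&cpu0, +1);     /* small updates batch up locally */
	printf("shared counter after 200 updates: %ld\n",
	       atomic_load(&root.stats_updates));
	return 0;
}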
596 cgroup_rstat_flush(memcg->css.cgroup); in do_flush_stats()
600 * mem_cgroup_flush_stats - flush the stats of a memory cgroup subtree
616 if (memcg_vmstats_needs_flush(memcg->vmstats)) in mem_cgroup_flush_stats()
631 * in latency-sensitive paths is as cheap as possible. in flush_memcg_stats_dwork()
645 x = READ_ONCE(memcg->vmstats->state[i]); in memcg_page_state()
657 * up non-zero sub-page updates to 1 page as zero page updates are ignored.
670 * __mod_memcg_state - update cgroup memory statistics
672 * @idx: the stat item - can be enum memcg_stat_item or enum node_stat_item
686 __this_cpu_add(memcg->vmstats_percpu->state[i], val); in __mod_memcg_state()
699 x = READ_ONCE(memcg->vmstats->state_local[i]); in memcg_page_state_local()
719 memcg = pn->memcg; in __mod_memcg_lruvec_state()
723 * update their counter from in-interrupt context. For these two in __mod_memcg_lruvec_state()
741 __this_cpu_add(memcg->vmstats_percpu->state[i], val); in __mod_memcg_lruvec_state()
744 __this_cpu_add(pn->lruvec_stats_percpu->state[i], val); in __mod_memcg_lruvec_state()
751 * __mod_lruvec_state - update lruvec memory statistics
758 * change of state at this level: per-node, per-cgroup, per-lruvec.
805 * when we free the slab object, we need to update the per-memcg in __mod_lruvec_kmem_state()
818 * __count_memcg_events - account VM events in a cgroup
835 __this_cpu_add(memcg->vmstats_percpu->events[i], count); in __count_memcg_events()
847 return READ_ONCE(memcg->vmstats->events[i]); in memcg_events()
857 return READ_ONCE(memcg->vmstats->events_local[i]); in memcg_events_local()
863 * mm_update_next_owner() may clear mm->owner to NULL in mem_cgroup_from_task()
879 return current->active_memcg; in active_memcg()
886 * Obtain a reference on mm->memcg and return it if successful. If mm

889 * 2) current->mm->memcg, if available
913 css_get(&memcg->css); in get_mem_cgroup_from_mm()
916 mm = current->mm; in get_mem_cgroup_from_mm()
923 memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); in get_mem_cgroup_from_mm()
926 } while (!css_tryget(&memcg->css)); in get_mem_cgroup_from_mm()
933 * get_mem_cgroup_from_current - Obtain a reference on current task's memcg.
945 if (!css_tryget(&memcg->css)) { in get_mem_cgroup_from_current()
954 * get_mem_cgroup_from_folio - Obtain a reference on a given folio's memcg.
965 if (!memcg || WARN_ON_ONCE(!css_tryget(&memcg->css))) in get_mem_cgroup_from_folio()
972 * mem_cgroup_iter - iterate over memory cgroup hierarchy
978 * @root itself, or %NULL after a full round-trip.
982 * to cancel a hierarchy walk before the round-trip is complete.
1009 int nid = reclaim->pgdat->node_id; in mem_cgroup_iter()
1011 iter = &root->nodeinfo[nid]->iter; in mem_cgroup_iter()
1012 gen = atomic_read(&iter->generation); in mem_cgroup_iter()
1019 reclaim->generation = gen; in mem_cgroup_iter()
1020 else if (reclaim->generation != gen) in mem_cgroup_iter()
1023 pos = READ_ONCE(iter->position); in mem_cgroup_iter()
1027 css = pos ? &pos->css : NULL; in mem_cgroup_iter()
1029 while ((css = css_next_descendant_pre(css, &root->css))) { in mem_cgroup_iter()
1035 if (css == &root->css || css_tryget(css)) in mem_cgroup_iter()
1047 if (cmpxchg(&iter->position, pos, next) != pos) { in mem_cgroup_iter()
1048 if (css && css != &root->css) in mem_cgroup_iter()
1054 atomic_inc(&iter->generation); in mem_cgroup_iter()
1059 * the hierarchy - make sure they see at least in mem_cgroup_iter()
1070 css_put(&prev->css); in mem_cgroup_iter()
1076 * mem_cgroup_iter_break - abort a hierarchy walk prematurely
1086 css_put(&prev->css); in mem_cgroup_iter_break()
1097 mz = from->nodeinfo[nid]; in __invalidate_reclaim_iterators()
1098 iter = &mz->iter; in __invalidate_reclaim_iterators()
1099 cmpxchg(&iter->position, dead_memcg, NULL); in __invalidate_reclaim_iterators()
1114 * When cgroup1 non-hierarchy mode is used, in invalidate_reclaim_iterators()
1125 * mem_cgroup_scan_tasks - iterate over tasks of a memory cgroup hierarchy
1131 * descendants and calls @fn for each task. If @fn returns a non-zero
1149 css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it); in mem_cgroup_scan_tasks()
1178 * folio_lruvec_lock - Lock the lruvec for a folio.
1182 * - folio locked
1183 * - folio_test_lru false
1184 * - folio_memcg_lock()
1185 * - folio frozen (refcount of 0)
1193 spin_lock(&lruvec->lru_lock); in folio_lruvec_lock()
1200 * folio_lruvec_lock_irq - Lock the lruvec for a folio.
1204 * - folio locked
1205 * - folio_test_lru false
1206 * - folio_memcg_lock()
1207 * - folio frozen (refcount of 0)
1216 spin_lock_irq(&lruvec->lru_lock); in folio_lruvec_lock_irq()
1223 * folio_lruvec_lock_irqsave - Lock the lruvec for a folio.
1228 * - folio locked
1229 * - folio_test_lru false
1230 * - folio_memcg_lock()
1231 * - folio frozen (refcount of 0)
1241 spin_lock_irqsave(&lruvec->lru_lock, *flags); in folio_lruvec_lock_irqsave()
1248 * mem_cgroup_update_lru_size - account for adding or removing an lru page
1268 lru_size = &mz->lru_zone_size[zid][lru]; in mem_cgroup_update_lru_size()
1286 * mem_cgroup_margin - calculate chargeable space of a memory cgroup
1298 count = page_counter_read(&memcg->memory); in mem_cgroup_margin()
1299 limit = READ_ONCE(memcg->memory.max); in mem_cgroup_margin()
1301 margin = limit - count; in mem_cgroup_margin()
1304 count = page_counter_read(&memcg->memsw); in mem_cgroup_margin()
1305 limit = READ_ONCE(memcg->memsw.max); in mem_cgroup_margin()
1307 margin = min(margin, limit - count); in mem_cgroup_margin()
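mem_cgroup_margin(), as matched above, computes how much more memory can be charged: the headroom against memory.max, further clamped by the memsw headroom when combined memory+swap accounting is in use. A small userspace sketch of the same arithmetic (the example values and field names are local to this sketch):

#include <stdio.h>

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

/* Headroom of one counter: limit - usage, or 0 when already at/over the limit. */
static unsigned long counter_margin(unsigned long usage, unsigned long limit)
{
	return usage < limit ? limit - usage : 0;
}

int main(void)
{
	unsigned long mem_usage = 900, mem_max = 1000;     /* pages, illustrative */
	unsigned long memsw_usage = 1180, memsw_max = 1200;
	int do_memsw_account = 1;                          /* cgroup1 memory+swap limit */

	unsigned long margin = counter_margin(mem_usage, mem_max);
	if (do_memsw_account)
		margin = min_ul(margin, counter_margin(memsw_usage, memsw_max));

	printf("chargeable margin: %lu pages\n", margin);  /* 20, not 100 */
	return 0;
}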
1438 * 1) generic big picture -> specifics and details in memcg_stat_format()
1439 * 2) reflecting userspace activity -> reflecting kernel heuristics in memcg_stat_format()
1505 pr_cont_cgroup_path(memcg->css.cgroup); in mem_cgroup_print_oom_context()
1529 K((u64)page_counter_read(&memcg->memory)), in mem_cgroup_print_oom_meminfo()
1530 K((u64)READ_ONCE(memcg->memory.max)), memcg->memory.failcnt); in mem_cgroup_print_oom_meminfo()
1533 K((u64)page_counter_read(&memcg->swap)), in mem_cgroup_print_oom_meminfo()
1534 K((u64)READ_ONCE(memcg->swap.max)), memcg->swap.failcnt); in mem_cgroup_print_oom_meminfo()
1538 K((u64)page_counter_read(&memcg->memsw)), in mem_cgroup_print_oom_meminfo()
1539 K((u64)memcg->memsw.max), memcg->memsw.failcnt); in mem_cgroup_print_oom_meminfo()
1541 K((u64)page_counter_read(&memcg->kmem)), in mem_cgroup_print_oom_meminfo()
1542 K((u64)memcg->kmem.max), memcg->kmem.failcnt); in mem_cgroup_print_oom_meminfo()
1547 pr_cont_cgroup_path(memcg->css.cgroup); in mem_cgroup_print_oom_meminfo()
1559 unsigned long max = READ_ONCE(memcg->memory.max); in mem_cgroup_get_max()
1564 unsigned long swap = READ_ONCE(memcg->memsw.max) - max; in mem_cgroup_get_max()
1570 max += min(READ_ONCE(memcg->swap.max), in mem_cgroup_get_max()
1578 return page_counter_read(&memcg->memory); in mem_cgroup_size()
1634 * mem_cgroup_get_oom_group - get a memory cgroup to clean up after OOM
1636 * @oom_domain: memcg in case of memcg OOM, NULL in case of system-wide OOM
1639 * by killing all of its OOM-killable tasks.
1641 * Caller has to call mem_cgroup_put() on the returned non-NULL memcg.
1672 * highest-level memory cgroup with oom.group set. in mem_cgroup_get_oom_group()
1675 if (READ_ONCE(memcg->oom_group)) in mem_cgroup_get_oom_group()
1683 css_get(&oom_group->css); in mem_cgroup_get_oom_group()
1693 pr_cont_cgroup_path(memcg->css.cgroup); in mem_cgroup_print_oom_group()
1745 stock_pages = READ_ONCE(stock->nr_pages); in consume_stock()
1746 if (memcg == READ_ONCE(stock->cached) && stock_pages >= nr_pages) { in consume_stock()
1747 WRITE_ONCE(stock->nr_pages, stock_pages - nr_pages); in consume_stock()
1761 unsigned int stock_pages = READ_ONCE(stock->nr_pages); in drain_stock()
1762 struct mem_cgroup *old = READ_ONCE(stock->cached); in drain_stock()
1768 page_counter_uncharge(&old->memory, stock_pages); in drain_stock()
1770 page_counter_uncharge(&old->memsw, stock_pages); in drain_stock()
1772 WRITE_ONCE(stock->nr_pages, 0); in drain_stock()
1775 css_put(&old->css); in drain_stock()
1776 WRITE_ONCE(stock->cached, NULL); in drain_stock()
1795 clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); in drain_local_stock()
1811 if (READ_ONCE(stock->cached) != memcg) { /* reset if necessary */ in __refill_stock()
1813 css_get(&memcg->css); in __refill_stock()
1814 WRITE_ONCE(stock->cached, memcg); in __refill_stock()
1816 stock_pages = READ_ONCE(stock->nr_pages) + nr_pages; in __refill_stock()
1817 WRITE_ONCE(stock->nr_pages, stock_pages); in __refill_stock()
1833 * Drains all per-CPU charge caches for the given root_memcg and the subtree
1844 * Notify other cpus that system-wide "drain" is running in drain_all_stock()
1847 * per-cpu data. CPU up doesn't touch memcg_stock at all. in drain_all_stock()
1857 memcg = READ_ONCE(stock->cached); in drain_all_stock()
1858 if (memcg && READ_ONCE(stock->nr_pages) && in drain_all_stock()
1866 !test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) { in drain_all_stock()
1868 drain_local_stock(&stock->work); in drain_all_stock()
1870 schedule_work_on(cpu, &stock->work); in drain_all_stock()
1896 if (page_counter_read(&memcg->memory) <= in reclaim_high()
1897 READ_ONCE(memcg->memory.high)) in reclaim_high()
1934 * - MEMCG_DELAY_PRECISION_SHIFT: Extra precision bits while translating the
1936 * - MEMCG_DELAY_SCALING_SHIFT: The number of bits to scale down the
1941 * reasonable delay curve compared to precision-adjusted overage, not
1946 * +-------+------------------------+
1947 * | usage | time to allocate in ms |
1948 * +-------+------------------------+
1970 * +-------+------------------------+
1988 overage = usage - high; in calculate_overage()
1998 overage = calculate_overage(page_counter_read(&memcg->memory), in mem_find_max_overage()
1999 READ_ONCE(memcg->memory.high)); in mem_find_max_overage()
2012 overage = calculate_overage(page_counter_read(&memcg->swap), in swap_find_max_overage()
2013 READ_ONCE(memcg->swap.high)); in swap_find_max_overage()
2050 * N-sized allocations are throttled approximately the same as one in calculate_high_delay()
2051 * 4N-sized allocation. in calculate_high_delay()
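The constants and table above describe how overage past memory.high is turned into a throttling delay: the overage is expressed as a fixed-point fraction of the high limit, squared so the curve stays flat near the limit and ramps up sharply past it, then scaled down and clamped. A rough userspace model of that curve follows; the shift values, HZ and the 2-second clamp are assumptions loosely matched to the table, not necessarily the kernel's exact constants.

#include <stdint.h>
#include <stdio.h>

#define PRECISION_SHIFT 20                /* assumed fixed-point precision bits */
#define SCALING_SHIFT   14                /* assumed scale-down of the squared overage */
#define HZ_ASSUMED      1000
#define MAX_DELAY_MS    (2 * HZ_ASSUMED)  /* assumed clamp on the penalty */

/* Fixed-point overage: (usage - high) / high, or 0 when under the limit. */
static uint64_t overage(uint64_t usage, uint64_t high)
{
	if (usage <= high)
		return 0;
	if (high == 0)
		high = 1;   /* act as if the threshold were one page, avoiding div by 0 */
	return ((usage - high) << PRECISION_SHIFT) / high;
}

static uint64_t delay_ms(uint64_t usage, uint64_t high)
{
	uint64_t ov = overage(usage, high);
	/* Square the overage so N small allocations are throttled roughly
	 * like one 4N-sized allocation, per the comment above. */
	uint64_t penalty = ov * ov * HZ_ASSUMED;

	penalty >>= PRECISION_SHIFT;
	penalty >>= SCALING_SHIFT;
	return penalty > MAX_DELAY_MS ? MAX_DELAY_MS : penalty;
}

int main(void)
{
	uint64_t high = 100 << 8;   /* arbitrary "high" limit in pages */

	for (int pct = 100; pct <= 160; pct += 10)
		printf("usage at %3d%% of high -> ~%llu ms of delay\n", pct,
		       (unsigned long long)delay_ms(high * pct / 100, high));
	return 0;
}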
2069 unsigned int nr_pages = current->memcg_nr_pages_over_high; in mem_cgroup_handle_over_high()
2077 memcg = get_mem_cgroup_from_mm(current->mm); in mem_cgroup_handle_over_high()
2078 current->memcg_nr_pages_over_high = 0; in mem_cgroup_handle_over_high()
2106 * memory.high is breached and reclaim is unable to keep up. Throttle in mem_cgroup_handle_over_high()
2136 if (nr_reclaimed || nr_retries--) { in mem_cgroup_handle_over_high()
2147 * need to account for any ill-begotten jiffies to pay them off later. in mem_cgroup_handle_over_high()
2154 css_put(&memcg->css); in mem_cgroup_handle_over_high()
2176 page_counter_try_charge(&memcg->memsw, batch, &counter)) { in try_charge_memcg()
2177 if (page_counter_try_charge(&memcg->memory, batch, &counter)) in try_charge_memcg()
2180 page_counter_uncharge(&memcg->memsw, batch); in try_charge_memcg()
2198 if (unlikely(current->flags & PF_MEMALLOC)) in try_charge_memcg()
2244 if (nr_retries--) in try_charge_memcg()
2273 return -ENOMEM; in try_charge_memcg()
2287 page_counter_charge(&memcg->memory, nr_pages); in try_charge_memcg()
2289 page_counter_charge(&memcg->memsw, nr_pages); in try_charge_memcg()
2295 refill_stock(memcg, batch - nr_pages); in try_charge_memcg()
2309 mem_high = page_counter_read(&memcg->memory) > in try_charge_memcg()
2310 READ_ONCE(memcg->memory.high); in try_charge_memcg()
2311 swap_high = page_counter_read(&memcg->swap) > in try_charge_memcg()
2312 READ_ONCE(memcg->swap.high); in try_charge_memcg()
2317 schedule_work(&memcg->high_work); in try_charge_memcg()
2329 * Target some best-effort fairness between the tasks, in try_charge_memcg()
2333 current->memcg_nr_pages_over_high += batch; in try_charge_memcg()
2342 * excessive overrun while the task is still inside the in try_charge_memcg()
2346 if (current->memcg_nr_pages_over_high > MEMCG_CHARGE_BATCH && in try_charge_memcg()
2347 !(current->flags & PF_MEMALLOC) && in try_charge_memcg()
2354 * mem_cgroup_cancel_charge() - cancel an uncommitted try_charge() call.
2363 page_counter_uncharge(&memcg->memory, nr_pages); in mem_cgroup_cancel_charge()
2365 page_counter_uncharge(&memcg->memsw, nr_pages); in mem_cgroup_cancel_charge()
2374 * - the page lock in commit_charge()
2375 * - LRU isolation in commit_charge()
2376 * - folio_memcg_lock() in commit_charge()
2377 * - exclusive reference in commit_charge()
2378 * - mem_cgroup_trylock_pages() in commit_charge()
2380 folio->memcg_data = (unsigned long)memcg; in commit_charge()
2384 * mem_cgroup_commit_charge - commit a previously successful try_charge().
2390 css_get(&memcg->css); in mem_cgroup_commit_charge()
2413 * Slab objects are accounted individually, not per-page. in mem_cgroup_from_obj_folio()
2415 * slab->obj_exts. in mem_cgroup_from_obj_folio()
2427 off = obj_to_index(slab->slab_cache, slab, p); in mem_cgroup_from_obj_folio()
2437 * slab->obj_exts has not been freed yet in mem_cgroup_from_obj_folio()
2466 objcg = rcu_dereference(memcg->objcg); in __get_obj_cgroup_from_memcg()
2481 old = xchg(&current->objcg, NULL); in current_objcg_update()
2491 if (!current->mm || (current->flags & PF_KTHREAD)) in current_objcg_update()
2520 } while (!try_cmpxchg(&current->objcg, &old, objcg)); in current_objcg_update()
2531 memcg = current->active_memcg; in current_obj_cgroup()
2535 objcg = READ_ONCE(current->objcg); in current_obj_cgroup()
2560 objcg = rcu_dereference_check(memcg->objcg, 1); in current_obj_cgroup()
2604 mod_memcg_state(memcg, MEMCG_KMEM, -nr_pages); in obj_cgroup_uncharge_pages()
2605 memcg1_account_kmem(memcg, -nr_pages); in obj_cgroup_uncharge_pages()
2608 css_put(&memcg->css); in obj_cgroup_uncharge_pages()
2634 css_put(&memcg->css); in obj_cgroup_charge_pages()
2657 page->memcg_data = (unsigned long)objcg | in __memcg_kmem_charge_page()
2681 folio->memcg_data = 0; in __memcg_kmem_uncharge_page()
2701 if (READ_ONCE(stock->cached_objcg) != objcg) { in mod_objcg_state()
2704 stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes) in mod_objcg_state()
2705 ? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0; in mod_objcg_state()
2706 WRITE_ONCE(stock->cached_objcg, objcg); in mod_objcg_state()
2707 stock->cached_pgdat = pgdat; in mod_objcg_state()
2708 } else if (stock->cached_pgdat != pgdat) { in mod_objcg_state()
2710 struct pglist_data *oldpg = stock->cached_pgdat; in mod_objcg_state()
2712 if (stock->nr_slab_reclaimable_b) { in mod_objcg_state()
2714 stock->nr_slab_reclaimable_b); in mod_objcg_state()
2715 stock->nr_slab_reclaimable_b = 0; in mod_objcg_state()
2717 if (stock->nr_slab_unreclaimable_b) { in mod_objcg_state()
2719 stock->nr_slab_unreclaimable_b); in mod_objcg_state()
2720 stock->nr_slab_unreclaimable_b = 0; in mod_objcg_state()
2722 stock->cached_pgdat = pgdat; in mod_objcg_state()
2725 bytes = (idx == NR_SLAB_RECLAIMABLE_B) ? &stock->nr_slab_reclaimable_b in mod_objcg_state()
2726 : &stock->nr_slab_unreclaimable_b; in mod_objcg_state()
2759 if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) { in consume_obj_stock()
2760 stock->nr_bytes -= nr_bytes; in consume_obj_stock()
2771 struct obj_cgroup *old = READ_ONCE(stock->cached_objcg); in drain_obj_stock()
2776 if (stock->nr_bytes) { in drain_obj_stock()
2777 unsigned int nr_pages = stock->nr_bytes >> PAGE_SHIFT; in drain_obj_stock()
2778 unsigned int nr_bytes = stock->nr_bytes & (PAGE_SIZE - 1); in drain_obj_stock()
2785 mod_memcg_state(memcg, MEMCG_KMEM, -nr_pages); in drain_obj_stock()
2786 memcg1_account_kmem(memcg, -nr_pages); in drain_obj_stock()
2789 css_put(&memcg->css); in drain_obj_stock()
2793 * The leftover is flushed to the centralized per-memcg value. in drain_obj_stock()
2795 * to a per-cpu stock (probably, on another CPU), see in drain_obj_stock()
2798 * How often it's flushed is a trade-off between the memory in drain_obj_stock()
2802 atomic_add(nr_bytes, &old->nr_charged_bytes); in drain_obj_stock()
2803 stock->nr_bytes = 0; in drain_obj_stock()
2809 if (stock->nr_slab_reclaimable_b || stock->nr_slab_unreclaimable_b) { in drain_obj_stock()
2810 if (stock->nr_slab_reclaimable_b) { in drain_obj_stock()
2811 __mod_objcg_mlstate(old, stock->cached_pgdat, in drain_obj_stock()
2813 stock->nr_slab_reclaimable_b); in drain_obj_stock()
2814 stock->nr_slab_reclaimable_b = 0; in drain_obj_stock()
2816 if (stock->nr_slab_unreclaimable_b) { in drain_obj_stock()
2817 __mod_objcg_mlstate(old, stock->cached_pgdat, in drain_obj_stock()
2819 stock->nr_slab_unreclaimable_b); in drain_obj_stock()
2820 stock->nr_slab_unreclaimable_b = 0; in drain_obj_stock()
2822 stock->cached_pgdat = NULL; in drain_obj_stock()
2825 WRITE_ONCE(stock->cached_objcg, NULL); in drain_obj_stock()
2836 struct obj_cgroup *objcg = READ_ONCE(stock->cached_objcg); in obj_stock_flush_required()
2859 if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */ in refill_obj_stock()
2862 WRITE_ONCE(stock->cached_objcg, objcg); in refill_obj_stock()
2863 stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes) in refill_obj_stock()
2864 ? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0; in refill_obj_stock()
2867 stock->nr_bytes += nr_bytes; in refill_obj_stock()
2869 if (allow_uncharge && (stock->nr_bytes > PAGE_SIZE)) { in refill_obj_stock()
2870 nr_pages = stock->nr_bytes >> PAGE_SHIFT; in refill_obj_stock()
2871 stock->nr_bytes &= (PAGE_SIZE - 1); in refill_obj_stock()
2890 * In theory, objcg->nr_charged_bytes can have enough in obj_cgroup_charge()
2891 * pre-charged bytes to satisfy the allocation. However, in obj_cgroup_charge()
2892 * flushing objcg->nr_charged_bytes requires two atomic in obj_cgroup_charge()
2893 * operations, and objcg->nr_charged_bytes can't be big. in obj_cgroup_charge()
2894 * The shared objcg->nr_charged_bytes can also become a in obj_cgroup_charge()
2898 * objcg->nr_charged_bytes later on when objcg changes. in obj_cgroup_charge()
2900 * The stock's nr_bytes may contain enough pre-charged bytes in obj_cgroup_charge()
2902 * on the pre-charged bytes not being changed outside of in obj_cgroup_charge()
2904 * pre-charged bytes as well when charging pages. To avoid a in obj_cgroup_charge()
2907 * to temporarily allow the pre-charged bytes to exceed the page in obj_cgroup_charge()
2908 * size limit. The maximum reachable value of the pre-charged in obj_cgroup_charge()
2909 * bytes is (sizeof(object) + PAGE_SIZE - 2) if there is no data in obj_cgroup_charge()
2913 nr_bytes = size & (PAGE_SIZE - 1); in obj_cgroup_charge()
2920 refill_obj_stock(objcg, PAGE_SIZE - nr_bytes, false); in obj_cgroup_charge()
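The two lines above split an object-sized charge into whole pages plus a sub-page remainder: the page counters are charged for full pages (rounding up), and the unused tail of the last page, PAGE_SIZE - nr_bytes, is put back into the per-cpu object stock so later small allocations can consume it without touching the page counters. A hedged sketch of that split (PAGE_SHIFT/PAGE_SIZE values and the struct are illustrative assumptions):

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

struct charge_plan {
	unsigned long nr_pages;   /* charged against the page counters */
	unsigned long prefill;    /* leftover bytes refilled into the stock */
};

static struct charge_plan plan_charge(unsigned long size)
{
	struct charge_plan p;
	unsigned long nr_bytes = size & (PAGE_SIZE - 1);

	p.nr_pages = size >> PAGE_SHIFT;
	if (nr_bytes)
		p.nr_pages++;                  /* round the charge up to whole pages */
	p.prefill = nr_bytes ? PAGE_SIZE - nr_bytes : 0;
	return p;
}

int main(void)
{
	struct charge_plan p = plan_charge(92);          /* tiny slab object */

	assert(p.nr_pages == 1 && p.prefill == PAGE_SIZE - 92);
	printf("charge %lu page(s), stock keeps %lu bytes\n", p.nr_pages, p.prefill);

	p = plan_charge(3 * PAGE_SIZE);                  /* exact multiple: nothing left over */
	assert(p.nr_pages == 3 && p.prefill == 0);
	return 0;
}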
2936 return s->size + sizeof(struct obj_cgroup *); in obj_full_size()
2973 css_put(&memcg->css); in __memcg_slab_post_alloc_hook()
3016 -obj_full_size(s)); in __memcg_slab_free_hook()
3035 folio_page(folio, i)->memcg_data = folio->memcg_data; in split_page_memcg()
3038 obj_cgroup_get_many(__folio_objcg(folio), old_nr / new_nr - 1); in split_page_memcg()
3040 css_get_many(&folio_memcg(folio)->css, old_nr / new_nr - 1); in split_page_memcg()
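The two calls above take extra references when a large folio is split: each of the old_nr / new_nr resulting heads gets a copy of memcg_data, and the original folio already holds one reference, so old_nr / new_nr - 1 additional gets are needed. The arithmetic, as a tiny sketch with illustrative sizes:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned long old_nr = 512;   /* e.g. a PMD-sized folio of 512 pages */
	unsigned long new_nr = 1;     /* split down to individual pages */
	unsigned long heads = old_nr / new_nr;   /* folios after the split */
	unsigned long extra_refs = heads - 1;    /* the original keeps its own ref */

	assert(extra_refs == 511);
	printf("%lu heads share the memcg, %lu extra references taken\n",
	       heads, extra_refs);
	return 0;
}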
3055 val += total_swap_pages - get_nr_swap_pages(); in mem_cgroup_usage()
3058 val = page_counter_read(&memcg->memory); in mem_cgroup_usage()
3060 val = page_counter_read(&memcg->memsw); in mem_cgroup_usage()
3077 return -ENOMEM; in memcg_online_kmem()
3079 objcg->memcg = memcg; in memcg_online_kmem()
3080 rcu_assign_pointer(memcg->objcg, objcg); in memcg_online_kmem()
3082 memcg->orig_objcg = objcg; in memcg_online_kmem()
3086 memcg->kmemcg_id = memcg->id.id; in memcg_online_kmem()
3110 * The ordering is imposed by list_lru_node->lock taken by in memcg_offline_kmem()
3122 return wb_domain_init(&memcg->cgwb_domain, gfp); in memcg_wb_domain_init()
3127 wb_domain_exit(&memcg->cgwb_domain); in memcg_wb_domain_exit()
3132 wb_domain_size_changed(&memcg->cgwb_domain); in memcg_wb_domain_size_changed()
3137 struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css); in mem_cgroup_wb_domain()
3139 if (!memcg->css.parent) in mem_cgroup_wb_domain()
3142 return &memcg->cgwb_domain; in mem_cgroup_wb_domain()
3146 * mem_cgroup_wb_stats - retrieve writeback related stats from its memcg
3154 * @wb's memcg. File, dirty and writeback are self-explanatory. Headroom
3157 * A memcg's headroom is "min(max, high) - used". In the hierarchy, the
3167 struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css); in mem_cgroup_wb_stats()
3179 unsigned long ceiling = min(READ_ONCE(memcg->memory.max), in mem_cgroup_wb_stats()
3180 READ_ONCE(memcg->memory.high)); in mem_cgroup_wb_stats()
3181 unsigned long used = page_counter_read(&memcg->memory); in mem_cgroup_wb_stats()
3183 *pheadroom = min(*pheadroom, ceiling - min(ceiling, used)); in mem_cgroup_wb_stats()
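The fragment above walks from the wb's memcg toward the root and keeps the tightest headroom seen at any level: each level's ceiling is min(memory.max, memory.high), its own headroom is ceiling minus usage (never going negative), and *pheadroom is the minimum across the walk. A small sketch of that clamp, using an array instead of a real cgroup ancestry walk:

#include <stdio.h>

struct level {
	unsigned long max, high, used;   /* pages, illustrative values */
};

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	/* From the wb's own memcg up toward the root. */
	struct level path[] = {
		{ .max = 1000, .high = 800,  .used = 700 },  /* headroom 100 */
		{ .max = 5000, .high = 5000, .used = 4990 }, /* headroom 10  */
	};
	unsigned long headroom = (unsigned long)-1;      /* PAGE_COUNTER_MAX stand-in */

	for (unsigned i = 0; i < sizeof(path) / sizeof(path[0]); i++) {
		unsigned long ceiling = min_ul(path[i].max, path[i].high);
		unsigned long used = min_ul(ceiling, path[i].used); /* avoid underflow */

		headroom = min_ul(headroom, ceiling - used);
	}
	printf("effective headroom: %lu pages\n", headroom);  /* 10: the ancestor binds */
	return 0;
}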
3192 * tracks ownership per-page while the latter per-inode. This was a
3193 * deliberate design decision because honoring per-page ownership in the
3195 * and deemed unnecessary given that write-sharing an inode across
3196 * different cgroups isn't a common use-case.
3198 * Combined with inode majority-writer ownership switching, this works well
3219 * page - a page whose memcg and writeback ownerships don't match - is
3225 * recorded bdi_writebacks and concurrent in-flight foreign writebacks are
3239 int oldest = -1; in mem_cgroup_track_foreign_dirty_slowpath()
3250 frn = &memcg->cgwb_frn[i]; in mem_cgroup_track_foreign_dirty_slowpath()
3251 if (frn->bdi_id == wb->bdi->id && in mem_cgroup_track_foreign_dirty_slowpath()
3252 frn->memcg_id == wb->memcg_css->id) in mem_cgroup_track_foreign_dirty_slowpath()
3254 if (time_before64(frn->at, oldest_at) && in mem_cgroup_track_foreign_dirty_slowpath()
3255 atomic_read(&frn->done.cnt) == 1) { in mem_cgroup_track_foreign_dirty_slowpath()
3257 oldest_at = frn->at; in mem_cgroup_track_foreign_dirty_slowpath()
3263 * Re-using an existing one. Update timestamp lazily to in mem_cgroup_track_foreign_dirty_slowpath()
3265 * reasonably up-to-date and significantly shorter than in mem_cgroup_track_foreign_dirty_slowpath()
3273 if (time_before64(frn->at, now - update_intv)) in mem_cgroup_track_foreign_dirty_slowpath()
3274 frn->at = now; in mem_cgroup_track_foreign_dirty_slowpath()
3277 frn = &memcg->cgwb_frn[oldest]; in mem_cgroup_track_foreign_dirty_slowpath()
3278 frn->bdi_id = wb->bdi->id; in mem_cgroup_track_foreign_dirty_slowpath()
3279 frn->memcg_id = wb->memcg_css->id; in mem_cgroup_track_foreign_dirty_slowpath()
3280 frn->at = now; in mem_cgroup_track_foreign_dirty_slowpath()
3287 struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css); in mem_cgroup_flush_foreign()
3293 struct memcg_cgwb_frn *frn = &memcg->cgwb_frn[i]; in mem_cgroup_flush_foreign()
3301 if (time_after64(frn->at, now - intv) && in mem_cgroup_flush_foreign()
3302 atomic_read(&frn->done.cnt) == 1) { in mem_cgroup_flush_foreign()
3303 frn->at = 0; in mem_cgroup_flush_foreign()
3304 trace_flush_foreign(wb, frn->bdi_id, frn->memcg_id); in mem_cgroup_flush_foreign()
3305 cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id, in mem_cgroup_flush_foreign()
3307 &frn->done); in mem_cgroup_flush_foreign()
3332 * Swap-out records and page cache shadow entries need to store memcg
3335 * memory-controlled cgroups to 64k.
3342 * even when there are much fewer than 64k cgroups - possibly none.
3344 * Maintain a private 16-bit ID space for memcg, and allow the ID to
3353 #define MEM_CGROUP_ID_MAX ((1UL << MEM_CGROUP_ID_SHIFT) - 1)
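The define above caps the private memcg ID space described in the comment: with the 16-bit shift the "64k" figure implies, MEM_CGROUP_ID_MAX works out to 65535, small enough for swap-out records and shadow entries to store the owner ID compactly. A trivial check of the arithmetic (the shift value here is inferred from the comment, not copied from the header):

#include <stdio.h>

#define MEM_CGROUP_ID_SHIFT 16   /* implied by the "64k" limit in the comment above */
#define MEM_CGROUP_ID_MAX   ((1UL << MEM_CGROUP_ID_SHIFT) - 1)

int main(void)
{
	/* A swap record or shadow entry only needs 16 bits for the owner ID. */
	printf("max memcg id: %lu\n", MEM_CGROUP_ID_MAX);  /* 65535 */
	return 0;
}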
3358 if (memcg->id.id > 0) { in mem_cgroup_id_remove()
3359 xa_erase(&mem_cgroup_ids, memcg->id.id); in mem_cgroup_id_remove()
3360 memcg->id.id = 0; in mem_cgroup_id_remove()
3367 refcount_add(n, &memcg->id.ref); in mem_cgroup_id_get_many()
3372 if (refcount_sub_and_test(n, &memcg->id.ref)) { in mem_cgroup_id_put_many()
3376 css_put(&memcg->css); in mem_cgroup_id_put_many()
3386 * mem_cgroup_from_id - look up a memcg from a memcg id
3412 memcg = ERR_PTR(-ENOENT); in mem_cgroup_get_from_ino()
3428 pn->lruvec_stats = kzalloc_node(sizeof(struct lruvec_stats), in alloc_mem_cgroup_per_node_info()
3430 if (!pn->lruvec_stats) in alloc_mem_cgroup_per_node_info()
3433 pn->lruvec_stats_percpu = alloc_percpu_gfp(struct lruvec_stats_percpu, in alloc_mem_cgroup_per_node_info()
3435 if (!pn->lruvec_stats_percpu) in alloc_mem_cgroup_per_node_info()
3438 lruvec_init(&pn->lruvec); in alloc_mem_cgroup_per_node_info()
3439 pn->memcg = memcg; in alloc_mem_cgroup_per_node_info()
3441 memcg->nodeinfo[node] = pn; in alloc_mem_cgroup_per_node_info()
3444 kfree(pn->lruvec_stats); in alloc_mem_cgroup_per_node_info()
3451 struct mem_cgroup_per_node *pn = memcg->nodeinfo[node]; in free_mem_cgroup_per_node_info()
3456 free_percpu(pn->lruvec_stats_percpu); in free_mem_cgroup_per_node_info()
3457 kfree(pn->lruvec_stats); in free_mem_cgroup_per_node_info()
3465 obj_cgroup_put(memcg->orig_objcg); in __mem_cgroup_free()
3470 kfree(memcg->vmstats); in __mem_cgroup_free()
3471 free_percpu(memcg->vmstats_percpu); in __mem_cgroup_free()
3492 return ERR_PTR(-ENOMEM); in mem_cgroup_alloc()
3494 error = xa_alloc(&mem_cgroup_ids, &memcg->id.id, NULL, in mem_cgroup_alloc()
3498 error = -ENOMEM; in mem_cgroup_alloc()
3500 memcg->vmstats = kzalloc(sizeof(struct memcg_vmstats), in mem_cgroup_alloc()
3502 if (!memcg->vmstats) in mem_cgroup_alloc()
3505 memcg->vmstats_percpu = alloc_percpu_gfp(struct memcg_vmstats_percpu, in mem_cgroup_alloc()
3507 if (!memcg->vmstats_percpu) in mem_cgroup_alloc()
3515 pstatc = per_cpu_ptr(parent->vmstats_percpu, cpu); in mem_cgroup_alloc()
3516 statc = per_cpu_ptr(memcg->vmstats_percpu, cpu); in mem_cgroup_alloc()
3517 statc->parent = parent ? pstatc : NULL; in mem_cgroup_alloc()
3518 statc->vmstats = memcg->vmstats; in mem_cgroup_alloc()
3528 INIT_WORK(&memcg->high_work, high_work_func); in mem_cgroup_alloc()
3529 vmpressure_init(&memcg->vmpressure); in mem_cgroup_alloc()
3530 INIT_LIST_HEAD(&memcg->memory_peaks); in mem_cgroup_alloc()
3531 INIT_LIST_HEAD(&memcg->swap_peaks); in mem_cgroup_alloc()
3532 spin_lock_init(&memcg->peaks_lock); in mem_cgroup_alloc()
3533 memcg->socket_pressure = jiffies; in mem_cgroup_alloc()
3535 memcg->kmemcg_id = -1; in mem_cgroup_alloc()
3536 INIT_LIST_HEAD(&memcg->objcg_list); in mem_cgroup_alloc()
3538 INIT_LIST_HEAD(&memcg->cgwb_list); in mem_cgroup_alloc()
3540 memcg->cgwb_frn[i].done = in mem_cgroup_alloc()
3544 spin_lock_init(&memcg->deferred_split_queue.split_queue_lock); in mem_cgroup_alloc()
3545 INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue); in mem_cgroup_alloc()
3546 memcg->deferred_split_queue.split_queue_len = 0; in mem_cgroup_alloc()
3568 page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX); in mem_cgroup_css_alloc()
3571 memcg->zswap_max = PAGE_COUNTER_MAX; in mem_cgroup_css_alloc()
3572 WRITE_ONCE(memcg->zswap_writeback, true); in mem_cgroup_css_alloc()
3574 page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX); in mem_cgroup_css_alloc()
3576 WRITE_ONCE(memcg->swappiness, mem_cgroup_swappiness(parent)); in mem_cgroup_css_alloc()
3578 page_counter_init(&memcg->memory, &parent->memory, true); in mem_cgroup_css_alloc()
3579 page_counter_init(&memcg->swap, &parent->swap, false); in mem_cgroup_css_alloc()
3581 WRITE_ONCE(memcg->oom_kill_disable, READ_ONCE(parent->oom_kill_disable)); in mem_cgroup_css_alloc()
3582 page_counter_init(&memcg->kmem, &parent->kmem, false); in mem_cgroup_css_alloc()
3583 page_counter_init(&memcg->tcpmem, &parent->tcpmem, false); in mem_cgroup_css_alloc()
3588 page_counter_init(&memcg->memory, NULL, true); in mem_cgroup_css_alloc()
3589 page_counter_init(&memcg->swap, NULL, false); in mem_cgroup_css_alloc()
3591 page_counter_init(&memcg->kmem, NULL, false); in mem_cgroup_css_alloc()
3592 page_counter_init(&memcg->tcpmem, NULL, false); in mem_cgroup_css_alloc()
3595 return &memcg->css; in mem_cgroup_css_alloc()
3604 return &memcg->css; in mem_cgroup_css_alloc()
3628 refcount_set(&memcg->id.ref, 1); in mem_cgroup_css_online()
3641 xa_store(&mem_cgroup_ids, memcg->id.id, memcg, GFP_KERNEL); in mem_cgroup_css_online()
3648 return -ENOMEM; in mem_cgroup_css_online()
3657 page_counter_set_min(&memcg->memory, 0); in mem_cgroup_css_offline()
3658 page_counter_set_low(&memcg->memory, 0); in mem_cgroup_css_offline()
3687 wb_wait_for_completion(&memcg->cgwb_frn[i].done); in mem_cgroup_css_free()
3698 vmpressure_cleanup(&memcg->vmpressure); in mem_cgroup_css_free()
3699 cancel_work_sync(&memcg->high_work); in mem_cgroup_css_free()
3706 * mem_cgroup_css_reset - reset the states of a mem_cgroup
3722 page_counter_set_max(&memcg->memory, PAGE_COUNTER_MAX); in mem_cgroup_css_reset()
3723 page_counter_set_max(&memcg->swap, PAGE_COUNTER_MAX); in mem_cgroup_css_reset()
3725 page_counter_set_max(&memcg->kmem, PAGE_COUNTER_MAX); in mem_cgroup_css_reset()
3726 page_counter_set_max(&memcg->tcpmem, PAGE_COUNTER_MAX); in mem_cgroup_css_reset()
3728 page_counter_set_min(&memcg->memory, 0); in mem_cgroup_css_reset()
3729 page_counter_set_low(&memcg->memory, 0); in mem_cgroup_css_reset()
3730 page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX); in mem_cgroup_css_reset()
3732 page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX); in mem_cgroup_css_reset()
3744 statc = per_cpu_ptr(memcg->vmstats_percpu, cpu); in mem_cgroup_css_rstat_flush()
3749 * below us. We're in a per-cpu loop here and this is in mem_cgroup_css_rstat_flush()
3752 delta = memcg->vmstats->state_pending[i]; in mem_cgroup_css_rstat_flush()
3754 memcg->vmstats->state_pending[i] = 0; in mem_cgroup_css_rstat_flush()
3758 v = READ_ONCE(statc->state[i]); in mem_cgroup_css_rstat_flush()
3759 if (v != statc->state_prev[i]) { in mem_cgroup_css_rstat_flush()
3760 delta_cpu = v - statc->state_prev[i]; in mem_cgroup_css_rstat_flush()
3762 statc->state_prev[i] = v; in mem_cgroup_css_rstat_flush()
3767 memcg->vmstats->state_local[i] += delta_cpu; in mem_cgroup_css_rstat_flush()
3770 memcg->vmstats->state[i] += delta; in mem_cgroup_css_rstat_flush()
3772 parent->vmstats->state_pending[i] += delta; in mem_cgroup_css_rstat_flush()
3777 delta = memcg->vmstats->events_pending[i]; in mem_cgroup_css_rstat_flush()
3779 memcg->vmstats->events_pending[i] = 0; in mem_cgroup_css_rstat_flush()
3782 v = READ_ONCE(statc->events[i]); in mem_cgroup_css_rstat_flush()
3783 if (v != statc->events_prev[i]) { in mem_cgroup_css_rstat_flush()
3784 delta_cpu = v - statc->events_prev[i]; in mem_cgroup_css_rstat_flush()
3786 statc->events_prev[i] = v; in mem_cgroup_css_rstat_flush()
3790 memcg->vmstats->events_local[i] += delta_cpu; in mem_cgroup_css_rstat_flush()
3793 memcg->vmstats->events[i] += delta; in mem_cgroup_css_rstat_flush()
3795 parent->vmstats->events_pending[i] += delta; in mem_cgroup_css_rstat_flush()
3800 struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid]; in mem_cgroup_css_rstat_flush()
3801 struct lruvec_stats *lstats = pn->lruvec_stats; in mem_cgroup_css_rstat_flush()
3806 plstats = parent->nodeinfo[nid]->lruvec_stats; in mem_cgroup_css_rstat_flush()
3808 lstatc = per_cpu_ptr(pn->lruvec_stats_percpu, cpu); in mem_cgroup_css_rstat_flush()
3811 delta = lstats->state_pending[i]; in mem_cgroup_css_rstat_flush()
3813 lstats->state_pending[i] = 0; in mem_cgroup_css_rstat_flush()
3816 v = READ_ONCE(lstatc->state[i]); in mem_cgroup_css_rstat_flush()
3817 if (v != lstatc->state_prev[i]) { in mem_cgroup_css_rstat_flush()
3818 delta_cpu = v - lstatc->state_prev[i]; in mem_cgroup_css_rstat_flush()
3820 lstatc->state_prev[i] = v; in mem_cgroup_css_rstat_flush()
3824 lstats->state_local[i] += delta_cpu; in mem_cgroup_css_rstat_flush()
3827 lstats->state[i] += delta; in mem_cgroup_css_rstat_flush()
3829 plstats->state_pending[i] += delta; in mem_cgroup_css_rstat_flush()
3833 WRITE_ONCE(statc->stats_updates, 0); in mem_cgroup_css_rstat_flush()
3834 /* We are in a per-cpu loop here, only do the atomic write once */ in mem_cgroup_css_rstat_flush()
3835 if (atomic64_read(&memcg->vmstats->stats_updates)) in mem_cgroup_css_rstat_flush()
3836 atomic64_set(&memcg->vmstats->stats_updates, 0); in mem_cgroup_css_rstat_flush()
3842 * Set the update flag to cause task->objcg to be initialized lazily in mem_cgroup_fork()
3847 task->objcg = (struct obj_cgroup *)CURRENT_OBJCG_UPDATE_FLAG; in mem_cgroup_fork()
3852 struct obj_cgroup *objcg = task->objcg; in mem_cgroup_exit()
3864 task->objcg = NULL; in mem_cgroup_exit()
3881 if (task->mm && READ_ONCE(task->mm->owner) == task) in mem_cgroup_lru_gen_attach()
3882 lru_gen_migrate_mm(task->mm); in mem_cgroup_lru_gen_attach()
3896 set_bit(CURRENT_OBJCG_UPDATE_BIT, (unsigned long *)&task->objcg); in mem_cgroup_kmem_attach()
3921 return (u64)page_counter_read(&memcg->memory) * PAGE_SIZE; in memory_current_read()
3924 #define OFP_PEAK_UNSET (((-1UL)))
3928 struct cgroup_of_peak *ofp = of_peak(sf->private); in peak_show()
3929 u64 fd_peak = READ_ONCE(ofp->value), peak; in peak_show()
3933 peak = pc->watermark; in peak_show()
3935 peak = max(fd_peak, READ_ONCE(pc->local_watermark)); in peak_show()
3945 return peak_show(sf, v, &memcg->memory); in memory_peak_show()
3952 ofp->value = OFP_PEAK_UNSET; in peak_open()
3961 if (ofp->value == OFP_PEAK_UNSET) { in peak_release()
3965 spin_lock(&memcg->peaks_lock); in peak_release()
3966 list_del(&ofp->list); in peak_release()
3967 spin_unlock(&memcg->peaks_lock); in peak_release()
3979 spin_lock(&memcg->peaks_lock); in peak_write()
3982 WRITE_ONCE(pc->local_watermark, usage); in peak_write()
3985 if (usage > peer_ctx->value) in peak_write()
3986 WRITE_ONCE(peer_ctx->value, usage); in peak_write()
3989 if (ofp->value == -1) in peak_write()
3990 list_add(&ofp->list, watchers); in peak_write()
3992 WRITE_ONCE(ofp->value, usage); in peak_write()
3993 spin_unlock(&memcg->peaks_lock); in peak_write()
4003 return peak_write(of, buf, nbytes, off, &memcg->memory, in memory_peak_write()
4004 &memcg->memory_peaks); in memory_peak_write()
4012 READ_ONCE(mem_cgroup_from_seq(m)->memory.min)); in memory_min_show()
4027 page_counter_set_min(&memcg->memory, min); in memory_min_write()
4035 READ_ONCE(mem_cgroup_from_seq(m)->memory.low)); in memory_low_show()
4050 page_counter_set_low(&memcg->memory, low); in memory_low_write()
4058 READ_ONCE(mem_cgroup_from_seq(m)->memory.high)); in memory_high_show()
4075 page_counter_set_high(&memcg->memory, high); in memory_high_write()
4078 unsigned long nr_pages = page_counter_read(&memcg->memory); in memory_high_write()
4093 reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high, in memory_high_write()
4096 if (!reclaimed && !nr_retries--) in memory_high_write()
4107 READ_ONCE(mem_cgroup_from_seq(m)->memory.max)); in memory_max_show()
4124 xchg(&memcg->memory.max, max); in memory_max_write()
4127 unsigned long nr_pages = page_counter_read(&memcg->memory); in memory_max_write()
4142 if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max, in memory_max_write()
4144 nr_reclaims--; in memory_max_write()
4177 __memory_events_show(m, memcg->memory_events); in memory_events_show()
4185 __memory_events_show(m, memcg->memory_events_local); in memory_events_local_show()
4196 return -ENOMEM; in memory_stat_show()
4246 seq_printf(m, "%d\n", READ_ONCE(memcg->oom_group)); in memory_oom_group_show()
4259 return -EINVAL; in memory_oom_group_write()
4266 return -EINVAL; in memory_oom_group_write()
4268 WRITE_ONCE(memcg->oom_group, oom_group); in memory_oom_group_write()
4289 int swappiness = -1; in memory_reclaim()
4299 return -EINVAL; in memory_reclaim()
4309 return -EINVAL; in memory_reclaim()
4311 return -EINVAL; in memory_reclaim()
4314 return -EINVAL; in memory_reclaim()
4321 unsigned long batch_size = (nr_to_reclaim - nr_reclaimed) / 4; in memory_reclaim()
4325 return -EINTR; in memory_reclaim()
4338 swappiness == -1 ? NULL : &swappiness); in memory_reclaim()
4340 if (!reclaimed && !nr_retries--) in memory_reclaim()
4341 return -EAGAIN; in memory_reclaim()
4445 * mem_cgroup_calculate_protection - check if memory consumption is in the normal range
4446 * @root: the top ancestor of the sub-tree being checked
4450 * of a top-down tree iteration, not for isolated queries.
4464 page_counter_calculate_protection(&root->memory, &memcg->memory, recursive_protection); in mem_cgroup_calculate_protection()
4488 css_put(&memcg->css); in __mem_cgroup_charge()
4494 * mem_cgroup_hugetlb_try_charge - try to charge the memcg for a hugetlb folio
4520 return -EOPNOTSUPP; in mem_cgroup_hugetlb_try_charge()
4523 return -ENOMEM; in mem_cgroup_hugetlb_try_charge()
4529 * mem_cgroup_swapin_charge_folio - Charge a newly allocated folio for swapin.
4553 if (!memcg || !css_tryget_online(&memcg->css)) in mem_cgroup_swapin_charge_folio()
4559 css_put(&memcg->css); in mem_cgroup_swapin_charge_folio()
4564 * mem_cgroup_swapin_uncharge_swap - uncharge swap slot
4583 * so this is a non-issue here. Memory and swap charge lifetimes in mem_cgroup_swapin_uncharge_swap()
4612 if (ug->nr_memory) { in uncharge_batch()
4613 page_counter_uncharge(&ug->memcg->memory, ug->nr_memory); in uncharge_batch()
4615 page_counter_uncharge(&ug->memcg->memsw, ug->nr_memory); in uncharge_batch()
4616 if (ug->nr_kmem) { in uncharge_batch()
4617 mod_memcg_state(ug->memcg, MEMCG_KMEM, -ug->nr_kmem); in uncharge_batch()
4618 memcg1_account_kmem(ug->memcg, -ug->nr_kmem); in uncharge_batch()
4620 memcg1_oom_recover(ug->memcg); in uncharge_batch()
4623 memcg1_uncharge_batch(ug->memcg, ug->pgpgout, ug->nr_memory, ug->nid); in uncharge_batch()
4626 css_put(&ug->memcg->css); in uncharge_batch()
4656 if (ug->memcg != memcg) { in uncharge_folio()
4657 if (ug->memcg) { in uncharge_folio()
4661 ug->memcg = memcg; in uncharge_folio()
4662 ug->nid = folio_nid(folio); in uncharge_folio()
4665 css_get(&memcg->css); in uncharge_folio()
4671 ug->nr_memory += nr_pages; in uncharge_folio()
4672 ug->nr_kmem += nr_pages; in uncharge_folio()
4674 folio->memcg_data = 0; in uncharge_folio()
4679 ug->nr_memory += nr_pages; in uncharge_folio()
4680 ug->pgpgout++; in uncharge_folio()
4683 folio->memcg_data = 0; in uncharge_folio()
4686 css_put(&memcg->css); in uncharge_folio()
4693 /* Don't touch folio->lru of any random page, pre-check: */ in __mem_cgroup_uncharge()
4708 for (i = 0; i < folios->nr; i++) in __mem_cgroup_uncharge_folios()
4709 uncharge_folio(folios->folios[i], &ug); in __mem_cgroup_uncharge_folios()
4715 * mem_cgroup_replace_folio - Charge a folio's replacement.
4722 * Both folios must be locked, @new->mapping must be set up.
4746 /* Force-charge the new page. The old one will be freed soon */ in mem_cgroup_replace_folio()
4748 page_counter_charge(&memcg->memory, nr_pages); in mem_cgroup_replace_folio()
4750 page_counter_charge(&memcg->memsw, nr_pages); in mem_cgroup_replace_folio()
4753 css_get(&memcg->css); in mem_cgroup_replace_folio()
4759 * mem_cgroup_migrate - Transfer the memcg data from the old to the new folio.
4767 * Both folios must be locked, @new->mapping must be set up.
4795 /* Warning should never happen, so don't worry about refcount non-0 */ in mem_cgroup_migrate()
4797 old->memcg_data = 0; in mem_cgroup_migrate()
4820 if (css_tryget(&memcg->css)) in mem_cgroup_sk_alloc()
4821 sk->sk_memcg = memcg; in mem_cgroup_sk_alloc()
4828 if (sk->sk_memcg) in mem_cgroup_sk_free()
4829 css_put(&sk->sk_memcg->css); in mem_cgroup_sk_free()
4833 * mem_cgroup_charge_skmem - charge socket memory
4856 * mem_cgroup_uncharge_skmem - uncharge socket memory
4867 mod_memcg_state(memcg, MEMCG_SOCK, -nr_pages); in mem_cgroup_uncharge_skmem()
4894 * context because of lock dependencies (cgroup_lock -> cpu hotplug) but
4904 * used for per-memcg-per-cpu caching of per-node statistics. In order in mem_cgroup_init()
4914 INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work, in mem_cgroup_init()
4924 while (!refcount_inc_not_zero(&memcg->id.ref)) { in mem_cgroup_id_get_online()
4941 * mem_cgroup_swapout - transfer a memsw charge to swap
4977 mem_cgroup_id_get_many(swap_memcg, nr_entries - 1); in mem_cgroup_swapout()
4984 folio->memcg_data = 0; in mem_cgroup_swapout()
4987 page_counter_uncharge(&memcg->memory, nr_entries); in mem_cgroup_swapout()
4991 page_counter_charge(&swap_memcg->memsw, nr_entries); in mem_cgroup_swapout()
4992 page_counter_uncharge(&memcg->memsw, nr_entries); in mem_cgroup_swapout()
4996 css_put(&memcg->css); in mem_cgroup_swapout()
5000 * __mem_cgroup_try_charge_swap - try charging swap space for a folio
5006 * Returns 0 on success, -ENOMEM on failure.
5032 !page_counter_try_charge(&memcg->swap, nr_pages, &counter)) { in __mem_cgroup_try_charge_swap()
5036 return -ENOMEM; in __mem_cgroup_try_charge_swap()
5041 mem_cgroup_id_get_many(memcg, nr_pages - 1); in __mem_cgroup_try_charge_swap()
5050 * __mem_cgroup_uncharge_swap - uncharge swap space
5065 page_counter_uncharge(&memcg->memsw, nr_pages); in __mem_cgroup_uncharge_swap()
5067 page_counter_uncharge(&memcg->swap, nr_pages); in __mem_cgroup_uncharge_swap()
5069 mod_memcg_state(memcg, MEMCG_SWAP, -nr_pages); in __mem_cgroup_uncharge_swap()
5083 READ_ONCE(memcg->swap.max) - in mem_cgroup_get_nr_swap_pages()
5084 page_counter_read(&memcg->swap)); in mem_cgroup_get_nr_swap_pages()
5104 unsigned long usage = page_counter_read(&memcg->swap); in mem_cgroup_swap_full()
5106 if (usage * 2 >= READ_ONCE(memcg->swap.high) || in mem_cgroup_swap_full()
5107 usage * 2 >= READ_ONCE(memcg->swap.max)) in mem_cgroup_swap_full()
5121 "Please report your usecase to linux-mm@kvack.org if you " in setup_swap_account()
5132 return (u64)page_counter_read(&memcg->swap) * PAGE_SIZE; in swap_current_read()
5139 return peak_show(sf, v, &memcg->swap); in swap_peak_show()
5147 return peak_write(of, buf, nbytes, off, &memcg->swap, in swap_peak_write()
5148 &memcg->swap_peaks); in swap_peak_write()
5154 READ_ONCE(mem_cgroup_from_seq(m)->swap.high)); in swap_high_show()
5169 page_counter_set_high(&memcg->swap, high); in swap_high_write()
5177 READ_ONCE(mem_cgroup_from_seq(m)->swap.max)); in swap_max_show()
5192 xchg(&memcg->swap.max, max); in swap_max_write()
5202 atomic_long_read(&memcg->memory_events[MEMCG_SWAP_HIGH])); in swap_events_show()
5204 atomic_long_read(&memcg->memory_events[MEMCG_SWAP_MAX])); in swap_events_show()
5206 atomic_long_read(&memcg->memory_events[MEMCG_SWAP_FAIL])); in swap_events_show()
5248 * obj_cgroup_may_zswap - check if this cgroup can zswap
5255 * once compression has occurred, and this optimistic pre-check avoids
5270 unsigned long max = READ_ONCE(memcg->zswap_max); in obj_cgroup_may_zswap()
5296 * obj_cgroup_charge_zswap - charge compression backend memory
5310 VM_WARN_ON_ONCE(!(current->flags & PF_MEMALLOC)); in obj_cgroup_charge_zswap()
5324 * obj_cgroup_uncharge_zswap - uncharge compression backend memory
5341 mod_memcg_state(memcg, MEMCG_ZSWAP_B, -size); in obj_cgroup_uncharge_zswap()
5342 mod_memcg_state(memcg, MEMCG_ZSWAPPED, -1); in obj_cgroup_uncharge_zswap()
5353 if (!READ_ONCE(memcg->zswap_writeback)) in mem_cgroup_zswap_writeback_enabled()
5371 READ_ONCE(mem_cgroup_from_seq(m)->zswap_max)); in zswap_max_show()
5386 xchg(&memcg->zswap_max, max); in zswap_max_write()
5395 seq_printf(m, "%d\n", READ_ONCE(memcg->zswap_writeback)); in zswap_writeback_show()
5410 return -EINVAL; in zswap_writeback_write()
5412 WRITE_ONCE(memcg->zswap_writeback, zswap_writeback); in zswap_writeback_write()