1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 2f6ac2354SChristoph Lameter /* 3f6ac2354SChristoph Lameter * linux/mm/vmstat.c 4f6ac2354SChristoph Lameter * 5f6ac2354SChristoph Lameter * Manages VM statistics 6f6ac2354SChristoph Lameter * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 72244b95aSChristoph Lameter * 82244b95aSChristoph Lameter * zoned VM statistics 92244b95aSChristoph Lameter * Copyright (C) 2006 Silicon Graphics, Inc., 102244b95aSChristoph Lameter * Christoph Lameter <christoph@lameter.com> 117cc36bbdSChristoph Lameter * Copyright (C) 2008-2014 Christoph Lameter 12f6ac2354SChristoph Lameter */ 138f32f7e5SAlexey Dobriyan #include <linux/fs.h> 14f6ac2354SChristoph Lameter #include <linux/mm.h> 154e950f6fSAlexey Dobriyan #include <linux/err.h> 162244b95aSChristoph Lameter #include <linux/module.h> 175a0e3ad6STejun Heo #include <linux/slab.h> 18df9ecabaSChristoph Lameter #include <linux/cpu.h> 197cc36bbdSChristoph Lameter #include <linux/cpumask.h> 20c748e134SAdrian Bunk #include <linux/vmstat.h> 213c486871SAndrew Morton #include <linux/proc_fs.h> 223c486871SAndrew Morton #include <linux/seq_file.h> 233c486871SAndrew Morton #include <linux/debugfs.h> 24e8edc6e0SAlexey Dobriyan #include <linux/sched.h> 25f1a5ab12SMel Gorman #include <linux/math64.h> 2679da826aSMichael Rubin #include <linux/writeback.h> 2736deb0beSNamhyung Kim #include <linux/compaction.h> 286e543d57SLisa Du #include <linux/mm_inline.h> 2948c96a36SJoonsoo Kim #include <linux/page_ext.h> 3048c96a36SJoonsoo Kim #include <linux/page_owner.h> 316e543d57SLisa Du 326e543d57SLisa Du #include "internal.h" 33f6ac2354SChristoph Lameter 344518085eSKemi Wang #ifdef CONFIG_NUMA 354518085eSKemi Wang int sysctl_vm_numa_stat = ENABLE_NUMA_STAT; 364518085eSKemi Wang 374518085eSKemi Wang /* zero numa counters within a zone */ 384518085eSKemi Wang static void zero_zone_numa_counters(struct zone *zone) 394518085eSKemi Wang { 404518085eSKemi Wang int item, cpu; 414518085eSKemi Wang 
42f19298b9SMel Gorman for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) { 43f19298b9SMel Gorman atomic_long_set(&zone->vm_numa_event[item], 0); 44f19298b9SMel Gorman for_each_online_cpu(cpu) { 45f19298b9SMel Gorman per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_event[item] 464518085eSKemi Wang = 0; 474518085eSKemi Wang } 484518085eSKemi Wang } 49f19298b9SMel Gorman } 504518085eSKemi Wang 514518085eSKemi Wang /* zero numa counters of all the populated zones */ 524518085eSKemi Wang static void zero_zones_numa_counters(void) 534518085eSKemi Wang { 544518085eSKemi Wang struct zone *zone; 554518085eSKemi Wang 564518085eSKemi Wang for_each_populated_zone(zone) 574518085eSKemi Wang zero_zone_numa_counters(zone); 584518085eSKemi Wang } 594518085eSKemi Wang 604518085eSKemi Wang /* zero global numa counters */ 614518085eSKemi Wang static void zero_global_numa_counters(void) 624518085eSKemi Wang { 634518085eSKemi Wang int item; 644518085eSKemi Wang 65f19298b9SMel Gorman for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) 66f19298b9SMel Gorman atomic_long_set(&vm_numa_event[item], 0); 674518085eSKemi Wang } 684518085eSKemi Wang 694518085eSKemi Wang static void invalid_numa_statistics(void) 704518085eSKemi Wang { 714518085eSKemi Wang zero_zones_numa_counters(); 724518085eSKemi Wang zero_global_numa_counters(); 734518085eSKemi Wang } 744518085eSKemi Wang 754518085eSKemi Wang static DEFINE_MUTEX(vm_numa_stat_lock); 764518085eSKemi Wang 774518085eSKemi Wang int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write, 7832927393SChristoph Hellwig void *buffer, size_t *length, loff_t *ppos) 794518085eSKemi Wang { 804518085eSKemi Wang int ret, oldval; 814518085eSKemi Wang 824518085eSKemi Wang mutex_lock(&vm_numa_stat_lock); 834518085eSKemi Wang if (write) 844518085eSKemi Wang oldval = sysctl_vm_numa_stat; 854518085eSKemi Wang ret = proc_dointvec_minmax(table, write, buffer, length, ppos); 864518085eSKemi Wang if (ret || !write) 874518085eSKemi Wang goto out; 
884518085eSKemi Wang 894518085eSKemi Wang if (oldval == sysctl_vm_numa_stat) 904518085eSKemi Wang goto out; 914518085eSKemi Wang else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) { 924518085eSKemi Wang static_branch_enable(&vm_numa_stat_key); 934518085eSKemi Wang pr_info("enable numa statistics\n"); 944518085eSKemi Wang } else { 954518085eSKemi Wang static_branch_disable(&vm_numa_stat_key); 964518085eSKemi Wang invalid_numa_statistics(); 974518085eSKemi Wang pr_info("disable numa statistics, and clear numa counters\n"); 984518085eSKemi Wang } 994518085eSKemi Wang 1004518085eSKemi Wang out: 1014518085eSKemi Wang mutex_unlock(&vm_numa_stat_lock); 1024518085eSKemi Wang return ret; 1034518085eSKemi Wang } 1044518085eSKemi Wang #endif 1054518085eSKemi Wang 106f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS 107f8891e5eSChristoph Lameter DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}}; 108f8891e5eSChristoph Lameter EXPORT_PER_CPU_SYMBOL(vm_event_states); 109f8891e5eSChristoph Lameter 11031f961a8SMinchan Kim static void sum_vm_events(unsigned long *ret) 111f8891e5eSChristoph Lameter { 1129eccf2a8SChristoph Lameter int cpu; 113f8891e5eSChristoph Lameter int i; 114f8891e5eSChristoph Lameter 115f8891e5eSChristoph Lameter memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long)); 116f8891e5eSChristoph Lameter 11731f961a8SMinchan Kim for_each_online_cpu(cpu) { 118f8891e5eSChristoph Lameter struct vm_event_state *this = &per_cpu(vm_event_states, cpu); 119f8891e5eSChristoph Lameter 120f8891e5eSChristoph Lameter for (i = 0; i < NR_VM_EVENT_ITEMS; i++) 121f8891e5eSChristoph Lameter ret[i] += this->event[i]; 122f8891e5eSChristoph Lameter } 123f8891e5eSChristoph Lameter } 124f8891e5eSChristoph Lameter 125f8891e5eSChristoph Lameter /* 126f8891e5eSChristoph Lameter * Accumulate the vm event counters across all CPUs. 
127f8891e5eSChristoph Lameter * The result is unavoidably approximate - it can change 128f8891e5eSChristoph Lameter * during and after execution of this function. 129f8891e5eSChristoph Lameter */ 130f8891e5eSChristoph Lameter void all_vm_events(unsigned long *ret) 131f8891e5eSChristoph Lameter { 1327625eccdSSebastian Andrzej Siewior cpus_read_lock(); 13331f961a8SMinchan Kim sum_vm_events(ret); 1347625eccdSSebastian Andrzej Siewior cpus_read_unlock(); 135f8891e5eSChristoph Lameter } 13632dd66fcSHeiko Carstens EXPORT_SYMBOL_GPL(all_vm_events); 137f8891e5eSChristoph Lameter 138f8891e5eSChristoph Lameter /* 139f8891e5eSChristoph Lameter * Fold the foreign cpu events into our own. 140f8891e5eSChristoph Lameter * 141f8891e5eSChristoph Lameter * This is adding to the events on one processor 142f8891e5eSChristoph Lameter * but keeps the global counts constant. 143f8891e5eSChristoph Lameter */ 144f8891e5eSChristoph Lameter void vm_events_fold_cpu(int cpu) 145f8891e5eSChristoph Lameter { 146f8891e5eSChristoph Lameter struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu); 147f8891e5eSChristoph Lameter int i; 148f8891e5eSChristoph Lameter 149f8891e5eSChristoph Lameter for (i = 0; i < NR_VM_EVENT_ITEMS; i++) { 150f8891e5eSChristoph Lameter count_vm_events(i, fold_state->event[i]); 151f8891e5eSChristoph Lameter fold_state->event[i] = 0; 152f8891e5eSChristoph Lameter } 153f8891e5eSChristoph Lameter } 154f8891e5eSChristoph Lameter 155f8891e5eSChristoph Lameter #endif /* CONFIG_VM_EVENT_COUNTERS */ 156f8891e5eSChristoph Lameter 1572244b95aSChristoph Lameter /* 1582244b95aSChristoph Lameter * Manage combined zone based / global counters 1592244b95aSChristoph Lameter * 1602244b95aSChristoph Lameter * vm_stat contains the global counters 1612244b95aSChristoph Lameter */ 16275ef7184SMel Gorman atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp; 16375ef7184SMel Gorman atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp; 
164f19298b9SMel Gorman atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp; 16575ef7184SMel Gorman EXPORT_SYMBOL(vm_zone_stat); 16675ef7184SMel Gorman EXPORT_SYMBOL(vm_node_stat); 1672244b95aSChristoph Lameter 1682244b95aSChristoph Lameter #ifdef CONFIG_SMP 1692244b95aSChristoph Lameter 170b44129b3SMel Gorman int calculate_pressure_threshold(struct zone *zone) 17188f5acf8SMel Gorman { 17288f5acf8SMel Gorman int threshold; 17388f5acf8SMel Gorman int watermark_distance; 17488f5acf8SMel Gorman 17588f5acf8SMel Gorman /* 17688f5acf8SMel Gorman * As vmstats are not up to date, there is drift between the estimated 17788f5acf8SMel Gorman * and real values. For high thresholds and a high number of CPUs, it 17888f5acf8SMel Gorman * is possible for the min watermark to be breached while the estimated 17988f5acf8SMel Gorman * value looks fine. The pressure threshold is a reduced value such 18088f5acf8SMel Gorman * that even the maximum amount of drift will not accidentally breach 18188f5acf8SMel Gorman * the min watermark 18288f5acf8SMel Gorman */ 18388f5acf8SMel Gorman watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone); 18488f5acf8SMel Gorman threshold = max(1, (int)(watermark_distance / num_online_cpus())); 18588f5acf8SMel Gorman 18688f5acf8SMel Gorman /* 18788f5acf8SMel Gorman * Maximum threshold is 125 18888f5acf8SMel Gorman */ 18988f5acf8SMel Gorman threshold = min(125, threshold); 19088f5acf8SMel Gorman 19188f5acf8SMel Gorman return threshold; 19288f5acf8SMel Gorman } 19388f5acf8SMel Gorman 194b44129b3SMel Gorman int calculate_normal_threshold(struct zone *zone) 195df9ecabaSChristoph Lameter { 196df9ecabaSChristoph Lameter int threshold; 197df9ecabaSChristoph Lameter int mem; /* memory in 128 MB units */ 1982244b95aSChristoph Lameter 1992244b95aSChristoph Lameter /* 200df9ecabaSChristoph Lameter * The threshold scales with the number of processors and the amount 201df9ecabaSChristoph Lameter * of memory per zone. 
More memory means that we can defer updates for 202df9ecabaSChristoph Lameter * longer, more processors could lead to more contention. 203df9ecabaSChristoph Lameter * fls() is used to have a cheap way of logarithmic scaling. 2042244b95aSChristoph Lameter * 205df9ecabaSChristoph Lameter * Some sample thresholds: 206df9ecabaSChristoph Lameter * 207ea15ba17SMiaohe Lin * Threshold Processors (fls) Zonesize fls(mem)+1 208df9ecabaSChristoph Lameter * ------------------------------------------------------------------ 209df9ecabaSChristoph Lameter * 8 1 1 0.9-1 GB 4 210df9ecabaSChristoph Lameter * 16 2 2 0.9-1 GB 4 211df9ecabaSChristoph Lameter * 20 2 2 1-2 GB 5 212df9ecabaSChristoph Lameter * 24 2 2 2-4 GB 6 213df9ecabaSChristoph Lameter * 28 2 2 4-8 GB 7 214df9ecabaSChristoph Lameter * 32 2 2 8-16 GB 8 215df9ecabaSChristoph Lameter * 4 2 2 <128M 1 216df9ecabaSChristoph Lameter * 30 4 3 2-4 GB 5 217df9ecabaSChristoph Lameter * 48 4 3 8-16 GB 8 218df9ecabaSChristoph Lameter * 32 8 4 1-2 GB 4 219df9ecabaSChristoph Lameter * 32 8 4 0.9-1GB 4 220df9ecabaSChristoph Lameter * 10 16 5 <128M 1 221df9ecabaSChristoph Lameter * 40 16 5 900M 4 222df9ecabaSChristoph Lameter * 70 64 7 2-4 GB 5 223df9ecabaSChristoph Lameter * 84 64 7 4-8 GB 6 224df9ecabaSChristoph Lameter * 108 512 9 4-8 GB 6 225df9ecabaSChristoph Lameter * 125 1024 10 8-16 GB 8 226df9ecabaSChristoph Lameter * 125 1024 10 16-32 GB 9 2272244b95aSChristoph Lameter */ 228df9ecabaSChristoph Lameter 2299705bea5SArun KS mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT); 230df9ecabaSChristoph Lameter 231df9ecabaSChristoph Lameter threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem)); 232df9ecabaSChristoph Lameter 233df9ecabaSChristoph Lameter /* 234df9ecabaSChristoph Lameter * Maximum threshold is 125 235df9ecabaSChristoph Lameter */ 236df9ecabaSChristoph Lameter threshold = min(125, threshold); 237df9ecabaSChristoph Lameter 238df9ecabaSChristoph Lameter return threshold; 239df9ecabaSChristoph Lameter } 
240df9ecabaSChristoph Lameter 241df9ecabaSChristoph Lameter /* 242df9ecabaSChristoph Lameter * Refresh the thresholds for each zone. 243df9ecabaSChristoph Lameter */ 244a6cccdc3SKOSAKI Motohiro void refresh_zone_stat_thresholds(void) 2452244b95aSChristoph Lameter { 24675ef7184SMel Gorman struct pglist_data *pgdat; 247df9ecabaSChristoph Lameter struct zone *zone; 248df9ecabaSChristoph Lameter int cpu; 249df9ecabaSChristoph Lameter int threshold; 250df9ecabaSChristoph Lameter 25175ef7184SMel Gorman /* Zero current pgdat thresholds */ 25275ef7184SMel Gorman for_each_online_pgdat(pgdat) { 25375ef7184SMel Gorman for_each_online_cpu(cpu) { 25475ef7184SMel Gorman per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0; 25575ef7184SMel Gorman } 25675ef7184SMel Gorman } 25775ef7184SMel Gorman 258ee99c71cSKOSAKI Motohiro for_each_populated_zone(zone) { 25975ef7184SMel Gorman struct pglist_data *pgdat = zone->zone_pgdat; 260aa454840SChristoph Lameter unsigned long max_drift, tolerate_drift; 261aa454840SChristoph Lameter 262b44129b3SMel Gorman threshold = calculate_normal_threshold(zone); 263df9ecabaSChristoph Lameter 26475ef7184SMel Gorman for_each_online_cpu(cpu) { 26575ef7184SMel Gorman int pgdat_threshold; 26675ef7184SMel Gorman 26728f836b6SMel Gorman per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold 26899dcc3e5SChristoph Lameter = threshold; 2691d90ca89SKemi Wang 27075ef7184SMel Gorman /* Base nodestat threshold on the largest populated zone. 
*/ 27175ef7184SMel Gorman pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold; 27275ef7184SMel Gorman per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold 27375ef7184SMel Gorman = max(threshold, pgdat_threshold); 27475ef7184SMel Gorman } 27575ef7184SMel Gorman 276aa454840SChristoph Lameter /* 277aa454840SChristoph Lameter * Only set percpu_drift_mark if there is a danger that 278aa454840SChristoph Lameter * NR_FREE_PAGES reports the low watermark is ok when in fact 279aa454840SChristoph Lameter * the min watermark could be breached by an allocation 280aa454840SChristoph Lameter */ 281aa454840SChristoph Lameter tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone); 282aa454840SChristoph Lameter max_drift = num_online_cpus() * threshold; 283aa454840SChristoph Lameter if (max_drift > tolerate_drift) 284aa454840SChristoph Lameter zone->percpu_drift_mark = high_wmark_pages(zone) + 285aa454840SChristoph Lameter max_drift; 286df9ecabaSChristoph Lameter } 2872244b95aSChristoph Lameter } 2882244b95aSChristoph Lameter 289b44129b3SMel Gorman void set_pgdat_percpu_threshold(pg_data_t *pgdat, 290b44129b3SMel Gorman int (*calculate_pressure)(struct zone *)) 29188f5acf8SMel Gorman { 29288f5acf8SMel Gorman struct zone *zone; 29388f5acf8SMel Gorman int cpu; 29488f5acf8SMel Gorman int threshold; 29588f5acf8SMel Gorman int i; 29688f5acf8SMel Gorman 29788f5acf8SMel Gorman for (i = 0; i < pgdat->nr_zones; i++) { 29888f5acf8SMel Gorman zone = &pgdat->node_zones[i]; 29988f5acf8SMel Gorman if (!zone->percpu_drift_mark) 30088f5acf8SMel Gorman continue; 30188f5acf8SMel Gorman 302b44129b3SMel Gorman threshold = (*calculate_pressure)(zone); 3031d90ca89SKemi Wang for_each_online_cpu(cpu) 30428f836b6SMel Gorman per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold 30588f5acf8SMel Gorman = threshold; 30688f5acf8SMel Gorman } 30788f5acf8SMel Gorman } 30888f5acf8SMel Gorman 3092244b95aSChristoph Lameter /* 310bea04b07SJianyu Zhan * For use when we know 
that interrupts are disabled, 311bea04b07SJianyu Zhan * or when we know that preemption is disabled and that 312bea04b07SJianyu Zhan * particular counter cannot be updated from interrupt context. 3132244b95aSChristoph Lameter */ 3142244b95aSChristoph Lameter void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, 3156cdb18adSHeiko Carstens long delta) 3162244b95aSChristoph Lameter { 31728f836b6SMel Gorman struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats; 31812938a92SChristoph Lameter s8 __percpu *p = pcp->vm_stat_diff + item; 3192244b95aSChristoph Lameter long x; 32012938a92SChristoph Lameter long t; 3212244b95aSChristoph Lameter 322*c68ed794SIngo Molnar /* 323*c68ed794SIngo Molnar * Accurate vmstat updates require a RMW. On !PREEMPT_RT kernels, 324*c68ed794SIngo Molnar * atomicity is provided by IRQs being disabled -- either explicitly 325*c68ed794SIngo Molnar * or via local_lock_irq. On PREEMPT_RT, local_lock_irq only disables 326*c68ed794SIngo Molnar * CPU migrations and preemption potentially corrupts a counter so 327*c68ed794SIngo Molnar * disable preemption. 
328*c68ed794SIngo Molnar */ 329*c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 330*c68ed794SIngo Molnar preempt_disable(); 331*c68ed794SIngo Molnar 33212938a92SChristoph Lameter x = delta + __this_cpu_read(*p); 3332244b95aSChristoph Lameter 33412938a92SChristoph Lameter t = __this_cpu_read(pcp->stat_threshold); 33512938a92SChristoph Lameter 33640610076SMiaohe Lin if (unlikely(abs(x) > t)) { 3372244b95aSChristoph Lameter zone_page_state_add(x, zone, item); 3382244b95aSChristoph Lameter x = 0; 3392244b95aSChristoph Lameter } 34012938a92SChristoph Lameter __this_cpu_write(*p, x); 341*c68ed794SIngo Molnar 342*c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 343*c68ed794SIngo Molnar preempt_enable(); 3442244b95aSChristoph Lameter } 3452244b95aSChristoph Lameter EXPORT_SYMBOL(__mod_zone_page_state); 3462244b95aSChristoph Lameter 34775ef7184SMel Gorman void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, 34875ef7184SMel Gorman long delta) 34975ef7184SMel Gorman { 35075ef7184SMel Gorman struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats; 35175ef7184SMel Gorman s8 __percpu *p = pcp->vm_node_stat_diff + item; 35275ef7184SMel Gorman long x; 35375ef7184SMel Gorman long t; 35475ef7184SMel Gorman 355ea426c2aSRoman Gushchin if (vmstat_item_in_bytes(item)) { 356629484aeSJohannes Weiner /* 357629484aeSJohannes Weiner * Only cgroups use subpage accounting right now; at 358629484aeSJohannes Weiner * the global level, these items still change in 359629484aeSJohannes Weiner * multiples of whole pages. Store them as pages 360629484aeSJohannes Weiner * internally to keep the per-cpu counters compact. 
361629484aeSJohannes Weiner */ 362ea426c2aSRoman Gushchin VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1)); 363ea426c2aSRoman Gushchin delta >>= PAGE_SHIFT; 364ea426c2aSRoman Gushchin } 365ea426c2aSRoman Gushchin 366*c68ed794SIngo Molnar /* See __mod_node_page_state */ 367*c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 368*c68ed794SIngo Molnar preempt_disable(); 369*c68ed794SIngo Molnar 37075ef7184SMel Gorman x = delta + __this_cpu_read(*p); 37175ef7184SMel Gorman 37275ef7184SMel Gorman t = __this_cpu_read(pcp->stat_threshold); 37375ef7184SMel Gorman 37440610076SMiaohe Lin if (unlikely(abs(x) > t)) { 37575ef7184SMel Gorman node_page_state_add(x, pgdat, item); 37675ef7184SMel Gorman x = 0; 37775ef7184SMel Gorman } 37875ef7184SMel Gorman __this_cpu_write(*p, x); 379*c68ed794SIngo Molnar 380*c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 381*c68ed794SIngo Molnar preempt_enable(); 38275ef7184SMel Gorman } 38375ef7184SMel Gorman EXPORT_SYMBOL(__mod_node_page_state); 38475ef7184SMel Gorman 3852244b95aSChristoph Lameter /* 3862244b95aSChristoph Lameter * Optimized increment and decrement functions. 3872244b95aSChristoph Lameter * 3882244b95aSChristoph Lameter * These are only for a single page and therefore can take a struct page * 3892244b95aSChristoph Lameter * argument instead of struct zone *. This allows the inclusion of the code 3902244b95aSChristoph Lameter * generated for page_zone(page) into the optimized functions. 3912244b95aSChristoph Lameter * 3922244b95aSChristoph Lameter * No overflow check is necessary and therefore the differential can be 3932244b95aSChristoph Lameter * incremented or decremented in place which may allow the compilers to 3942244b95aSChristoph Lameter * generate better code. 3952244b95aSChristoph Lameter * The increment or decrement is known and therefore one boundary check can 3962244b95aSChristoph Lameter * be omitted. 
3972244b95aSChristoph Lameter * 398df9ecabaSChristoph Lameter * NOTE: These functions are very performance sensitive. Change only 399df9ecabaSChristoph Lameter * with care. 400df9ecabaSChristoph Lameter * 4012244b95aSChristoph Lameter * Some processors have inc/dec instructions that are atomic vs an interrupt. 4022244b95aSChristoph Lameter * However, the code must first determine the differential location in a zone 4032244b95aSChristoph Lameter * based on the processor number and then inc/dec the counter. There is no 4042244b95aSChristoph Lameter * guarantee without disabling preemption that the processor will not change 4052244b95aSChristoph Lameter * in between and therefore the atomicity vs. interrupt cannot be exploited 4062244b95aSChristoph Lameter * in a useful way here. 4072244b95aSChristoph Lameter */ 408c8785385SChristoph Lameter void __inc_zone_state(struct zone *zone, enum zone_stat_item item) 4092244b95aSChristoph Lameter { 41028f836b6SMel Gorman struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats; 41112938a92SChristoph Lameter s8 __percpu *p = pcp->vm_stat_diff + item; 41212938a92SChristoph Lameter s8 v, t; 4132244b95aSChristoph Lameter 414*c68ed794SIngo Molnar /* See __mod_node_page_state */ 415*c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 416*c68ed794SIngo Molnar preempt_disable(); 417*c68ed794SIngo Molnar 418908ee0f1SChristoph Lameter v = __this_cpu_inc_return(*p); 41912938a92SChristoph Lameter t = __this_cpu_read(pcp->stat_threshold); 42012938a92SChristoph Lameter if (unlikely(v > t)) { 42112938a92SChristoph Lameter s8 overstep = t >> 1; 4222244b95aSChristoph Lameter 42312938a92SChristoph Lameter zone_page_state_add(v + overstep, zone, item); 42412938a92SChristoph Lameter __this_cpu_write(*p, -overstep); 4252244b95aSChristoph Lameter } 426*c68ed794SIngo Molnar 427*c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 428*c68ed794SIngo Molnar preempt_enable(); 4292244b95aSChristoph Lameter } 430ca889e6cSChristoph Lameter 
43175ef7184SMel Gorman void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) 43275ef7184SMel Gorman { 43375ef7184SMel Gorman struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats; 43475ef7184SMel Gorman s8 __percpu *p = pcp->vm_node_stat_diff + item; 43575ef7184SMel Gorman s8 v, t; 43675ef7184SMel Gorman 437ea426c2aSRoman Gushchin VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); 438ea426c2aSRoman Gushchin 439*c68ed794SIngo Molnar /* See __mod_node_page_state */ 440*c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 441*c68ed794SIngo Molnar preempt_disable(); 442*c68ed794SIngo Molnar 44375ef7184SMel Gorman v = __this_cpu_inc_return(*p); 44475ef7184SMel Gorman t = __this_cpu_read(pcp->stat_threshold); 44575ef7184SMel Gorman if (unlikely(v > t)) { 44675ef7184SMel Gorman s8 overstep = t >> 1; 44775ef7184SMel Gorman 44875ef7184SMel Gorman node_page_state_add(v + overstep, pgdat, item); 44975ef7184SMel Gorman __this_cpu_write(*p, -overstep); 45075ef7184SMel Gorman } 451*c68ed794SIngo Molnar 452*c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 453*c68ed794SIngo Molnar preempt_enable(); 45475ef7184SMel Gorman } 45575ef7184SMel Gorman 456ca889e6cSChristoph Lameter void __inc_zone_page_state(struct page *page, enum zone_stat_item item) 457ca889e6cSChristoph Lameter { 458ca889e6cSChristoph Lameter __inc_zone_state(page_zone(page), item); 459ca889e6cSChristoph Lameter } 4602244b95aSChristoph Lameter EXPORT_SYMBOL(__inc_zone_page_state); 4612244b95aSChristoph Lameter 46275ef7184SMel Gorman void __inc_node_page_state(struct page *page, enum node_stat_item item) 46375ef7184SMel Gorman { 46475ef7184SMel Gorman __inc_node_state(page_pgdat(page), item); 46575ef7184SMel Gorman } 46675ef7184SMel Gorman EXPORT_SYMBOL(__inc_node_page_state); 46775ef7184SMel Gorman 468c8785385SChristoph Lameter void __dec_zone_state(struct zone *zone, enum zone_stat_item item) 4692244b95aSChristoph Lameter { 47028f836b6SMel Gorman struct per_cpu_zonestat __percpu 
*pcp = zone->per_cpu_zonestats; 47112938a92SChristoph Lameter s8 __percpu *p = pcp->vm_stat_diff + item; 47212938a92SChristoph Lameter s8 v, t; 4732244b95aSChristoph Lameter 474*c68ed794SIngo Molnar /* See __mod_node_page_state */ 475*c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 476*c68ed794SIngo Molnar preempt_disable(); 477*c68ed794SIngo Molnar 478908ee0f1SChristoph Lameter v = __this_cpu_dec_return(*p); 47912938a92SChristoph Lameter t = __this_cpu_read(pcp->stat_threshold); 48012938a92SChristoph Lameter if (unlikely(v < - t)) { 48112938a92SChristoph Lameter s8 overstep = t >> 1; 4822244b95aSChristoph Lameter 48312938a92SChristoph Lameter zone_page_state_add(v - overstep, zone, item); 48412938a92SChristoph Lameter __this_cpu_write(*p, overstep); 4852244b95aSChristoph Lameter } 486*c68ed794SIngo Molnar 487*c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 488*c68ed794SIngo Molnar preempt_enable(); 4892244b95aSChristoph Lameter } 490c8785385SChristoph Lameter 49175ef7184SMel Gorman void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) 49275ef7184SMel Gorman { 49375ef7184SMel Gorman struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats; 49475ef7184SMel Gorman s8 __percpu *p = pcp->vm_node_stat_diff + item; 49575ef7184SMel Gorman s8 v, t; 49675ef7184SMel Gorman 497ea426c2aSRoman Gushchin VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); 498ea426c2aSRoman Gushchin 499*c68ed794SIngo Molnar /* See __mod_node_page_state */ 500*c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 501*c68ed794SIngo Molnar preempt_disable(); 502*c68ed794SIngo Molnar 50375ef7184SMel Gorman v = __this_cpu_dec_return(*p); 50475ef7184SMel Gorman t = __this_cpu_read(pcp->stat_threshold); 50575ef7184SMel Gorman if (unlikely(v < - t)) { 50675ef7184SMel Gorman s8 overstep = t >> 1; 50775ef7184SMel Gorman 50875ef7184SMel Gorman node_page_state_add(v - overstep, pgdat, item); 50975ef7184SMel Gorman __this_cpu_write(*p, overstep); 51075ef7184SMel Gorman } 
511*c68ed794SIngo Molnar 512*c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 513*c68ed794SIngo Molnar preempt_enable(); 51475ef7184SMel Gorman } 51575ef7184SMel Gorman 516c8785385SChristoph Lameter void __dec_zone_page_state(struct page *page, enum zone_stat_item item) 517c8785385SChristoph Lameter { 518c8785385SChristoph Lameter __dec_zone_state(page_zone(page), item); 519c8785385SChristoph Lameter } 5202244b95aSChristoph Lameter EXPORT_SYMBOL(__dec_zone_page_state); 5212244b95aSChristoph Lameter 52275ef7184SMel Gorman void __dec_node_page_state(struct page *page, enum node_stat_item item) 52375ef7184SMel Gorman { 52475ef7184SMel Gorman __dec_node_state(page_pgdat(page), item); 52575ef7184SMel Gorman } 52675ef7184SMel Gorman EXPORT_SYMBOL(__dec_node_page_state); 52775ef7184SMel Gorman 5284156153cSHeiko Carstens #ifdef CONFIG_HAVE_CMPXCHG_LOCAL 5297c839120SChristoph Lameter /* 5307c839120SChristoph Lameter * If we have cmpxchg_local support then we do not need to incur the overhead 5317c839120SChristoph Lameter * that comes with local_irq_save/restore if we use this_cpu_cmpxchg. 5327c839120SChristoph Lameter * 5337c839120SChristoph Lameter * mod_state() modifies the zone counter state through atomic per cpu 5347c839120SChristoph Lameter * operations. 
5357c839120SChristoph Lameter * 5367c839120SChristoph Lameter * Overstep mode specifies how overstep should handled: 5377c839120SChristoph Lameter * 0 No overstepping 5387c839120SChristoph Lameter * 1 Overstepping half of threshold 5397c839120SChristoph Lameter * -1 Overstepping minus half of threshold 5407c839120SChristoph Lameter */ 54175ef7184SMel Gorman static inline void mod_zone_state(struct zone *zone, 54275ef7184SMel Gorman enum zone_stat_item item, long delta, int overstep_mode) 5437c839120SChristoph Lameter { 54428f836b6SMel Gorman struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats; 5457c839120SChristoph Lameter s8 __percpu *p = pcp->vm_stat_diff + item; 5467c839120SChristoph Lameter long o, n, t, z; 5477c839120SChristoph Lameter 5487c839120SChristoph Lameter do { 5497c839120SChristoph Lameter z = 0; /* overflow to zone counters */ 5507c839120SChristoph Lameter 5517c839120SChristoph Lameter /* 5527c839120SChristoph Lameter * The fetching of the stat_threshold is racy. We may apply 5537c839120SChristoph Lameter * a counter threshold to the wrong the cpu if we get 554d3bc2367SChristoph Lameter * rescheduled while executing here. However, the next 555d3bc2367SChristoph Lameter * counter update will apply the threshold again and 556d3bc2367SChristoph Lameter * therefore bring the counter under the threshold again. 557d3bc2367SChristoph Lameter * 558d3bc2367SChristoph Lameter * Most of the time the thresholds are the same anyways 559d3bc2367SChristoph Lameter * for all cpus in a zone. 
5607c839120SChristoph Lameter */ 5617c839120SChristoph Lameter t = this_cpu_read(pcp->stat_threshold); 5627c839120SChristoph Lameter 5637c839120SChristoph Lameter o = this_cpu_read(*p); 5647c839120SChristoph Lameter n = delta + o; 5657c839120SChristoph Lameter 56640610076SMiaohe Lin if (abs(n) > t) { 5677c839120SChristoph Lameter int os = overstep_mode * (t >> 1) ; 5687c839120SChristoph Lameter 5697c839120SChristoph Lameter /* Overflow must be added to zone counters */ 5707c839120SChristoph Lameter z = n + os; 5717c839120SChristoph Lameter n = -os; 5727c839120SChristoph Lameter } 5737c839120SChristoph Lameter } while (this_cpu_cmpxchg(*p, o, n) != o); 5747c839120SChristoph Lameter 5757c839120SChristoph Lameter if (z) 5767c839120SChristoph Lameter zone_page_state_add(z, zone, item); 5777c839120SChristoph Lameter } 5787c839120SChristoph Lameter 5797c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, 5806cdb18adSHeiko Carstens long delta) 5817c839120SChristoph Lameter { 58275ef7184SMel Gorman mod_zone_state(zone, item, delta, 0); 5837c839120SChristoph Lameter } 5847c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state); 5857c839120SChristoph Lameter 5867c839120SChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item) 5877c839120SChristoph Lameter { 58875ef7184SMel Gorman mod_zone_state(page_zone(page), item, 1, 1); 5897c839120SChristoph Lameter } 5907c839120SChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state); 5917c839120SChristoph Lameter 5927c839120SChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item) 5937c839120SChristoph Lameter { 59475ef7184SMel Gorman mod_zone_state(page_zone(page), item, -1, -1); 5957c839120SChristoph Lameter } 5967c839120SChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state); 59775ef7184SMel Gorman 59875ef7184SMel Gorman static inline void mod_node_state(struct pglist_data *pgdat, 59975ef7184SMel Gorman enum node_stat_item item, 
int delta, int overstep_mode) 60075ef7184SMel Gorman { 60175ef7184SMel Gorman struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats; 60275ef7184SMel Gorman s8 __percpu *p = pcp->vm_node_stat_diff + item; 60375ef7184SMel Gorman long o, n, t, z; 60475ef7184SMel Gorman 605ea426c2aSRoman Gushchin if (vmstat_item_in_bytes(item)) { 606629484aeSJohannes Weiner /* 607629484aeSJohannes Weiner * Only cgroups use subpage accounting right now; at 608629484aeSJohannes Weiner * the global level, these items still change in 609629484aeSJohannes Weiner * multiples of whole pages. Store them as pages 610629484aeSJohannes Weiner * internally to keep the per-cpu counters compact. 611629484aeSJohannes Weiner */ 612ea426c2aSRoman Gushchin VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1)); 613ea426c2aSRoman Gushchin delta >>= PAGE_SHIFT; 614ea426c2aSRoman Gushchin } 615ea426c2aSRoman Gushchin 61675ef7184SMel Gorman do { 61775ef7184SMel Gorman z = 0; /* overflow to node counters */ 61875ef7184SMel Gorman 61975ef7184SMel Gorman /* 62075ef7184SMel Gorman * The fetching of the stat_threshold is racy. We may apply 62175ef7184SMel Gorman * a counter threshold to the wrong the cpu if we get 62275ef7184SMel Gorman * rescheduled while executing here. However, the next 62375ef7184SMel Gorman * counter update will apply the threshold again and 62475ef7184SMel Gorman * therefore bring the counter under the threshold again. 62575ef7184SMel Gorman * 62675ef7184SMel Gorman * Most of the time the thresholds are the same anyways 62775ef7184SMel Gorman * for all cpus in a node. 
62875ef7184SMel Gorman */ 62975ef7184SMel Gorman t = this_cpu_read(pcp->stat_threshold); 63075ef7184SMel Gorman 63175ef7184SMel Gorman o = this_cpu_read(*p); 63275ef7184SMel Gorman n = delta + o; 63375ef7184SMel Gorman 63440610076SMiaohe Lin if (abs(n) > t) { 63575ef7184SMel Gorman int os = overstep_mode * (t >> 1) ; 63675ef7184SMel Gorman 63775ef7184SMel Gorman /* Overflow must be added to node counters */ 63875ef7184SMel Gorman z = n + os; 63975ef7184SMel Gorman n = -os; 64075ef7184SMel Gorman } 64175ef7184SMel Gorman } while (this_cpu_cmpxchg(*p, o, n) != o); 64275ef7184SMel Gorman 64375ef7184SMel Gorman if (z) 64475ef7184SMel Gorman node_page_state_add(z, pgdat, item); 64575ef7184SMel Gorman } 64675ef7184SMel Gorman 64775ef7184SMel Gorman void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, 64875ef7184SMel Gorman long delta) 64975ef7184SMel Gorman { 65075ef7184SMel Gorman mod_node_state(pgdat, item, delta, 0); 65175ef7184SMel Gorman } 65275ef7184SMel Gorman EXPORT_SYMBOL(mod_node_page_state); 65375ef7184SMel Gorman 65475ef7184SMel Gorman void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) 65575ef7184SMel Gorman { 65675ef7184SMel Gorman mod_node_state(pgdat, item, 1, 1); 65775ef7184SMel Gorman } 65875ef7184SMel Gorman 65975ef7184SMel Gorman void inc_node_page_state(struct page *page, enum node_stat_item item) 66075ef7184SMel Gorman { 66175ef7184SMel Gorman mod_node_state(page_pgdat(page), item, 1, 1); 66275ef7184SMel Gorman } 66375ef7184SMel Gorman EXPORT_SYMBOL(inc_node_page_state); 66475ef7184SMel Gorman 66575ef7184SMel Gorman void dec_node_page_state(struct page *page, enum node_stat_item item) 66675ef7184SMel Gorman { 66775ef7184SMel Gorman mod_node_state(page_pgdat(page), item, -1, -1); 66875ef7184SMel Gorman } 66975ef7184SMel Gorman EXPORT_SYMBOL(dec_node_page_state); 6707c839120SChristoph Lameter #else 6717c839120SChristoph Lameter /* 6727c839120SChristoph Lameter * Use interrupt disable to serialize counter 
updates 6737c839120SChristoph Lameter */ 6747c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, 6756cdb18adSHeiko Carstens long delta) 6767c839120SChristoph Lameter { 6777c839120SChristoph Lameter unsigned long flags; 6787c839120SChristoph Lameter 6797c839120SChristoph Lameter local_irq_save(flags); 6807c839120SChristoph Lameter __mod_zone_page_state(zone, item, delta); 6817c839120SChristoph Lameter local_irq_restore(flags); 6827c839120SChristoph Lameter } 6837c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state); 6847c839120SChristoph Lameter 6852244b95aSChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item) 6862244b95aSChristoph Lameter { 6872244b95aSChristoph Lameter unsigned long flags; 6882244b95aSChristoph Lameter struct zone *zone; 6892244b95aSChristoph Lameter 6902244b95aSChristoph Lameter zone = page_zone(page); 6912244b95aSChristoph Lameter local_irq_save(flags); 692ca889e6cSChristoph Lameter __inc_zone_state(zone, item); 6932244b95aSChristoph Lameter local_irq_restore(flags); 6942244b95aSChristoph Lameter } 6952244b95aSChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state); 6962244b95aSChristoph Lameter 6972244b95aSChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item) 6982244b95aSChristoph Lameter { 6992244b95aSChristoph Lameter unsigned long flags; 7002244b95aSChristoph Lameter 7012244b95aSChristoph Lameter local_irq_save(flags); 702a302eb4eSChristoph Lameter __dec_zone_page_state(page, item); 7032244b95aSChristoph Lameter local_irq_restore(flags); 7042244b95aSChristoph Lameter } 7052244b95aSChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state); 7062244b95aSChristoph Lameter 70775ef7184SMel Gorman void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) 70875ef7184SMel Gorman { 70975ef7184SMel Gorman unsigned long flags; 71075ef7184SMel Gorman 71175ef7184SMel Gorman local_irq_save(flags); 71275ef7184SMel Gorman 
__inc_node_state(pgdat, item); 71375ef7184SMel Gorman local_irq_restore(flags); 71475ef7184SMel Gorman } 71575ef7184SMel Gorman EXPORT_SYMBOL(inc_node_state); 71675ef7184SMel Gorman 71775ef7184SMel Gorman void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, 71875ef7184SMel Gorman long delta) 71975ef7184SMel Gorman { 72075ef7184SMel Gorman unsigned long flags; 72175ef7184SMel Gorman 72275ef7184SMel Gorman local_irq_save(flags); 72375ef7184SMel Gorman __mod_node_page_state(pgdat, item, delta); 72475ef7184SMel Gorman local_irq_restore(flags); 72575ef7184SMel Gorman } 72675ef7184SMel Gorman EXPORT_SYMBOL(mod_node_page_state); 72775ef7184SMel Gorman 72875ef7184SMel Gorman void inc_node_page_state(struct page *page, enum node_stat_item item) 72975ef7184SMel Gorman { 73075ef7184SMel Gorman unsigned long flags; 73175ef7184SMel Gorman struct pglist_data *pgdat; 73275ef7184SMel Gorman 73375ef7184SMel Gorman pgdat = page_pgdat(page); 73475ef7184SMel Gorman local_irq_save(flags); 73575ef7184SMel Gorman __inc_node_state(pgdat, item); 73675ef7184SMel Gorman local_irq_restore(flags); 73775ef7184SMel Gorman } 73875ef7184SMel Gorman EXPORT_SYMBOL(inc_node_page_state); 73975ef7184SMel Gorman 74075ef7184SMel Gorman void dec_node_page_state(struct page *page, enum node_stat_item item) 74175ef7184SMel Gorman { 74275ef7184SMel Gorman unsigned long flags; 74375ef7184SMel Gorman 74475ef7184SMel Gorman local_irq_save(flags); 74575ef7184SMel Gorman __dec_node_page_state(page, item); 74675ef7184SMel Gorman local_irq_restore(flags); 74775ef7184SMel Gorman } 74875ef7184SMel Gorman EXPORT_SYMBOL(dec_node_page_state); 74975ef7184SMel Gorman #endif 7507cc36bbdSChristoph Lameter 7517cc36bbdSChristoph Lameter /* 7527cc36bbdSChristoph Lameter * Fold a differential into the global counters. 7537cc36bbdSChristoph Lameter * Returns the number of counters updated. 
7547cc36bbdSChristoph Lameter */ 75575ef7184SMel Gorman static int fold_diff(int *zone_diff, int *node_diff) 7564edb0748SChristoph Lameter { 7574edb0748SChristoph Lameter int i; 7587cc36bbdSChristoph Lameter int changes = 0; 7594edb0748SChristoph Lameter 7604edb0748SChristoph Lameter for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 76175ef7184SMel Gorman if (zone_diff[i]) { 76275ef7184SMel Gorman atomic_long_add(zone_diff[i], &vm_zone_stat[i]); 76375ef7184SMel Gorman changes++; 76475ef7184SMel Gorman } 76575ef7184SMel Gorman 76675ef7184SMel Gorman for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) 76775ef7184SMel Gorman if (node_diff[i]) { 76875ef7184SMel Gorman atomic_long_add(node_diff[i], &vm_node_stat[i]); 7697cc36bbdSChristoph Lameter changes++; 7707cc36bbdSChristoph Lameter } 7717cc36bbdSChristoph Lameter return changes; 7724edb0748SChristoph Lameter } 773f19298b9SMel Gorman 774f19298b9SMel Gorman #ifdef CONFIG_NUMA 775f19298b9SMel Gorman static void fold_vm_zone_numa_events(struct zone *zone) 776f19298b9SMel Gorman { 777f19298b9SMel Gorman unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, }; 778f19298b9SMel Gorman int cpu; 779f19298b9SMel Gorman enum numa_stat_item item; 780f19298b9SMel Gorman 781f19298b9SMel Gorman for_each_online_cpu(cpu) { 782f19298b9SMel Gorman struct per_cpu_zonestat *pzstats; 783f19298b9SMel Gorman 784f19298b9SMel Gorman pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); 785f19298b9SMel Gorman for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) 786f19298b9SMel Gorman zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0); 787f19298b9SMel Gorman } 788f19298b9SMel Gorman 789f19298b9SMel Gorman for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) 790f19298b9SMel Gorman zone_numa_event_add(zone_numa_events[item], zone, item); 791f19298b9SMel Gorman } 792f19298b9SMel Gorman 793f19298b9SMel Gorman void fold_vm_numa_events(void) 794f19298b9SMel Gorman { 795f19298b9SMel Gorman struct zone *zone; 796f19298b9SMel Gorman 
797f19298b9SMel Gorman for_each_populated_zone(zone) 798f19298b9SMel Gorman fold_vm_zone_numa_events(zone); 799f19298b9SMel Gorman } 800f19298b9SMel Gorman #endif 8014edb0748SChristoph Lameter 8022244b95aSChristoph Lameter /* 8032bb921e5SChristoph Lameter * Update the zone counters for the current cpu. 804a7f75e25SChristoph Lameter * 8054037d452SChristoph Lameter * Note that refresh_cpu_vm_stats strives to only access 8064037d452SChristoph Lameter * node local memory. The per cpu pagesets on remote zones are placed 8074037d452SChristoph Lameter * in the memory local to the processor using that pageset. So the 8084037d452SChristoph Lameter * loop over all zones will access a series of cachelines local to 8094037d452SChristoph Lameter * the processor. 8104037d452SChristoph Lameter * 8114037d452SChristoph Lameter * The call to zone_page_state_add updates the cachelines with the 8124037d452SChristoph Lameter * statistics in the remote zone struct as well as the global cachelines 8134037d452SChristoph Lameter * with the global counters. These could cause remote node cache line 8144037d452SChristoph Lameter * bouncing and will have to be only done when necessary. 8157cc36bbdSChristoph Lameter * 8167cc36bbdSChristoph Lameter * The function returns the number of global counters updated. 
8172244b95aSChristoph Lameter */ 8180eb77e98SChristoph Lameter static int refresh_cpu_vm_stats(bool do_pagesets) 8192244b95aSChristoph Lameter { 82075ef7184SMel Gorman struct pglist_data *pgdat; 8212244b95aSChristoph Lameter struct zone *zone; 8222244b95aSChristoph Lameter int i; 82375ef7184SMel Gorman int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; 82475ef7184SMel Gorman int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; 8257cc36bbdSChristoph Lameter int changes = 0; 8262244b95aSChristoph Lameter 827ee99c71cSKOSAKI Motohiro for_each_populated_zone(zone) { 82828f836b6SMel Gorman struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; 82928f836b6SMel Gorman #ifdef CONFIG_NUMA 83028f836b6SMel Gorman struct per_cpu_pages __percpu *pcp = zone->per_cpu_pageset; 83128f836b6SMel Gorman #endif 8322244b95aSChristoph Lameter 833fbc2edb0SChristoph Lameter for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { 834a7f75e25SChristoph Lameter int v; 835a7f75e25SChristoph Lameter 83628f836b6SMel Gorman v = this_cpu_xchg(pzstats->vm_stat_diff[i], 0); 837fbc2edb0SChristoph Lameter if (v) { 838fbc2edb0SChristoph Lameter 839a7f75e25SChristoph Lameter atomic_long_add(v, &zone->vm_stat[i]); 84075ef7184SMel Gorman global_zone_diff[i] += v; 8414037d452SChristoph Lameter #ifdef CONFIG_NUMA 8424037d452SChristoph Lameter /* 3 seconds idle till flush */ 84328f836b6SMel Gorman __this_cpu_write(pcp->expire, 3); 8444037d452SChristoph Lameter #endif 8452244b95aSChristoph Lameter } 846fbc2edb0SChristoph Lameter } 8474037d452SChristoph Lameter #ifdef CONFIG_NUMA 8483a321d2aSKemi Wang 8490eb77e98SChristoph Lameter if (do_pagesets) { 8500eb77e98SChristoph Lameter cond_resched(); 8514037d452SChristoph Lameter /* 8524037d452SChristoph Lameter * Deal with draining the remote pageset of this 8534037d452SChristoph Lameter * processor 8544037d452SChristoph Lameter * 8554037d452SChristoph Lameter * Check if there are pages remaining in this pageset 8564037d452SChristoph Lameter * if not then 
there is nothing to expire. 8574037d452SChristoph Lameter */ 85828f836b6SMel Gorman if (!__this_cpu_read(pcp->expire) || 85928f836b6SMel Gorman !__this_cpu_read(pcp->count)) 8604037d452SChristoph Lameter continue; 8614037d452SChristoph Lameter 8624037d452SChristoph Lameter /* 8634037d452SChristoph Lameter * We never drain zones local to this processor. 8644037d452SChristoph Lameter */ 8654037d452SChristoph Lameter if (zone_to_nid(zone) == numa_node_id()) { 86628f836b6SMel Gorman __this_cpu_write(pcp->expire, 0); 8674037d452SChristoph Lameter continue; 8684037d452SChristoph Lameter } 8694037d452SChristoph Lameter 87028f836b6SMel Gorman if (__this_cpu_dec_return(pcp->expire)) 8714037d452SChristoph Lameter continue; 8724037d452SChristoph Lameter 87328f836b6SMel Gorman if (__this_cpu_read(pcp->count)) { 87428f836b6SMel Gorman drain_zone_pages(zone, this_cpu_ptr(pcp)); 8757cc36bbdSChristoph Lameter changes++; 8767cc36bbdSChristoph Lameter } 8770eb77e98SChristoph Lameter } 8784037d452SChristoph Lameter #endif 8792244b95aSChristoph Lameter } 88075ef7184SMel Gorman 88175ef7184SMel Gorman for_each_online_pgdat(pgdat) { 88275ef7184SMel Gorman struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats; 88375ef7184SMel Gorman 88475ef7184SMel Gorman for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { 88575ef7184SMel Gorman int v; 88675ef7184SMel Gorman 88775ef7184SMel Gorman v = this_cpu_xchg(p->vm_node_stat_diff[i], 0); 88875ef7184SMel Gorman if (v) { 88975ef7184SMel Gorman atomic_long_add(v, &pgdat->vm_stat[i]); 89075ef7184SMel Gorman global_node_diff[i] += v; 89175ef7184SMel Gorman } 89275ef7184SMel Gorman } 89375ef7184SMel Gorman } 89475ef7184SMel Gorman 89575ef7184SMel Gorman changes += fold_diff(global_zone_diff, global_node_diff); 8967cc36bbdSChristoph Lameter return changes; 8972244b95aSChristoph Lameter } 8982244b95aSChristoph Lameter 89940f4b1eaSCody P Schafer /* 9002bb921e5SChristoph Lameter * Fold the data for an offline cpu into the global array. 
9012bb921e5SChristoph Lameter * There cannot be any access by the offline cpu and therefore 9022bb921e5SChristoph Lameter * synchronization is simplified. 9032bb921e5SChristoph Lameter */ 9042bb921e5SChristoph Lameter void cpu_vm_stats_fold(int cpu) 9052bb921e5SChristoph Lameter { 90675ef7184SMel Gorman struct pglist_data *pgdat; 9072bb921e5SChristoph Lameter struct zone *zone; 9082bb921e5SChristoph Lameter int i; 90975ef7184SMel Gorman int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; 91075ef7184SMel Gorman int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; 9112bb921e5SChristoph Lameter 9122bb921e5SChristoph Lameter for_each_populated_zone(zone) { 91328f836b6SMel Gorman struct per_cpu_zonestat *pzstats; 9142bb921e5SChristoph Lameter 91528f836b6SMel Gorman pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); 9162bb921e5SChristoph Lameter 917f19298b9SMel Gorman for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { 91828f836b6SMel Gorman if (pzstats->vm_stat_diff[i]) { 9192bb921e5SChristoph Lameter int v; 9202bb921e5SChristoph Lameter 92128f836b6SMel Gorman v = pzstats->vm_stat_diff[i]; 92228f836b6SMel Gorman pzstats->vm_stat_diff[i] = 0; 9232bb921e5SChristoph Lameter atomic_long_add(v, &zone->vm_stat[i]); 92475ef7184SMel Gorman global_zone_diff[i] += v; 9252bb921e5SChristoph Lameter } 926f19298b9SMel Gorman } 9273a321d2aSKemi Wang #ifdef CONFIG_NUMA 928f19298b9SMel Gorman for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) { 929f19298b9SMel Gorman if (pzstats->vm_numa_event[i]) { 930f19298b9SMel Gorman unsigned long v; 9313a321d2aSKemi Wang 932f19298b9SMel Gorman v = pzstats->vm_numa_event[i]; 933f19298b9SMel Gorman pzstats->vm_numa_event[i] = 0; 934f19298b9SMel Gorman zone_numa_event_add(v, zone, i); 935f19298b9SMel Gorman } 9363a321d2aSKemi Wang } 9373a321d2aSKemi Wang #endif 9382bb921e5SChristoph Lameter } 9392bb921e5SChristoph Lameter 94075ef7184SMel Gorman for_each_online_pgdat(pgdat) { 94175ef7184SMel Gorman struct per_cpu_nodestat *p; 94275ef7184SMel Gorman 
94375ef7184SMel Gorman p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu); 94475ef7184SMel Gorman 94575ef7184SMel Gorman for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) 94675ef7184SMel Gorman if (p->vm_node_stat_diff[i]) { 94775ef7184SMel Gorman int v; 94875ef7184SMel Gorman 94975ef7184SMel Gorman v = p->vm_node_stat_diff[i]; 95075ef7184SMel Gorman p->vm_node_stat_diff[i] = 0; 95175ef7184SMel Gorman atomic_long_add(v, &pgdat->vm_stat[i]); 95275ef7184SMel Gorman global_node_diff[i] += v; 95375ef7184SMel Gorman } 95475ef7184SMel Gorman } 95575ef7184SMel Gorman 95675ef7184SMel Gorman fold_diff(global_zone_diff, global_node_diff); 9572bb921e5SChristoph Lameter } 9582bb921e5SChristoph Lameter 9592bb921e5SChristoph Lameter /* 96040f4b1eaSCody P Schafer * this is only called if !populated_zone(zone), which implies no other users of 961f0953a1bSIngo Molnar * pset->vm_stat_diff[] exist. 96240f4b1eaSCody P Schafer */ 96328f836b6SMel Gorman void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats) 9645a883813SMinchan Kim { 965f19298b9SMel Gorman unsigned long v; 9665a883813SMinchan Kim int i; 9675a883813SMinchan Kim 968f19298b9SMel Gorman for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { 96928f836b6SMel Gorman if (pzstats->vm_stat_diff[i]) { 970f19298b9SMel Gorman v = pzstats->vm_stat_diff[i]; 97128f836b6SMel Gorman pzstats->vm_stat_diff[i] = 0; 972f19298b9SMel Gorman zone_page_state_add(v, zone, i); 973f19298b9SMel Gorman } 9745a883813SMinchan Kim } 9753a321d2aSKemi Wang 9763a321d2aSKemi Wang #ifdef CONFIG_NUMA 977f19298b9SMel Gorman for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) { 978f19298b9SMel Gorman if (pzstats->vm_numa_event[i]) { 979f19298b9SMel Gorman v = pzstats->vm_numa_event[i]; 980f19298b9SMel Gorman pzstats->vm_numa_event[i] = 0; 981f19298b9SMel Gorman zone_numa_event_add(v, zone, i); 982f19298b9SMel Gorman } 9833a321d2aSKemi Wang } 9843a321d2aSKemi Wang #endif 9855a883813SMinchan Kim } 9862244b95aSChristoph Lameter #endif 9872244b95aSChristoph Lameter 
988ca889e6cSChristoph Lameter #ifdef CONFIG_NUMA 989ca889e6cSChristoph Lameter /* 99075ef7184SMel Gorman * Determine the per node value of a stat item. This function 99175ef7184SMel Gorman * is called frequently in a NUMA machine, so try to be as 99275ef7184SMel Gorman * frugal as possible. 993c2d42c16SAndrew Morton */ 99475ef7184SMel Gorman unsigned long sum_zone_node_page_state(int node, 99575ef7184SMel Gorman enum zone_stat_item item) 996c2d42c16SAndrew Morton { 997c2d42c16SAndrew Morton struct zone *zones = NODE_DATA(node)->node_zones; 998e87d59f7SJoonsoo Kim int i; 999e87d59f7SJoonsoo Kim unsigned long count = 0; 1000c2d42c16SAndrew Morton 1001e87d59f7SJoonsoo Kim for (i = 0; i < MAX_NR_ZONES; i++) 1002e87d59f7SJoonsoo Kim count += zone_page_state(zones + i, item); 1003e87d59f7SJoonsoo Kim 1004e87d59f7SJoonsoo Kim return count; 1005c2d42c16SAndrew Morton } 1006c2d42c16SAndrew Morton 1007f19298b9SMel Gorman /* Determine the per node value of a numa stat item. */ 1008f19298b9SMel Gorman unsigned long sum_zone_numa_event_state(int node, 10093a321d2aSKemi Wang enum numa_stat_item item) 10103a321d2aSKemi Wang { 10113a321d2aSKemi Wang struct zone *zones = NODE_DATA(node)->node_zones; 10123a321d2aSKemi Wang unsigned long count = 0; 1013f19298b9SMel Gorman int i; 10143a321d2aSKemi Wang 10153a321d2aSKemi Wang for (i = 0; i < MAX_NR_ZONES; i++) 1016f19298b9SMel Gorman count += zone_numa_event_state(zones + i, item); 10173a321d2aSKemi Wang 10183a321d2aSKemi Wang return count; 10193a321d2aSKemi Wang } 10203a321d2aSKemi Wang 102175ef7184SMel Gorman /* 102275ef7184SMel Gorman * Determine the per node value of a stat item. 
102375ef7184SMel Gorman */ 1024ea426c2aSRoman Gushchin unsigned long node_page_state_pages(struct pglist_data *pgdat, 102575ef7184SMel Gorman enum node_stat_item item) 102675ef7184SMel Gorman { 102775ef7184SMel Gorman long x = atomic_long_read(&pgdat->vm_stat[item]); 102875ef7184SMel Gorman #ifdef CONFIG_SMP 102975ef7184SMel Gorman if (x < 0) 103075ef7184SMel Gorman x = 0; 103175ef7184SMel Gorman #endif 103275ef7184SMel Gorman return x; 103375ef7184SMel Gorman } 1034ea426c2aSRoman Gushchin 1035ea426c2aSRoman Gushchin unsigned long node_page_state(struct pglist_data *pgdat, 1036ea426c2aSRoman Gushchin enum node_stat_item item) 1037ea426c2aSRoman Gushchin { 1038ea426c2aSRoman Gushchin VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); 1039ea426c2aSRoman Gushchin 1040ea426c2aSRoman Gushchin return node_page_state_pages(pgdat, item); 1041ea426c2aSRoman Gushchin } 1042ca889e6cSChristoph Lameter #endif 1043ca889e6cSChristoph Lameter 1044d7a5752cSMel Gorman #ifdef CONFIG_COMPACTION 104536deb0beSNamhyung Kim 1046d7a5752cSMel Gorman struct contig_page_info { 1047d7a5752cSMel Gorman unsigned long free_pages; 1048d7a5752cSMel Gorman unsigned long free_blocks_total; 1049d7a5752cSMel Gorman unsigned long free_blocks_suitable; 1050d7a5752cSMel Gorman }; 1051d7a5752cSMel Gorman 1052d7a5752cSMel Gorman /* 1053d7a5752cSMel Gorman * Calculate the number of free pages in a zone, how many contiguous 1054d7a5752cSMel Gorman * pages are free and how many are large enough to satisfy an allocation of 1055d7a5752cSMel Gorman * the target size. Note that this function makes no attempt to estimate 1056d7a5752cSMel Gorman * how many suitable free blocks there *might* be if MOVABLE pages were 1057d7a5752cSMel Gorman * migrated. 
Calculating that is possible, but expensive and can be 1058d7a5752cSMel Gorman * figured out from userspace 1059d7a5752cSMel Gorman */ 1060d7a5752cSMel Gorman static void fill_contig_page_info(struct zone *zone, 1061d7a5752cSMel Gorman unsigned int suitable_order, 1062d7a5752cSMel Gorman struct contig_page_info *info) 1063d7a5752cSMel Gorman { 1064d7a5752cSMel Gorman unsigned int order; 1065d7a5752cSMel Gorman 1066d7a5752cSMel Gorman info->free_pages = 0; 1067d7a5752cSMel Gorman info->free_blocks_total = 0; 1068d7a5752cSMel Gorman info->free_blocks_suitable = 0; 1069d7a5752cSMel Gorman 1070d7a5752cSMel Gorman for (order = 0; order < MAX_ORDER; order++) { 1071d7a5752cSMel Gorman unsigned long blocks; 1072d7a5752cSMel Gorman 1073d7a5752cSMel Gorman /* Count number of free blocks */ 1074d7a5752cSMel Gorman blocks = zone->free_area[order].nr_free; 1075d7a5752cSMel Gorman info->free_blocks_total += blocks; 1076d7a5752cSMel Gorman 1077d7a5752cSMel Gorman /* Count free base pages */ 1078d7a5752cSMel Gorman info->free_pages += blocks << order; 1079d7a5752cSMel Gorman 1080d7a5752cSMel Gorman /* Count the suitable free blocks */ 1081d7a5752cSMel Gorman if (order >= suitable_order) 1082d7a5752cSMel Gorman info->free_blocks_suitable += blocks << 1083d7a5752cSMel Gorman (order - suitable_order); 1084d7a5752cSMel Gorman } 1085d7a5752cSMel Gorman } 1086f1a5ab12SMel Gorman 1087f1a5ab12SMel Gorman /* 1088f1a5ab12SMel Gorman * A fragmentation index only makes sense if an allocation of a requested 1089f1a5ab12SMel Gorman * size would fail. If that is true, the fragmentation index indicates 1090f1a5ab12SMel Gorman * whether external fragmentation or a lack of memory was the problem. 
1091f1a5ab12SMel Gorman * The value can be used to determine if page reclaim or compaction 1092f1a5ab12SMel Gorman * should be used 1093f1a5ab12SMel Gorman */ 109456de7263SMel Gorman static int __fragmentation_index(unsigned int order, struct contig_page_info *info) 1095f1a5ab12SMel Gorman { 1096f1a5ab12SMel Gorman unsigned long requested = 1UL << order; 1097f1a5ab12SMel Gorman 109888d6ac40SWen Yang if (WARN_ON_ONCE(order >= MAX_ORDER)) 109988d6ac40SWen Yang return 0; 110088d6ac40SWen Yang 1101f1a5ab12SMel Gorman if (!info->free_blocks_total) 1102f1a5ab12SMel Gorman return 0; 1103f1a5ab12SMel Gorman 1104f1a5ab12SMel Gorman /* Fragmentation index only makes sense when a request would fail */ 1105f1a5ab12SMel Gorman if (info->free_blocks_suitable) 1106f1a5ab12SMel Gorman return -1000; 1107f1a5ab12SMel Gorman 1108f1a5ab12SMel Gorman /* 1109f1a5ab12SMel Gorman * Index is between 0 and 1 so return within 3 decimal places 1110f1a5ab12SMel Gorman * 1111f1a5ab12SMel Gorman * 0 => allocation would fail due to lack of memory 1112f1a5ab12SMel Gorman * 1 => allocation would fail due to fragmentation 1113f1a5ab12SMel Gorman */ 1114f1a5ab12SMel Gorman return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total); 1115f1a5ab12SMel Gorman } 111656de7263SMel Gorman 1117facdaa91SNitin Gupta /* 1118facdaa91SNitin Gupta * Calculates external fragmentation within a zone wrt the given order. 1119facdaa91SNitin Gupta * It is defined as the percentage of pages found in blocks of size 1120facdaa91SNitin Gupta * less than 1 << order. It returns values in range [0, 100]. 
1121facdaa91SNitin Gupta */ 1122d34c0a75SNitin Gupta unsigned int extfrag_for_order(struct zone *zone, unsigned int order) 1123facdaa91SNitin Gupta { 1124facdaa91SNitin Gupta struct contig_page_info info; 1125facdaa91SNitin Gupta 1126facdaa91SNitin Gupta fill_contig_page_info(zone, order, &info); 1127facdaa91SNitin Gupta if (info.free_pages == 0) 1128facdaa91SNitin Gupta return 0; 1129facdaa91SNitin Gupta 1130facdaa91SNitin Gupta return div_u64((info.free_pages - 1131facdaa91SNitin Gupta (info.free_blocks_suitable << order)) * 100, 1132facdaa91SNitin Gupta info.free_pages); 1133facdaa91SNitin Gupta } 1134facdaa91SNitin Gupta 113556de7263SMel Gorman /* Same as __fragmentation index but allocs contig_page_info on stack */ 113656de7263SMel Gorman int fragmentation_index(struct zone *zone, unsigned int order) 113756de7263SMel Gorman { 113856de7263SMel Gorman struct contig_page_info info; 113956de7263SMel Gorman 114056de7263SMel Gorman fill_contig_page_info(zone, order, &info); 114156de7263SMel Gorman return __fragmentation_index(order, &info); 114256de7263SMel Gorman } 1143d7a5752cSMel Gorman #endif 1144d7a5752cSMel Gorman 1145ebc5d83dSKonstantin Khlebnikov #if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \ 1146ebc5d83dSKonstantin Khlebnikov defined(CONFIG_NUMA) || defined(CONFIG_MEMCG) 1147fa25c503SKOSAKI Motohiro #ifdef CONFIG_ZONE_DMA 1148fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA(xx) xx "_dma", 1149fa25c503SKOSAKI Motohiro #else 1150fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA(xx) 1151fa25c503SKOSAKI Motohiro #endif 1152fa25c503SKOSAKI Motohiro 1153fa25c503SKOSAKI Motohiro #ifdef CONFIG_ZONE_DMA32 1154fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA32(xx) xx "_dma32", 1155fa25c503SKOSAKI Motohiro #else 1156fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA32(xx) 1157fa25c503SKOSAKI Motohiro #endif 1158fa25c503SKOSAKI Motohiro 1159fa25c503SKOSAKI Motohiro #ifdef CONFIG_HIGHMEM 1160fa25c503SKOSAKI Motohiro #define TEXT_FOR_HIGHMEM(xx) xx "_high", 
1161fa25c503SKOSAKI Motohiro #else 1162fa25c503SKOSAKI Motohiro #define TEXT_FOR_HIGHMEM(xx) 1163fa25c503SKOSAKI Motohiro #endif 1164fa25c503SKOSAKI Motohiro 1165fa25c503SKOSAKI Motohiro #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \ 1166fa25c503SKOSAKI Motohiro TEXT_FOR_HIGHMEM(xx) xx "_movable", 1167fa25c503SKOSAKI Motohiro 1168fa25c503SKOSAKI Motohiro const char * const vmstat_text[] = { 11698d92890bSNeilBrown /* enum zone_stat_item counters */ 1170fa25c503SKOSAKI Motohiro "nr_free_pages", 117171c799f4SMinchan Kim "nr_zone_inactive_anon", 117271c799f4SMinchan Kim "nr_zone_active_anon", 117371c799f4SMinchan Kim "nr_zone_inactive_file", 117471c799f4SMinchan Kim "nr_zone_active_file", 117571c799f4SMinchan Kim "nr_zone_unevictable", 11765a1c84b4SMel Gorman "nr_zone_write_pending", 1177fa25c503SKOSAKI Motohiro "nr_mlock", 1178fa25c503SKOSAKI Motohiro "nr_bounce", 117991537feeSMinchan Kim #if IS_ENABLED(CONFIG_ZSMALLOC) 118091537feeSMinchan Kim "nr_zspages", 118191537feeSMinchan Kim #endif 11823a321d2aSKemi Wang "nr_free_cma", 11833a321d2aSKemi Wang 11843a321d2aSKemi Wang /* enum numa_stat_item counters */ 1185fa25c503SKOSAKI Motohiro #ifdef CONFIG_NUMA 1186fa25c503SKOSAKI Motohiro "numa_hit", 1187fa25c503SKOSAKI Motohiro "numa_miss", 1188fa25c503SKOSAKI Motohiro "numa_foreign", 1189fa25c503SKOSAKI Motohiro "numa_interleave", 1190fa25c503SKOSAKI Motohiro "numa_local", 1191fa25c503SKOSAKI Motohiro "numa_other", 1192fa25c503SKOSAKI Motohiro #endif 119309316c09SKonstantin Khlebnikov 11949d7ea9a2SKonstantin Khlebnikov /* enum node_stat_item counters */ 1195599d0c95SMel Gorman "nr_inactive_anon", 1196599d0c95SMel Gorman "nr_active_anon", 1197599d0c95SMel Gorman "nr_inactive_file", 1198599d0c95SMel Gorman "nr_active_file", 1199599d0c95SMel Gorman "nr_unevictable", 1200385386cfSJohannes Weiner "nr_slab_reclaimable", 1201385386cfSJohannes Weiner "nr_slab_unreclaimable", 1202599d0c95SMel Gorman "nr_isolated_anon", 1203599d0c95SMel Gorman 
"nr_isolated_file", 120468d48e6aSJohannes Weiner "workingset_nodes", 1205170b04b7SJoonsoo Kim "workingset_refault_anon", 1206170b04b7SJoonsoo Kim "workingset_refault_file", 1207170b04b7SJoonsoo Kim "workingset_activate_anon", 1208170b04b7SJoonsoo Kim "workingset_activate_file", 1209170b04b7SJoonsoo Kim "workingset_restore_anon", 1210170b04b7SJoonsoo Kim "workingset_restore_file", 12111e6b1085SMel Gorman "workingset_nodereclaim", 121250658e2eSMel Gorman "nr_anon_pages", 121350658e2eSMel Gorman "nr_mapped", 121411fb9989SMel Gorman "nr_file_pages", 121511fb9989SMel Gorman "nr_dirty", 121611fb9989SMel Gorman "nr_writeback", 121711fb9989SMel Gorman "nr_writeback_temp", 121811fb9989SMel Gorman "nr_shmem", 121911fb9989SMel Gorman "nr_shmem_hugepages", 122011fb9989SMel Gorman "nr_shmem_pmdmapped", 122160fbf0abSSong Liu "nr_file_hugepages", 122260fbf0abSSong Liu "nr_file_pmdmapped", 122311fb9989SMel Gorman "nr_anon_transparent_hugepages", 1224c4a25635SMel Gorman "nr_vmscan_write", 1225c4a25635SMel Gorman "nr_vmscan_immediate_reclaim", 1226c4a25635SMel Gorman "nr_dirtied", 1227c4a25635SMel Gorman "nr_written", 1228b29940c1SVlastimil Babka "nr_kernel_misc_reclaimable", 12291970dc6fSJohn Hubbard "nr_foll_pin_acquired", 12301970dc6fSJohn Hubbard "nr_foll_pin_released", 1231991e7673SShakeel Butt "nr_kernel_stack", 1232991e7673SShakeel Butt #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) 1233991e7673SShakeel Butt "nr_shadow_call_stack", 1234991e7673SShakeel Butt #endif 1235f0c0c115SShakeel Butt "nr_page_table_pages", 1236b6038942SShakeel Butt #ifdef CONFIG_SWAP 1237b6038942SShakeel Butt "nr_swapcached", 1238b6038942SShakeel Butt #endif 1239599d0c95SMel Gorman 124009316c09SKonstantin Khlebnikov /* enum writeback_stat_item counters */ 1241fa25c503SKOSAKI Motohiro "nr_dirty_threshold", 1242fa25c503SKOSAKI Motohiro "nr_dirty_background_threshold", 1243fa25c503SKOSAKI Motohiro 1244ebc5d83dSKonstantin Khlebnikov #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) 
124509316c09SKonstantin Khlebnikov /* enum vm_event_item counters */ 1246fa25c503SKOSAKI Motohiro "pgpgin", 1247fa25c503SKOSAKI Motohiro "pgpgout", 1248fa25c503SKOSAKI Motohiro "pswpin", 1249fa25c503SKOSAKI Motohiro "pswpout", 1250fa25c503SKOSAKI Motohiro 1251fa25c503SKOSAKI Motohiro TEXTS_FOR_ZONES("pgalloc") 12527cc30fcfSMel Gorman TEXTS_FOR_ZONES("allocstall") 12537cc30fcfSMel Gorman TEXTS_FOR_ZONES("pgskip") 1254fa25c503SKOSAKI Motohiro 1255fa25c503SKOSAKI Motohiro "pgfree", 1256fa25c503SKOSAKI Motohiro "pgactivate", 1257fa25c503SKOSAKI Motohiro "pgdeactivate", 1258f7ad2a6cSShaohua Li "pglazyfree", 1259fa25c503SKOSAKI Motohiro 1260fa25c503SKOSAKI Motohiro "pgfault", 1261fa25c503SKOSAKI Motohiro "pgmajfault", 1262854e9ed0SMinchan Kim "pglazyfreed", 1263fa25c503SKOSAKI Motohiro 1264599d0c95SMel Gorman "pgrefill", 1265798a6b87SPeter Xu "pgreuse", 1266599d0c95SMel Gorman "pgsteal_kswapd", 1267599d0c95SMel Gorman "pgsteal_direct", 1268668e4147SYang Shi "pgdemote_kswapd", 1269668e4147SYang Shi "pgdemote_direct", 1270599d0c95SMel Gorman "pgscan_kswapd", 1271599d0c95SMel Gorman "pgscan_direct", 127268243e76SMel Gorman "pgscan_direct_throttle", 1273497a6c1bSJohannes Weiner "pgscan_anon", 1274497a6c1bSJohannes Weiner "pgscan_file", 1275497a6c1bSJohannes Weiner "pgsteal_anon", 1276497a6c1bSJohannes Weiner "pgsteal_file", 1277fa25c503SKOSAKI Motohiro 1278fa25c503SKOSAKI Motohiro #ifdef CONFIG_NUMA 1279fa25c503SKOSAKI Motohiro "zone_reclaim_failed", 1280fa25c503SKOSAKI Motohiro #endif 1281fa25c503SKOSAKI Motohiro "pginodesteal", 1282fa25c503SKOSAKI Motohiro "slabs_scanned", 1283fa25c503SKOSAKI Motohiro "kswapd_inodesteal", 1284fa25c503SKOSAKI Motohiro "kswapd_low_wmark_hit_quickly", 1285fa25c503SKOSAKI Motohiro "kswapd_high_wmark_hit_quickly", 1286fa25c503SKOSAKI Motohiro "pageoutrun", 1287fa25c503SKOSAKI Motohiro 1288fa25c503SKOSAKI Motohiro "pgrotated", 1289fa25c503SKOSAKI Motohiro 12905509a5d2SDave Hansen "drop_pagecache", 12915509a5d2SDave Hansen "drop_slab", 
12928e675f7aSKonstantin Khlebnikov "oom_kill", 12935509a5d2SDave Hansen 129403c5a6e1SMel Gorman #ifdef CONFIG_NUMA_BALANCING 129503c5a6e1SMel Gorman "numa_pte_updates", 129672403b4aSMel Gorman "numa_huge_pte_updates", 129703c5a6e1SMel Gorman "numa_hint_faults", 129803c5a6e1SMel Gorman "numa_hint_faults_local", 129903c5a6e1SMel Gorman "numa_pages_migrated", 130003c5a6e1SMel Gorman #endif 13015647bc29SMel Gorman #ifdef CONFIG_MIGRATION 13025647bc29SMel Gorman "pgmigrate_success", 13035647bc29SMel Gorman "pgmigrate_fail", 13041a5bae25SAnshuman Khandual "thp_migration_success", 13051a5bae25SAnshuman Khandual "thp_migration_fail", 13061a5bae25SAnshuman Khandual "thp_migration_split", 13075647bc29SMel Gorman #endif 1308fa25c503SKOSAKI Motohiro #ifdef CONFIG_COMPACTION 1309397487dbSMel Gorman "compact_migrate_scanned", 1310397487dbSMel Gorman "compact_free_scanned", 1311397487dbSMel Gorman "compact_isolated", 1312fa25c503SKOSAKI Motohiro "compact_stall", 1313fa25c503SKOSAKI Motohiro "compact_fail", 1314fa25c503SKOSAKI Motohiro "compact_success", 1315698b1b30SVlastimil Babka "compact_daemon_wake", 13167f354a54SDavid Rientjes "compact_daemon_migrate_scanned", 13177f354a54SDavid Rientjes "compact_daemon_free_scanned", 1318fa25c503SKOSAKI Motohiro #endif 1319fa25c503SKOSAKI Motohiro 1320fa25c503SKOSAKI Motohiro #ifdef CONFIG_HUGETLB_PAGE 1321fa25c503SKOSAKI Motohiro "htlb_buddy_alloc_success", 1322fa25c503SKOSAKI Motohiro "htlb_buddy_alloc_fail", 1323fa25c503SKOSAKI Motohiro #endif 1324bbb26920SMinchan Kim #ifdef CONFIG_CMA 1325bbb26920SMinchan Kim "cma_alloc_success", 1326bbb26920SMinchan Kim "cma_alloc_fail", 1327bbb26920SMinchan Kim #endif 1328fa25c503SKOSAKI Motohiro "unevictable_pgs_culled", 1329fa25c503SKOSAKI Motohiro "unevictable_pgs_scanned", 1330fa25c503SKOSAKI Motohiro "unevictable_pgs_rescued", 1331fa25c503SKOSAKI Motohiro "unevictable_pgs_mlocked", 1332fa25c503SKOSAKI Motohiro "unevictable_pgs_munlocked", 1333fa25c503SKOSAKI Motohiro "unevictable_pgs_cleared", 
1334fa25c503SKOSAKI Motohiro "unevictable_pgs_stranded", 1335fa25c503SKOSAKI Motohiro 1336fa25c503SKOSAKI Motohiro #ifdef CONFIG_TRANSPARENT_HUGEPAGE 1337fa25c503SKOSAKI Motohiro "thp_fault_alloc", 1338fa25c503SKOSAKI Motohiro "thp_fault_fallback", 133985b9f46eSDavid Rientjes "thp_fault_fallback_charge", 1340fa25c503SKOSAKI Motohiro "thp_collapse_alloc", 1341fa25c503SKOSAKI Motohiro "thp_collapse_alloc_failed", 134295ecedcdSKirill A. Shutemov "thp_file_alloc", 1343dcdf11eeSDavid Rientjes "thp_file_fallback", 134485b9f46eSDavid Rientjes "thp_file_fallback_charge", 134595ecedcdSKirill A. Shutemov "thp_file_mapped", 1346122afea9SKirill A. Shutemov "thp_split_page", 1347122afea9SKirill A. Shutemov "thp_split_page_failed", 1348f9719a03SKirill A. Shutemov "thp_deferred_split_page", 1349122afea9SKirill A. Shutemov "thp_split_pmd", 1350ce9311cfSYisheng Xie #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD 1351ce9311cfSYisheng Xie "thp_split_pud", 1352ce9311cfSYisheng Xie #endif 1353d8a8e1f0SKirill A. Shutemov "thp_zero_page_alloc", 1354d8a8e1f0SKirill A. 
Shutemov "thp_zero_page_alloc_failed", 1355225311a4SHuang Ying "thp_swpout", 1356fe490cc0SHuang Ying "thp_swpout_fallback", 1357fa25c503SKOSAKI Motohiro #endif 135809316c09SKonstantin Khlebnikov #ifdef CONFIG_MEMORY_BALLOON 135909316c09SKonstantin Khlebnikov "balloon_inflate", 136009316c09SKonstantin Khlebnikov "balloon_deflate", 136109316c09SKonstantin Khlebnikov #ifdef CONFIG_BALLOON_COMPACTION 136209316c09SKonstantin Khlebnikov "balloon_migrate", 136309316c09SKonstantin Khlebnikov #endif 136409316c09SKonstantin Khlebnikov #endif /* CONFIG_MEMORY_BALLOON */ 1365ec659934SMel Gorman #ifdef CONFIG_DEBUG_TLBFLUSH 13669824cf97SDave Hansen "nr_tlb_remote_flush", 13679824cf97SDave Hansen "nr_tlb_remote_flush_received", 13689824cf97SDave Hansen "nr_tlb_local_flush_all", 13699824cf97SDave Hansen "nr_tlb_local_flush_one", 1370ec659934SMel Gorman #endif /* CONFIG_DEBUG_TLBFLUSH */ 1371fa25c503SKOSAKI Motohiro 13724f115147SDavidlohr Bueso #ifdef CONFIG_DEBUG_VM_VMACACHE 13734f115147SDavidlohr Bueso "vmacache_find_calls", 13744f115147SDavidlohr Bueso "vmacache_find_hits", 13754f115147SDavidlohr Bueso #endif 1376cbc65df2SHuang Ying #ifdef CONFIG_SWAP 1377cbc65df2SHuang Ying "swap_ra", 1378cbc65df2SHuang Ying "swap_ra_hit", 1379cbc65df2SHuang Ying #endif 1380575299eaSSaravanan D #ifdef CONFIG_X86 1381575299eaSSaravanan D "direct_map_level2_splits", 1382575299eaSSaravanan D "direct_map_level3_splits", 1383575299eaSSaravanan D #endif 1384ebc5d83dSKonstantin Khlebnikov #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ 1385fa25c503SKOSAKI Motohiro }; 1386ebc5d83dSKonstantin Khlebnikov #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */ 1387fa25c503SKOSAKI Motohiro 13883c486871SAndrew Morton #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \ 13893c486871SAndrew Morton defined(CONFIG_PROC_FS) 13903c486871SAndrew Morton static void *frag_start(struct seq_file *m, loff_t *pos) 13913c486871SAndrew Morton { 13923c486871SAndrew Morton pg_data_t 
*pgdat; 13933c486871SAndrew Morton loff_t node = *pos; 13943c486871SAndrew Morton 13953c486871SAndrew Morton for (pgdat = first_online_pgdat(); 13963c486871SAndrew Morton pgdat && node; 13973c486871SAndrew Morton pgdat = next_online_pgdat(pgdat)) 13983c486871SAndrew Morton --node; 13993c486871SAndrew Morton 14003c486871SAndrew Morton return pgdat; 14013c486871SAndrew Morton } 14023c486871SAndrew Morton 14033c486871SAndrew Morton static void *frag_next(struct seq_file *m, void *arg, loff_t *pos) 14043c486871SAndrew Morton { 14053c486871SAndrew Morton pg_data_t *pgdat = (pg_data_t *)arg; 14063c486871SAndrew Morton 14073c486871SAndrew Morton (*pos)++; 14083c486871SAndrew Morton return next_online_pgdat(pgdat); 14093c486871SAndrew Morton } 14103c486871SAndrew Morton 14113c486871SAndrew Morton static void frag_stop(struct seq_file *m, void *arg) 14123c486871SAndrew Morton { 14133c486871SAndrew Morton } 14143c486871SAndrew Morton 1415b2bd8598SDavid Rientjes /* 1416b2bd8598SDavid Rientjes * Walk zones in a node and print using a callback. 1417b2bd8598SDavid Rientjes * If @assert_populated is true, only use callback for zones that are populated. 
1418b2bd8598SDavid Rientjes */ 14193c486871SAndrew Morton static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat, 1420727c080fSVinayak Menon bool assert_populated, bool nolock, 14213c486871SAndrew Morton void (*print)(struct seq_file *m, pg_data_t *, struct zone *)) 14223c486871SAndrew Morton { 14233c486871SAndrew Morton struct zone *zone; 14243c486871SAndrew Morton struct zone *node_zones = pgdat->node_zones; 14253c486871SAndrew Morton unsigned long flags; 14263c486871SAndrew Morton 14273c486871SAndrew Morton for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { 1428b2bd8598SDavid Rientjes if (assert_populated && !populated_zone(zone)) 14293c486871SAndrew Morton continue; 14303c486871SAndrew Morton 1431727c080fSVinayak Menon if (!nolock) 14323c486871SAndrew Morton spin_lock_irqsave(&zone->lock, flags); 14333c486871SAndrew Morton print(m, pgdat, zone); 1434727c080fSVinayak Menon if (!nolock) 14353c486871SAndrew Morton spin_unlock_irqrestore(&zone->lock, flags); 14363c486871SAndrew Morton } 14373c486871SAndrew Morton } 14383c486871SAndrew Morton #endif 14393c486871SAndrew Morton 1440d7a5752cSMel Gorman #ifdef CONFIG_PROC_FS 1441467c996cSMel Gorman static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, 1442467c996cSMel Gorman struct zone *zone) 1443467c996cSMel Gorman { 1444467c996cSMel Gorman int order; 1445467c996cSMel Gorman 1446f6ac2354SChristoph Lameter seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); 1447f6ac2354SChristoph Lameter for (order = 0; order < MAX_ORDER; ++order) 1448f6ac2354SChristoph Lameter seq_printf(m, "%6lu ", zone->free_area[order].nr_free); 1449f6ac2354SChristoph Lameter seq_putc(m, '\n'); 1450f6ac2354SChristoph Lameter } 1451467c996cSMel Gorman 1452467c996cSMel Gorman /* 1453467c996cSMel Gorman * This walks the free areas for each zone. 
1454467c996cSMel Gorman */ 1455467c996cSMel Gorman static int frag_show(struct seq_file *m, void *arg) 1456467c996cSMel Gorman { 1457467c996cSMel Gorman pg_data_t *pgdat = (pg_data_t *)arg; 1458727c080fSVinayak Menon walk_zones_in_node(m, pgdat, true, false, frag_show_print); 1459467c996cSMel Gorman return 0; 1460467c996cSMel Gorman } 1461467c996cSMel Gorman 1462467c996cSMel Gorman static void pagetypeinfo_showfree_print(struct seq_file *m, 1463467c996cSMel Gorman pg_data_t *pgdat, struct zone *zone) 1464467c996cSMel Gorman { 1465467c996cSMel Gorman int order, mtype; 1466467c996cSMel Gorman 1467467c996cSMel Gorman for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) { 1468467c996cSMel Gorman seq_printf(m, "Node %4d, zone %8s, type %12s ", 1469467c996cSMel Gorman pgdat->node_id, 1470467c996cSMel Gorman zone->name, 1471467c996cSMel Gorman migratetype_names[mtype]); 1472467c996cSMel Gorman for (order = 0; order < MAX_ORDER; ++order) { 1473467c996cSMel Gorman unsigned long freecount = 0; 1474467c996cSMel Gorman struct free_area *area; 1475467c996cSMel Gorman struct list_head *curr; 147693b3a674SMichal Hocko bool overflow = false; 1477467c996cSMel Gorman 1478467c996cSMel Gorman area = &(zone->free_area[order]); 1479467c996cSMel Gorman 148093b3a674SMichal Hocko list_for_each(curr, &area->free_list[mtype]) { 148193b3a674SMichal Hocko /* 148293b3a674SMichal Hocko * Cap the free_list iteration because it might 148393b3a674SMichal Hocko * be really large and we are under a spinlock 148493b3a674SMichal Hocko * so a long time spent here could trigger a 148593b3a674SMichal Hocko * hard lockup detector. Anyway this is a 148693b3a674SMichal Hocko * debugging tool so knowing there is a handful 148793b3a674SMichal Hocko * of pages of this order should be more than 148893b3a674SMichal Hocko * sufficient. 
148993b3a674SMichal Hocko */ 149093b3a674SMichal Hocko if (++freecount >= 100000) { 149193b3a674SMichal Hocko overflow = true; 149293b3a674SMichal Hocko break; 149393b3a674SMichal Hocko } 149493b3a674SMichal Hocko } 149593b3a674SMichal Hocko seq_printf(m, "%s%6lu ", overflow ? ">" : "", freecount); 149693b3a674SMichal Hocko spin_unlock_irq(&zone->lock); 149793b3a674SMichal Hocko cond_resched(); 149893b3a674SMichal Hocko spin_lock_irq(&zone->lock); 1499467c996cSMel Gorman } 1500467c996cSMel Gorman seq_putc(m, '\n'); 1501467c996cSMel Gorman } 1502467c996cSMel Gorman } 1503467c996cSMel Gorman 1504467c996cSMel Gorman /* Print out the free pages at each order for each migatetype */ 150533090af9SMiaohe Lin static void pagetypeinfo_showfree(struct seq_file *m, void *arg) 1506467c996cSMel Gorman { 1507467c996cSMel Gorman int order; 1508467c996cSMel Gorman pg_data_t *pgdat = (pg_data_t *)arg; 1509467c996cSMel Gorman 1510467c996cSMel Gorman /* Print header */ 1511467c996cSMel Gorman seq_printf(m, "%-43s ", "Free pages count per migrate type at order"); 1512467c996cSMel Gorman for (order = 0; order < MAX_ORDER; ++order) 1513467c996cSMel Gorman seq_printf(m, "%6d ", order); 1514467c996cSMel Gorman seq_putc(m, '\n'); 1515467c996cSMel Gorman 1516727c080fSVinayak Menon walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print); 1517467c996cSMel Gorman } 1518467c996cSMel Gorman 1519467c996cSMel Gorman static void pagetypeinfo_showblockcount_print(struct seq_file *m, 1520467c996cSMel Gorman pg_data_t *pgdat, struct zone *zone) 1521467c996cSMel Gorman { 1522467c996cSMel Gorman int mtype; 1523467c996cSMel Gorman unsigned long pfn; 1524467c996cSMel Gorman unsigned long start_pfn = zone->zone_start_pfn; 1525108bcc96SCody P Schafer unsigned long end_pfn = zone_end_pfn(zone); 1526467c996cSMel Gorman unsigned long count[MIGRATE_TYPES] = { 0, }; 1527467c996cSMel Gorman 1528467c996cSMel Gorman for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { 1529467c996cSMel 
Gorman struct page *page; 1530467c996cSMel Gorman 1531d336e94eSMichal Hocko page = pfn_to_online_page(pfn); 1532d336e94eSMichal Hocko if (!page) 1533467c996cSMel Gorman continue; 1534467c996cSMel Gorman 1535a91c43c7SJoonsoo Kim if (page_zone(page) != zone) 1536a91c43c7SJoonsoo Kim continue; 1537a91c43c7SJoonsoo Kim 1538467c996cSMel Gorman mtype = get_pageblock_migratetype(page); 1539467c996cSMel Gorman 1540e80d6a24SMel Gorman if (mtype < MIGRATE_TYPES) 1541467c996cSMel Gorman count[mtype]++; 1542467c996cSMel Gorman } 1543467c996cSMel Gorman 1544467c996cSMel Gorman /* Print counts */ 1545467c996cSMel Gorman seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); 1546467c996cSMel Gorman for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) 1547467c996cSMel Gorman seq_printf(m, "%12lu ", count[mtype]); 1548467c996cSMel Gorman seq_putc(m, '\n'); 1549467c996cSMel Gorman } 1550467c996cSMel Gorman 1551f113e641SSeongJae Park /* Print out the number of pageblocks for each migratetype */ 155233090af9SMiaohe Lin static void pagetypeinfo_showblockcount(struct seq_file *m, void *arg) 1553467c996cSMel Gorman { 1554467c996cSMel Gorman int mtype; 1555467c996cSMel Gorman pg_data_t *pgdat = (pg_data_t *)arg; 1556467c996cSMel Gorman 1557467c996cSMel Gorman seq_printf(m, "\n%-23s", "Number of blocks type "); 1558467c996cSMel Gorman for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) 1559467c996cSMel Gorman seq_printf(m, "%12s ", migratetype_names[mtype]); 1560467c996cSMel Gorman seq_putc(m, '\n'); 1561727c080fSVinayak Menon walk_zones_in_node(m, pgdat, true, false, 1562727c080fSVinayak Menon pagetypeinfo_showblockcount_print); 1563467c996cSMel Gorman } 1564467c996cSMel Gorman 156548c96a36SJoonsoo Kim /* 156648c96a36SJoonsoo Kim * Print out the number of pageblocks for each migratetype that contain pages 156748c96a36SJoonsoo Kim * of other types. This gives an indication of how well fallbacks are being 156848c96a36SJoonsoo Kim * contained by rmqueue_fallback(). 
It requires information from PAGE_OWNER 156948c96a36SJoonsoo Kim * to determine what is going on 157048c96a36SJoonsoo Kim */ 157148c96a36SJoonsoo Kim static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat) 157248c96a36SJoonsoo Kim { 157348c96a36SJoonsoo Kim #ifdef CONFIG_PAGE_OWNER 157448c96a36SJoonsoo Kim int mtype; 157548c96a36SJoonsoo Kim 15767dd80b8aSVlastimil Babka if (!static_branch_unlikely(&page_owner_inited)) 157748c96a36SJoonsoo Kim return; 157848c96a36SJoonsoo Kim 157948c96a36SJoonsoo Kim drain_all_pages(NULL); 158048c96a36SJoonsoo Kim 158148c96a36SJoonsoo Kim seq_printf(m, "\n%-23s", "Number of mixed blocks "); 158248c96a36SJoonsoo Kim for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) 158348c96a36SJoonsoo Kim seq_printf(m, "%12s ", migratetype_names[mtype]); 158448c96a36SJoonsoo Kim seq_putc(m, '\n'); 158548c96a36SJoonsoo Kim 1586727c080fSVinayak Menon walk_zones_in_node(m, pgdat, true, true, 1587727c080fSVinayak Menon pagetypeinfo_showmixedcount_print); 158848c96a36SJoonsoo Kim #endif /* CONFIG_PAGE_OWNER */ 158948c96a36SJoonsoo Kim } 159048c96a36SJoonsoo Kim 1591467c996cSMel Gorman /* 1592467c996cSMel Gorman * This prints out statistics in relation to grouping pages by mobility. 1593467c996cSMel Gorman * It is expensive to collect so do not constantly read the file. 
1594467c996cSMel Gorman */ 1595467c996cSMel Gorman static int pagetypeinfo_show(struct seq_file *m, void *arg) 1596467c996cSMel Gorman { 1597467c996cSMel Gorman pg_data_t *pgdat = (pg_data_t *)arg; 1598467c996cSMel Gorman 159941b25a37SKOSAKI Motohiro /* check memoryless node */ 1600a47b53c5SLai Jiangshan if (!node_state(pgdat->node_id, N_MEMORY)) 160141b25a37SKOSAKI Motohiro return 0; 160241b25a37SKOSAKI Motohiro 1603467c996cSMel Gorman seq_printf(m, "Page block order: %d\n", pageblock_order); 1604467c996cSMel Gorman seq_printf(m, "Pages per block: %lu\n", pageblock_nr_pages); 1605467c996cSMel Gorman seq_putc(m, '\n'); 1606467c996cSMel Gorman pagetypeinfo_showfree(m, pgdat); 1607467c996cSMel Gorman pagetypeinfo_showblockcount(m, pgdat); 160848c96a36SJoonsoo Kim pagetypeinfo_showmixedcount(m, pgdat); 1609467c996cSMel Gorman 1610f6ac2354SChristoph Lameter return 0; 1611f6ac2354SChristoph Lameter } 1612f6ac2354SChristoph Lameter 16138f32f7e5SAlexey Dobriyan static const struct seq_operations fragmentation_op = { 1614f6ac2354SChristoph Lameter .start = frag_start, 1615f6ac2354SChristoph Lameter .next = frag_next, 1616f6ac2354SChristoph Lameter .stop = frag_stop, 1617f6ac2354SChristoph Lameter .show = frag_show, 1618f6ac2354SChristoph Lameter }; 1619f6ac2354SChristoph Lameter 162074e2e8e8SAlexey Dobriyan static const struct seq_operations pagetypeinfo_op = { 1621467c996cSMel Gorman .start = frag_start, 1622467c996cSMel Gorman .next = frag_next, 1623467c996cSMel Gorman .stop = frag_stop, 1624467c996cSMel Gorman .show = pagetypeinfo_show, 1625467c996cSMel Gorman }; 1626467c996cSMel Gorman 1627e2ecc8a7SMel Gorman static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone) 1628e2ecc8a7SMel Gorman { 1629e2ecc8a7SMel Gorman int zid; 1630e2ecc8a7SMel Gorman 1631e2ecc8a7SMel Gorman for (zid = 0; zid < MAX_NR_ZONES; zid++) { 1632e2ecc8a7SMel Gorman struct zone *compare = &pgdat->node_zones[zid]; 1633e2ecc8a7SMel Gorman 1634e2ecc8a7SMel Gorman if 
(populated_zone(compare)) 1635e2ecc8a7SMel Gorman return zone == compare; 1636e2ecc8a7SMel Gorman } 1637e2ecc8a7SMel Gorman 1638e2ecc8a7SMel Gorman return false; 1639e2ecc8a7SMel Gorman } 1640e2ecc8a7SMel Gorman 1641467c996cSMel Gorman static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, 1642467c996cSMel Gorman struct zone *zone) 1643f6ac2354SChristoph Lameter { 1644f6ac2354SChristoph Lameter int i; 1645f6ac2354SChristoph Lameter seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name); 1646e2ecc8a7SMel Gorman if (is_zone_first_populated(pgdat, zone)) { 1647e2ecc8a7SMel Gorman seq_printf(m, "\n per-node stats"); 1648e2ecc8a7SMel Gorman for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { 164969473e5dSMuchun Song unsigned long pages = node_page_state_pages(pgdat, i); 165069473e5dSMuchun Song 165169473e5dSMuchun Song if (vmstat_item_print_in_thp(i)) 165269473e5dSMuchun Song pages /= HPAGE_PMD_NR; 16539d7ea9a2SKonstantin Khlebnikov seq_printf(m, "\n %-12s %lu", node_stat_name(i), 165469473e5dSMuchun Song pages); 1655e2ecc8a7SMel Gorman } 1656e2ecc8a7SMel Gorman } 1657f6ac2354SChristoph Lameter seq_printf(m, 1658f6ac2354SChristoph Lameter "\n pages free %lu" 1659f6ac2354SChristoph Lameter "\n min %lu" 1660f6ac2354SChristoph Lameter "\n low %lu" 1661f6ac2354SChristoph Lameter "\n high %lu" 1662f6ac2354SChristoph Lameter "\n spanned %lu" 16639feedc9dSJiang Liu "\n present %lu" 16643c381db1SDavid Hildenbrand "\n managed %lu" 16653c381db1SDavid Hildenbrand "\n cma %lu", 166688f5acf8SMel Gorman zone_page_state(zone, NR_FREE_PAGES), 166741858966SMel Gorman min_wmark_pages(zone), 166841858966SMel Gorman low_wmark_pages(zone), 166941858966SMel Gorman high_wmark_pages(zone), 1670f6ac2354SChristoph Lameter zone->spanned_pages, 16719feedc9dSJiang Liu zone->present_pages, 16723c381db1SDavid Hildenbrand zone_managed_pages(zone), 16733c381db1SDavid Hildenbrand zone_cma_pages(zone)); 16742244b95aSChristoph Lameter 1675f6ac2354SChristoph Lameter seq_printf(m, 
16763484b2deSMel Gorman "\n protection: (%ld", 1677f6ac2354SChristoph Lameter zone->lowmem_reserve[0]); 1678f6ac2354SChristoph Lameter for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) 16793484b2deSMel Gorman seq_printf(m, ", %ld", zone->lowmem_reserve[i]); 16807dfb8bf3SDavid Rientjes seq_putc(m, ')'); 16817dfb8bf3SDavid Rientjes 1682a8a4b7aeSBaoquan He /* If unpopulated, no other information is useful */ 1683a8a4b7aeSBaoquan He if (!populated_zone(zone)) { 1684a8a4b7aeSBaoquan He seq_putc(m, '\n'); 1685a8a4b7aeSBaoquan He return; 1686a8a4b7aeSBaoquan He } 1687a8a4b7aeSBaoquan He 16887dfb8bf3SDavid Rientjes for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 16899d7ea9a2SKonstantin Khlebnikov seq_printf(m, "\n %-12s %lu", zone_stat_name(i), 16907dfb8bf3SDavid Rientjes zone_page_state(zone, i)); 16917dfb8bf3SDavid Rientjes 16923a321d2aSKemi Wang #ifdef CONFIG_NUMA 1693f19298b9SMel Gorman for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) 16949d7ea9a2SKonstantin Khlebnikov seq_printf(m, "\n %-12s %lu", numa_stat_name(i), 1695f19298b9SMel Gorman zone_numa_event_state(zone, i)); 16963a321d2aSKemi Wang #endif 16973a321d2aSKemi Wang 16987dfb8bf3SDavid Rientjes seq_printf(m, "\n pagesets"); 1699f6ac2354SChristoph Lameter for_each_online_cpu(i) { 170028f836b6SMel Gorman struct per_cpu_pages *pcp; 170128f836b6SMel Gorman struct per_cpu_zonestat __maybe_unused *pzstats; 1702f6ac2354SChristoph Lameter 170328f836b6SMel Gorman pcp = per_cpu_ptr(zone->per_cpu_pageset, i); 1704f6ac2354SChristoph Lameter seq_printf(m, 17053dfa5721SChristoph Lameter "\n cpu: %i" 1706f6ac2354SChristoph Lameter "\n count: %i" 1707f6ac2354SChristoph Lameter "\n high: %i" 1708f6ac2354SChristoph Lameter "\n batch: %i", 17093dfa5721SChristoph Lameter i, 171028f836b6SMel Gorman pcp->count, 171128f836b6SMel Gorman pcp->high, 171228f836b6SMel Gorman pcp->batch); 1713df9ecabaSChristoph Lameter #ifdef CONFIG_SMP 171428f836b6SMel Gorman pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i); 1715df9ecabaSChristoph Lameter 
seq_printf(m, "\n vm stats threshold: %d", 171628f836b6SMel Gorman pzstats->stat_threshold); 1717df9ecabaSChristoph Lameter #endif 1718f6ac2354SChristoph Lameter } 1719f6ac2354SChristoph Lameter seq_printf(m, 1720599d0c95SMel Gorman "\n node_unreclaimable: %u" 17213a50d14dSAndrey Ryabinin "\n start_pfn: %lu", 1722c73322d0SJohannes Weiner pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES, 17233a50d14dSAndrey Ryabinin zone->zone_start_pfn); 1724f6ac2354SChristoph Lameter seq_putc(m, '\n'); 1725f6ac2354SChristoph Lameter } 1726467c996cSMel Gorman 1727467c996cSMel Gorman /* 1728b2bd8598SDavid Rientjes * Output information about zones in @pgdat. All zones are printed regardless 1729b2bd8598SDavid Rientjes * of whether they are populated or not: lowmem_reserve_ratio operates on the 1730b2bd8598SDavid Rientjes * set of all zones and userspace would not be aware of such zones if they are 1731b2bd8598SDavid Rientjes * suppressed here (zoneinfo displays the effect of lowmem_reserve_ratio). 1732467c996cSMel Gorman */ 1733467c996cSMel Gorman static int zoneinfo_show(struct seq_file *m, void *arg) 1734467c996cSMel Gorman { 1735467c996cSMel Gorman pg_data_t *pgdat = (pg_data_t *)arg; 1736727c080fSVinayak Menon walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print); 1737f6ac2354SChristoph Lameter return 0; 1738f6ac2354SChristoph Lameter } 1739f6ac2354SChristoph Lameter 17405c9fe628SAlexey Dobriyan static const struct seq_operations zoneinfo_op = { 1741f6ac2354SChristoph Lameter .start = frag_start, /* iterate over all zones. The same as in 1742f6ac2354SChristoph Lameter * fragmentation. 
*/ 1743f6ac2354SChristoph Lameter .next = frag_next, 1744f6ac2354SChristoph Lameter .stop = frag_stop, 1745f6ac2354SChristoph Lameter .show = zoneinfo_show, 1746f6ac2354SChristoph Lameter }; 1747f6ac2354SChristoph Lameter 17489d7ea9a2SKonstantin Khlebnikov #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \ 1749f19298b9SMel Gorman NR_VM_NUMA_EVENT_ITEMS + \ 17509d7ea9a2SKonstantin Khlebnikov NR_VM_NODE_STAT_ITEMS + \ 17519d7ea9a2SKonstantin Khlebnikov NR_VM_WRITEBACK_STAT_ITEMS + \ 17529d7ea9a2SKonstantin Khlebnikov (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \ 17539d7ea9a2SKonstantin Khlebnikov NR_VM_EVENT_ITEMS : 0)) 175479da826aSMichael Rubin 1755f6ac2354SChristoph Lameter static void *vmstat_start(struct seq_file *m, loff_t *pos) 1756f6ac2354SChristoph Lameter { 17572244b95aSChristoph Lameter unsigned long *v; 17589d7ea9a2SKonstantin Khlebnikov int i; 1759f6ac2354SChristoph Lameter 17609d7ea9a2SKonstantin Khlebnikov if (*pos >= NR_VMSTAT_ITEMS) 1761f6ac2354SChristoph Lameter return NULL; 1762f6ac2354SChristoph Lameter 17639d7ea9a2SKonstantin Khlebnikov BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS); 1764f19298b9SMel Gorman fold_vm_numa_events(); 17659d7ea9a2SKonstantin Khlebnikov v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL); 17662244b95aSChristoph Lameter m->private = v; 17672244b95aSChristoph Lameter if (!v) 1768f6ac2354SChristoph Lameter return ERR_PTR(-ENOMEM); 17692244b95aSChristoph Lameter for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 1770c41f012aSMichal Hocko v[i] = global_zone_page_state(i); 177179da826aSMichael Rubin v += NR_VM_ZONE_STAT_ITEMS; 177279da826aSMichael Rubin 17733a321d2aSKemi Wang #ifdef CONFIG_NUMA 1774f19298b9SMel Gorman for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) 1775f19298b9SMel Gorman v[i] = global_numa_event_state(i); 1776f19298b9SMel Gorman v += NR_VM_NUMA_EVENT_ITEMS; 17773a321d2aSKemi Wang #endif 17783a321d2aSKemi Wang 177969473e5dSMuchun Song for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { 
1780ea426c2aSRoman Gushchin v[i] = global_node_page_state_pages(i); 178169473e5dSMuchun Song if (vmstat_item_print_in_thp(i)) 178269473e5dSMuchun Song v[i] /= HPAGE_PMD_NR; 178369473e5dSMuchun Song } 178475ef7184SMel Gorman v += NR_VM_NODE_STAT_ITEMS; 178575ef7184SMel Gorman 178679da826aSMichael Rubin global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD, 178779da826aSMichael Rubin v + NR_DIRTY_THRESHOLD); 178879da826aSMichael Rubin v += NR_VM_WRITEBACK_STAT_ITEMS; 178979da826aSMichael Rubin 1790f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS 179179da826aSMichael Rubin all_vm_events(v); 179279da826aSMichael Rubin v[PGPGIN] /= 2; /* sectors -> kbytes */ 179379da826aSMichael Rubin v[PGPGOUT] /= 2; 1794f8891e5eSChristoph Lameter #endif 1795ff8b16d7SWu Fengguang return (unsigned long *)m->private + *pos; 1796f6ac2354SChristoph Lameter } 1797f6ac2354SChristoph Lameter 1798f6ac2354SChristoph Lameter static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos) 1799f6ac2354SChristoph Lameter { 1800f6ac2354SChristoph Lameter (*pos)++; 18019d7ea9a2SKonstantin Khlebnikov if (*pos >= NR_VMSTAT_ITEMS) 1802f6ac2354SChristoph Lameter return NULL; 1803f6ac2354SChristoph Lameter return (unsigned long *)m->private + *pos; 1804f6ac2354SChristoph Lameter } 1805f6ac2354SChristoph Lameter 1806f6ac2354SChristoph Lameter static int vmstat_show(struct seq_file *m, void *arg) 1807f6ac2354SChristoph Lameter { 1808f6ac2354SChristoph Lameter unsigned long *l = arg; 1809f6ac2354SChristoph Lameter unsigned long off = l - (unsigned long *)m->private; 181068ba0326SAlexey Dobriyan 181168ba0326SAlexey Dobriyan seq_puts(m, vmstat_text[off]); 181275ba1d07SJoe Perches seq_put_decimal_ull(m, " ", *l); 181368ba0326SAlexey Dobriyan seq_putc(m, '\n'); 18148d92890bSNeilBrown 18158d92890bSNeilBrown if (off == NR_VMSTAT_ITEMS - 1) { 18168d92890bSNeilBrown /* 18178d92890bSNeilBrown * We've come to the end - add any deprecated counters to avoid 18188d92890bSNeilBrown * breaking userspace which might 
depend on them being present. 18198d92890bSNeilBrown */ 18208d92890bSNeilBrown seq_puts(m, "nr_unstable 0\n"); 18218d92890bSNeilBrown } 1822f6ac2354SChristoph Lameter return 0; 1823f6ac2354SChristoph Lameter } 1824f6ac2354SChristoph Lameter 1825f6ac2354SChristoph Lameter static void vmstat_stop(struct seq_file *m, void *arg) 1826f6ac2354SChristoph Lameter { 1827f6ac2354SChristoph Lameter kfree(m->private); 1828f6ac2354SChristoph Lameter m->private = NULL; 1829f6ac2354SChristoph Lameter } 1830f6ac2354SChristoph Lameter 1831b6aa44abSAlexey Dobriyan static const struct seq_operations vmstat_op = { 1832f6ac2354SChristoph Lameter .start = vmstat_start, 1833f6ac2354SChristoph Lameter .next = vmstat_next, 1834f6ac2354SChristoph Lameter .stop = vmstat_stop, 1835f6ac2354SChristoph Lameter .show = vmstat_show, 1836f6ac2354SChristoph Lameter }; 1837f6ac2354SChristoph Lameter #endif /* CONFIG_PROC_FS */ 1838f6ac2354SChristoph Lameter 1839df9ecabaSChristoph Lameter #ifdef CONFIG_SMP 1840d1187ed2SChristoph Lameter static DEFINE_PER_CPU(struct delayed_work, vmstat_work); 184177461ab3SChristoph Lameter int sysctl_stat_interval __read_mostly = HZ; 1842d1187ed2SChristoph Lameter 184352b6f46bSHugh Dickins #ifdef CONFIG_PROC_FS 184452b6f46bSHugh Dickins static void refresh_vm_stats(struct work_struct *work) 184552b6f46bSHugh Dickins { 184652b6f46bSHugh Dickins refresh_cpu_vm_stats(true); 184752b6f46bSHugh Dickins } 184852b6f46bSHugh Dickins 184952b6f46bSHugh Dickins int vmstat_refresh(struct ctl_table *table, int write, 185032927393SChristoph Hellwig void *buffer, size_t *lenp, loff_t *ppos) 185152b6f46bSHugh Dickins { 185252b6f46bSHugh Dickins long val; 185352b6f46bSHugh Dickins int err; 185452b6f46bSHugh Dickins int i; 185552b6f46bSHugh Dickins 185652b6f46bSHugh Dickins /* 185752b6f46bSHugh Dickins * The regular update, every sysctl_stat_interval, may come later 185852b6f46bSHugh Dickins * than expected: leaving a significant amount in per_cpu buckets. 
185952b6f46bSHugh Dickins * This is particularly misleading when checking a quantity of HUGE 186052b6f46bSHugh Dickins * pages, immediately after running a test. /proc/sys/vm/stat_refresh, 186152b6f46bSHugh Dickins * which can equally be echo'ed to or cat'ted from (by root), 186252b6f46bSHugh Dickins * can be used to update the stats just before reading them. 186352b6f46bSHugh Dickins * 1864c41f012aSMichal Hocko * Oh, and since global_zone_page_state() etc. are so careful to hide 186552b6f46bSHugh Dickins * transiently negative values, report an error here if any of 186652b6f46bSHugh Dickins * the stats is negative, so we know to go looking for imbalance. 186752b6f46bSHugh Dickins */ 186852b6f46bSHugh Dickins err = schedule_on_each_cpu(refresh_vm_stats); 186952b6f46bSHugh Dickins if (err) 187052b6f46bSHugh Dickins return err; 187152b6f46bSHugh Dickins for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { 187275083aaeSHugh Dickins /* 187375083aaeSHugh Dickins * Skip checking stats known to go negative occasionally. 187475083aaeSHugh Dickins */ 187575083aaeSHugh Dickins switch (i) { 187675083aaeSHugh Dickins case NR_ZONE_WRITE_PENDING: 187775083aaeSHugh Dickins case NR_FREE_CMA_PAGES: 187875083aaeSHugh Dickins continue; 187975083aaeSHugh Dickins } 188075ef7184SMel Gorman val = atomic_long_read(&vm_zone_stat[i]); 188152b6f46bSHugh Dickins if (val < 0) { 188252b6f46bSHugh Dickins pr_warn("%s: %s %ld\n", 18839d7ea9a2SKonstantin Khlebnikov __func__, zone_stat_name(i), val); 188452b6f46bSHugh Dickins } 188552b6f46bSHugh Dickins } 188676d8cc3cSHugh Dickins for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { 188775083aaeSHugh Dickins /* 188875083aaeSHugh Dickins * Skip checking stats known to go negative occasionally. 
	 */
	switch (i) {
	case NR_WRITEBACK:
		continue;
	}
	val = atomic_long_read(&vm_node_stat[i]);
	if (val < 0) {
		pr_warn("%s: %s %ld\n",
			__func__, node_stat_name(i), val);
	}
	}
	if (write)
		*ppos += *lenp;
	else
		*lenp = 0;
	return 0;
}
#endif /* CONFIG_PROC_FS */

/*
 * Deferrable per-cpu work handler: fold this CPU's vmstat differentials
 * into the global counters. If refresh_cpu_vm_stats() reports that any
 * counters changed, requeue ourselves on this CPU for the next interval;
 * otherwise let the work lapse (vmstat_shepherd restarts it on demand).
 */
static void vmstat_update(struct work_struct *w)
{
	if (refresh_cpu_vm_stats(true)) {
		/*
		 * Counters were updated so we expect more updates
		 * to occur in the future. Keep on running the
		 * update worker thread.
		 */
		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
			this_cpu_ptr(&vmstat_work),
			round_jiffies_relative(sysctl_stat_interval));
	}
}

/*
 * Check if the diffs for a certain cpu indicate that
 * an update is needed.
 */
static bool need_update(int cpu)
{
	pg_data_t *last_pgdat = NULL;
	struct zone *zone;

	for_each_populated_zone(zone) {
		struct per_cpu_zonestat *pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
		struct per_cpu_nodestat *n;

		/*
		 * The fast way of checking if there are any vmstat diffs.
		 */
		if (memchr_inv(pzstats->vm_stat_diff, 0, sizeof(pzstats->vm_stat_diff)))
			return true;

		/*
		 * Node stats live per pgdat, and multiple zones share one
		 * pgdat; check each pgdat's diffs only once.
		 */
		if (last_pgdat == zone->zone_pgdat)
			continue;
		last_pgdat = zone->zone_pgdat;
		n = per_cpu_ptr(zone->zone_pgdat->per_cpu_nodestats, cpu);
		if (memchr_inv(n->vm_node_stat_diff, 0, sizeof(n->vm_node_stat_diff)))
			return true;
	}
	return false;
}

/*
 * Switch off vmstat processing and then fold all the remaining differentials
 * until the diffs stay at zero. The function is used by NOHZ and can only be
 * invoked when tick processing is not active.
 */
void quiet_vmstat(void)
{
	/* Too early in boot for the machinery below to matter. */
	if (system_state != SYSTEM_RUNNING)
		return;

	/* No vmstat_update pending on this CPU: nothing to fold. */
	if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
		return;

	/* No outstanding differentials on this CPU either. */
	if (!need_update(smp_processor_id()))
		return;

	/*
	 * Just refresh counters and do not care about the pending delayed
	 * vmstat_update. It doesn't fire that often to matter and canceling
	 * it would be too expensive from this path.
	 * vmstat_shepherd will take care about that for us.
	 */
	refresh_cpu_vm_stats(false);
}

/*
 * Shepherd worker thread that checks the
 * differentials of processors that have their worker
 * threads for vm statistics updates disabled because of
 * inactivity.
19817cc36bbdSChristoph Lameter */ 19827cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w); 19837cc36bbdSChristoph Lameter 19840eb77e98SChristoph Lameter static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd); 19857cc36bbdSChristoph Lameter 19867cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w) 19877cc36bbdSChristoph Lameter { 19887cc36bbdSChristoph Lameter int cpu; 19897cc36bbdSChristoph Lameter 19907625eccdSSebastian Andrzej Siewior cpus_read_lock(); 19917cc36bbdSChristoph Lameter /* Check processors whose vmstat worker threads have been disabled */ 19927b8da4c7SChristoph Lameter for_each_online_cpu(cpu) { 1993f01f17d3SMichal Hocko struct delayed_work *dw = &per_cpu(vmstat_work, cpu); 19947cc36bbdSChristoph Lameter 19957b8da4c7SChristoph Lameter if (!delayed_work_pending(dw) && need_update(cpu)) 1996ce612879SMichal Hocko queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0); 1997fbcc8183SJiang Biao 1998fbcc8183SJiang Biao cond_resched(); 1999f01f17d3SMichal Hocko } 20007625eccdSSebastian Andrzej Siewior cpus_read_unlock(); 20017cc36bbdSChristoph Lameter 20027cc36bbdSChristoph Lameter schedule_delayed_work(&shepherd, 20037cc36bbdSChristoph Lameter round_jiffies_relative(sysctl_stat_interval)); 20047cc36bbdSChristoph Lameter } 20057cc36bbdSChristoph Lameter 20067cc36bbdSChristoph Lameter static void __init start_shepherd_timer(void) 20077cc36bbdSChristoph Lameter { 20087cc36bbdSChristoph Lameter int cpu; 20097cc36bbdSChristoph Lameter 20107cc36bbdSChristoph Lameter for_each_possible_cpu(cpu) 2011ccde8bd4SMichal Hocko INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu), 20127cc36bbdSChristoph Lameter vmstat_update); 20137cc36bbdSChristoph Lameter 20147cc36bbdSChristoph Lameter schedule_delayed_work(&shepherd, 20157cc36bbdSChristoph Lameter round_jiffies_relative(sysctl_stat_interval)); 2016d1187ed2SChristoph Lameter } 2017d1187ed2SChristoph Lameter 201803e86dbaSTim Chen static void __init init_cpu_node_state(void) 
201903e86dbaSTim Chen { 20204c501327SSebastian Andrzej Siewior int node; 202103e86dbaSTim Chen 20224c501327SSebastian Andrzej Siewior for_each_online_node(node) { 20234c501327SSebastian Andrzej Siewior if (cpumask_weight(cpumask_of_node(node)) > 0) 20244c501327SSebastian Andrzej Siewior node_set_state(node, N_CPU); 20254c501327SSebastian Andrzej Siewior } 202603e86dbaSTim Chen } 202703e86dbaSTim Chen 20285438da97SSebastian Andrzej Siewior static int vmstat_cpu_online(unsigned int cpu) 2029807a1bd2SToshi Kani { 20305ee28a44SKAMEZAWA Hiroyuki refresh_zone_stat_thresholds(); 2031ad596925SChristoph Lameter node_set_state(cpu_to_node(cpu), N_CPU); 20325438da97SSebastian Andrzej Siewior return 0; 2033df9ecabaSChristoph Lameter } 2034df9ecabaSChristoph Lameter 20355438da97SSebastian Andrzej Siewior static int vmstat_cpu_down_prep(unsigned int cpu) 20365438da97SSebastian Andrzej Siewior { 20375438da97SSebastian Andrzej Siewior cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu)); 20385438da97SSebastian Andrzej Siewior return 0; 20395438da97SSebastian Andrzej Siewior } 20405438da97SSebastian Andrzej Siewior 20415438da97SSebastian Andrzej Siewior static int vmstat_cpu_dead(unsigned int cpu) 20425438da97SSebastian Andrzej Siewior { 20435438da97SSebastian Andrzej Siewior const struct cpumask *node_cpus; 20445438da97SSebastian Andrzej Siewior int node; 20455438da97SSebastian Andrzej Siewior 20465438da97SSebastian Andrzej Siewior node = cpu_to_node(cpu); 20475438da97SSebastian Andrzej Siewior 20485438da97SSebastian Andrzej Siewior refresh_zone_stat_thresholds(); 20495438da97SSebastian Andrzej Siewior node_cpus = cpumask_of_node(node); 20505438da97SSebastian Andrzej Siewior if (cpumask_weight(node_cpus) > 0) 20515438da97SSebastian Andrzej Siewior return 0; 20525438da97SSebastian Andrzej Siewior 20535438da97SSebastian Andrzej Siewior node_clear_state(node, N_CPU); 20545438da97SSebastian Andrzej Siewior return 0; 20555438da97SSebastian Andrzej Siewior } 20565438da97SSebastian 
#endif

/* Workqueue for deferred mm housekeeping work (vmstat updates and friends). */
struct workqueue_struct *mm_percpu_wq;

/*
 * Boot-time initialization of mm internals: create mm_percpu_wq, register
 * the vmstat CPU-hotplug callbacks and start the shepherd (SMP only), and
 * create the /proc memory-statistics files (PROC_FS only).
 */
void __init init_mm_internals(void)
{
	int ret __maybe_unused;

	mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);

#ifdef CONFIG_SMP
	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
					NULL, vmstat_cpu_dead);
	if (ret < 0)
		pr_err("vmstat: failed to register 'dead' hotplug state\n");

	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
					vmstat_cpu_online,
					vmstat_cpu_down_prep);
	if (ret < 0)
		pr_err("vmstat: failed to register 'online' hotplug state\n");

	/* Hold the hotplug lock while scanning node CPU masks. */
	cpus_read_lock();
	init_cpu_node_state();
	cpus_read_unlock();

	start_shepherd_timer();
#endif
#ifdef CONFIG_PROC_FS
	proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
	proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
	proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
	proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
#endif
}

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)

/*
 * Return an index indicating how much of the available free memory is
 * unusable for an allocation of the requested size.
 */
static int unusable_free_index(unsigned int order,
				struct contig_page_info *info)
{
	/* No free memory is interpreted as all free memory is unusable */
	if (info->free_pages == 0)
		return 1000;

	/*
	 * Index should be a value between 0 and 1. Return a value to 3
	 * decimal places.
	 *
	 * 0 => no fragmentation
	 * 1 => high fragmentation
	 */
	return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);

}

/* Emit one "Node N, zone X" row of per-order unusable free indexes. */
static void unusable_show_print(struct seq_file *m,
				pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
			pgdat->node_id,
			zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = unusable_free_index(order, &info);
		/* Index is in thousandths; print it as a 3-decimal fraction. */
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}

/*
 * Display unusable free space index
 *
 * The unusable free space index measures how much of the available free
 * memory cannot be used to satisfy an allocation of a given size and is a
 * value between 0 and 1. The higher the value, the more of free memory is
 * unusable and by implication, the worse the external fragmentation is. This
 * can be expressed as a percentage by multiplying by 100.
 */
static int unusable_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_MEMORY))
		return 0;

	walk_zones_in_node(m, pgdat, true, false, unusable_show_print);

	return 0;
}

static const struct seq_operations unusable_sops = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= unusable_show,
};

DEFINE_SEQ_ATTRIBUTE(unusable);

/* Emit one "Node N, zone X" row of per-order fragmentation indexes. */
static void extfrag_show_print(struct seq_file *m,
				pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;

	/* Alloc on stack as interrupts are disabled for zone walk */
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
			pgdat->node_id,
			zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = __fragmentation_index(order, &info);
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}

/*
 * Display fragmentation index for orders that allocations would fail for
 */
static int extfrag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);

	return 0;
}

static const struct seq_operations extfrag_sops = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= extfrag_show,
};

DEFINE_SEQ_ATTRIBUTE(extfrag);

/*
 * Create the debugfs directory "extfrag" and the read-only files exposing
 * the unusable free space and external fragmentation indexes.
 * debugfs_create_* handle their own error reporting, so returns are ignored.
 */
static int __init extfrag_debug_init(void)
{
	struct dentry *extfrag_debug_root;

	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);

	debugfs_create_file("unusable_index", 0444, extfrag_debug_root, NULL,
			&unusable_fops);

	debugfs_create_file("extfrag_index", 0444, extfrag_debug_root, NULL,
			&extfrag_fops);

	return 0;
}

module_init(extfrag_debug_init);
#endif