1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 2f6ac2354SChristoph Lameter /* 3f6ac2354SChristoph Lameter * linux/mm/vmstat.c 4f6ac2354SChristoph Lameter * 5f6ac2354SChristoph Lameter * Manages VM statistics 6f6ac2354SChristoph Lameter * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 72244b95aSChristoph Lameter * 82244b95aSChristoph Lameter * zoned VM statistics 92244b95aSChristoph Lameter * Copyright (C) 2006 Silicon Graphics, Inc., 102244b95aSChristoph Lameter * Christoph Lameter <christoph@lameter.com> 117cc36bbdSChristoph Lameter * Copyright (C) 2008-2014 Christoph Lameter 12f6ac2354SChristoph Lameter */ 138f32f7e5SAlexey Dobriyan #include <linux/fs.h> 14f6ac2354SChristoph Lameter #include <linux/mm.h> 154e950f6fSAlexey Dobriyan #include <linux/err.h> 162244b95aSChristoph Lameter #include <linux/module.h> 175a0e3ad6STejun Heo #include <linux/slab.h> 18df9ecabaSChristoph Lameter #include <linux/cpu.h> 197cc36bbdSChristoph Lameter #include <linux/cpumask.h> 20c748e134SAdrian Bunk #include <linux/vmstat.h> 213c486871SAndrew Morton #include <linux/proc_fs.h> 223c486871SAndrew Morton #include <linux/seq_file.h> 233c486871SAndrew Morton #include <linux/debugfs.h> 24e8edc6e0SAlexey Dobriyan #include <linux/sched.h> 25f1a5ab12SMel Gorman #include <linux/math64.h> 2679da826aSMichael Rubin #include <linux/writeback.h> 2736deb0beSNamhyung Kim #include <linux/compaction.h> 286e543d57SLisa Du #include <linux/mm_inline.h> 2948c96a36SJoonsoo Kim #include <linux/page_ext.h> 3048c96a36SJoonsoo Kim #include <linux/page_owner.h> 316e543d57SLisa Du 326e543d57SLisa Du #include "internal.h" 33f6ac2354SChristoph Lameter 344518085eSKemi Wang #ifdef CONFIG_NUMA 354518085eSKemi Wang int sysctl_vm_numa_stat = ENABLE_NUMA_STAT; 364518085eSKemi Wang 374518085eSKemi Wang /* zero numa counters within a zone */ 384518085eSKemi Wang static void zero_zone_numa_counters(struct zone *zone) 394518085eSKemi Wang { 404518085eSKemi Wang int item, cpu; 414518085eSKemi Wang 
42f19298b9SMel Gorman for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) { 43f19298b9SMel Gorman atomic_long_set(&zone->vm_numa_event[item], 0); 44f19298b9SMel Gorman for_each_online_cpu(cpu) { 45f19298b9SMel Gorman per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_event[item] 464518085eSKemi Wang = 0; 474518085eSKemi Wang } 484518085eSKemi Wang } 49f19298b9SMel Gorman } 504518085eSKemi Wang 514518085eSKemi Wang /* zero numa counters of all the populated zones */ 524518085eSKemi Wang static void zero_zones_numa_counters(void) 534518085eSKemi Wang { 544518085eSKemi Wang struct zone *zone; 554518085eSKemi Wang 564518085eSKemi Wang for_each_populated_zone(zone) 574518085eSKemi Wang zero_zone_numa_counters(zone); 584518085eSKemi Wang } 594518085eSKemi Wang 604518085eSKemi Wang /* zero global numa counters */ 614518085eSKemi Wang static void zero_global_numa_counters(void) 624518085eSKemi Wang { 634518085eSKemi Wang int item; 644518085eSKemi Wang 65f19298b9SMel Gorman for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) 66f19298b9SMel Gorman atomic_long_set(&vm_numa_event[item], 0); 674518085eSKemi Wang } 684518085eSKemi Wang 694518085eSKemi Wang static void invalid_numa_statistics(void) 704518085eSKemi Wang { 714518085eSKemi Wang zero_zones_numa_counters(); 724518085eSKemi Wang zero_global_numa_counters(); 734518085eSKemi Wang } 744518085eSKemi Wang 754518085eSKemi Wang static DEFINE_MUTEX(vm_numa_stat_lock); 764518085eSKemi Wang 774518085eSKemi Wang int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write, 7832927393SChristoph Hellwig void *buffer, size_t *length, loff_t *ppos) 794518085eSKemi Wang { 804518085eSKemi Wang int ret, oldval; 814518085eSKemi Wang 824518085eSKemi Wang mutex_lock(&vm_numa_stat_lock); 834518085eSKemi Wang if (write) 844518085eSKemi Wang oldval = sysctl_vm_numa_stat; 854518085eSKemi Wang ret = proc_dointvec_minmax(table, write, buffer, length, ppos); 864518085eSKemi Wang if (ret || !write) 874518085eSKemi Wang goto out; 
884518085eSKemi Wang 894518085eSKemi Wang if (oldval == sysctl_vm_numa_stat) 904518085eSKemi Wang goto out; 914518085eSKemi Wang else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) { 924518085eSKemi Wang static_branch_enable(&vm_numa_stat_key); 934518085eSKemi Wang pr_info("enable numa statistics\n"); 944518085eSKemi Wang } else { 954518085eSKemi Wang static_branch_disable(&vm_numa_stat_key); 964518085eSKemi Wang invalid_numa_statistics(); 974518085eSKemi Wang pr_info("disable numa statistics, and clear numa counters\n"); 984518085eSKemi Wang } 994518085eSKemi Wang 1004518085eSKemi Wang out: 1014518085eSKemi Wang mutex_unlock(&vm_numa_stat_lock); 1024518085eSKemi Wang return ret; 1034518085eSKemi Wang } 1044518085eSKemi Wang #endif 1054518085eSKemi Wang 106f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS 107f8891e5eSChristoph Lameter DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}}; 108f8891e5eSChristoph Lameter EXPORT_PER_CPU_SYMBOL(vm_event_states); 109f8891e5eSChristoph Lameter 11031f961a8SMinchan Kim static void sum_vm_events(unsigned long *ret) 111f8891e5eSChristoph Lameter { 1129eccf2a8SChristoph Lameter int cpu; 113f8891e5eSChristoph Lameter int i; 114f8891e5eSChristoph Lameter 115f8891e5eSChristoph Lameter memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long)); 116f8891e5eSChristoph Lameter 11731f961a8SMinchan Kim for_each_online_cpu(cpu) { 118f8891e5eSChristoph Lameter struct vm_event_state *this = &per_cpu(vm_event_states, cpu); 119f8891e5eSChristoph Lameter 120f8891e5eSChristoph Lameter for (i = 0; i < NR_VM_EVENT_ITEMS; i++) 121f8891e5eSChristoph Lameter ret[i] += this->event[i]; 122f8891e5eSChristoph Lameter } 123f8891e5eSChristoph Lameter } 124f8891e5eSChristoph Lameter 125f8891e5eSChristoph Lameter /* 126f8891e5eSChristoph Lameter * Accumulate the vm event counters across all CPUs. 
127f8891e5eSChristoph Lameter * The result is unavoidably approximate - it can change 128f8891e5eSChristoph Lameter * during and after execution of this function. 129f8891e5eSChristoph Lameter */ 130f8891e5eSChristoph Lameter void all_vm_events(unsigned long *ret) 131f8891e5eSChristoph Lameter { 1327625eccdSSebastian Andrzej Siewior cpus_read_lock(); 13331f961a8SMinchan Kim sum_vm_events(ret); 1347625eccdSSebastian Andrzej Siewior cpus_read_unlock(); 135f8891e5eSChristoph Lameter } 13632dd66fcSHeiko Carstens EXPORT_SYMBOL_GPL(all_vm_events); 137f8891e5eSChristoph Lameter 138f8891e5eSChristoph Lameter /* 139f8891e5eSChristoph Lameter * Fold the foreign cpu events into our own. 140f8891e5eSChristoph Lameter * 141f8891e5eSChristoph Lameter * This is adding to the events on one processor 142f8891e5eSChristoph Lameter * but keeps the global counts constant. 143f8891e5eSChristoph Lameter */ 144f8891e5eSChristoph Lameter void vm_events_fold_cpu(int cpu) 145f8891e5eSChristoph Lameter { 146f8891e5eSChristoph Lameter struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu); 147f8891e5eSChristoph Lameter int i; 148f8891e5eSChristoph Lameter 149f8891e5eSChristoph Lameter for (i = 0; i < NR_VM_EVENT_ITEMS; i++) { 150f8891e5eSChristoph Lameter count_vm_events(i, fold_state->event[i]); 151f8891e5eSChristoph Lameter fold_state->event[i] = 0; 152f8891e5eSChristoph Lameter } 153f8891e5eSChristoph Lameter } 154f8891e5eSChristoph Lameter 155f8891e5eSChristoph Lameter #endif /* CONFIG_VM_EVENT_COUNTERS */ 156f8891e5eSChristoph Lameter 1572244b95aSChristoph Lameter /* 1582244b95aSChristoph Lameter * Manage combined zone based / global counters 1592244b95aSChristoph Lameter * 1602244b95aSChristoph Lameter * vm_stat contains the global counters 1612244b95aSChristoph Lameter */ 16275ef7184SMel Gorman atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp; 16375ef7184SMel Gorman atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp; 
164f19298b9SMel Gorman atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp; 16575ef7184SMel Gorman EXPORT_SYMBOL(vm_zone_stat); 16675ef7184SMel Gorman EXPORT_SYMBOL(vm_node_stat); 1672244b95aSChristoph Lameter 168ebeac3eaSGeert Uytterhoeven #ifdef CONFIG_NUMA 169ebeac3eaSGeert Uytterhoeven static void fold_vm_zone_numa_events(struct zone *zone) 170ebeac3eaSGeert Uytterhoeven { 171ebeac3eaSGeert Uytterhoeven unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, }; 172ebeac3eaSGeert Uytterhoeven int cpu; 173ebeac3eaSGeert Uytterhoeven enum numa_stat_item item; 174ebeac3eaSGeert Uytterhoeven 175ebeac3eaSGeert Uytterhoeven for_each_online_cpu(cpu) { 176ebeac3eaSGeert Uytterhoeven struct per_cpu_zonestat *pzstats; 177ebeac3eaSGeert Uytterhoeven 178ebeac3eaSGeert Uytterhoeven pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); 179ebeac3eaSGeert Uytterhoeven for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) 180ebeac3eaSGeert Uytterhoeven zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0); 181ebeac3eaSGeert Uytterhoeven } 182ebeac3eaSGeert Uytterhoeven 183ebeac3eaSGeert Uytterhoeven for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) 184ebeac3eaSGeert Uytterhoeven zone_numa_event_add(zone_numa_events[item], zone, item); 185ebeac3eaSGeert Uytterhoeven } 186ebeac3eaSGeert Uytterhoeven 187ebeac3eaSGeert Uytterhoeven void fold_vm_numa_events(void) 188ebeac3eaSGeert Uytterhoeven { 189ebeac3eaSGeert Uytterhoeven struct zone *zone; 190ebeac3eaSGeert Uytterhoeven 191ebeac3eaSGeert Uytterhoeven for_each_populated_zone(zone) 192ebeac3eaSGeert Uytterhoeven fold_vm_zone_numa_events(zone); 193ebeac3eaSGeert Uytterhoeven } 194ebeac3eaSGeert Uytterhoeven #endif 195ebeac3eaSGeert Uytterhoeven 1962244b95aSChristoph Lameter #ifdef CONFIG_SMP 1972244b95aSChristoph Lameter 198b44129b3SMel Gorman int calculate_pressure_threshold(struct zone *zone) 19988f5acf8SMel Gorman { 20088f5acf8SMel Gorman int threshold; 20188f5acf8SMel Gorman int 
watermark_distance; 20288f5acf8SMel Gorman 20388f5acf8SMel Gorman /* 20488f5acf8SMel Gorman * As vmstats are not up to date, there is drift between the estimated 20588f5acf8SMel Gorman * and real values. For high thresholds and a high number of CPUs, it 20688f5acf8SMel Gorman * is possible for the min watermark to be breached while the estimated 20788f5acf8SMel Gorman * value looks fine. The pressure threshold is a reduced value such 20888f5acf8SMel Gorman * that even the maximum amount of drift will not accidentally breach 20988f5acf8SMel Gorman * the min watermark 21088f5acf8SMel Gorman */ 21188f5acf8SMel Gorman watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone); 21288f5acf8SMel Gorman threshold = max(1, (int)(watermark_distance / num_online_cpus())); 21388f5acf8SMel Gorman 21488f5acf8SMel Gorman /* 21588f5acf8SMel Gorman * Maximum threshold is 125 21688f5acf8SMel Gorman */ 21788f5acf8SMel Gorman threshold = min(125, threshold); 21888f5acf8SMel Gorman 21988f5acf8SMel Gorman return threshold; 22088f5acf8SMel Gorman } 22188f5acf8SMel Gorman 222b44129b3SMel Gorman int calculate_normal_threshold(struct zone *zone) 223df9ecabaSChristoph Lameter { 224df9ecabaSChristoph Lameter int threshold; 225df9ecabaSChristoph Lameter int mem; /* memory in 128 MB units */ 2262244b95aSChristoph Lameter 2272244b95aSChristoph Lameter /* 228df9ecabaSChristoph Lameter * The threshold scales with the number of processors and the amount 229df9ecabaSChristoph Lameter * of memory per zone. More memory means that we can defer updates for 230df9ecabaSChristoph Lameter * longer, more processors could lead to more contention. 231df9ecabaSChristoph Lameter * fls() is used to have a cheap way of logarithmic scaling. 
2322244b95aSChristoph Lameter * 233df9ecabaSChristoph Lameter * Some sample thresholds: 234df9ecabaSChristoph Lameter * 235ea15ba17SMiaohe Lin * Threshold Processors (fls) Zonesize fls(mem)+1 236df9ecabaSChristoph Lameter * ------------------------------------------------------------------ 237df9ecabaSChristoph Lameter * 8 1 1 0.9-1 GB 4 238df9ecabaSChristoph Lameter * 16 2 2 0.9-1 GB 4 239df9ecabaSChristoph Lameter * 20 2 2 1-2 GB 5 240df9ecabaSChristoph Lameter * 24 2 2 2-4 GB 6 241df9ecabaSChristoph Lameter * 28 2 2 4-8 GB 7 242df9ecabaSChristoph Lameter * 32 2 2 8-16 GB 8 243df9ecabaSChristoph Lameter * 4 2 2 <128M 1 244df9ecabaSChristoph Lameter * 30 4 3 2-4 GB 5 245df9ecabaSChristoph Lameter * 48 4 3 8-16 GB 8 246df9ecabaSChristoph Lameter * 32 8 4 1-2 GB 4 247df9ecabaSChristoph Lameter * 32 8 4 0.9-1GB 4 248df9ecabaSChristoph Lameter * 10 16 5 <128M 1 249df9ecabaSChristoph Lameter * 40 16 5 900M 4 250df9ecabaSChristoph Lameter * 70 64 7 2-4 GB 5 251df9ecabaSChristoph Lameter * 84 64 7 4-8 GB 6 252df9ecabaSChristoph Lameter * 108 512 9 4-8 GB 6 253df9ecabaSChristoph Lameter * 125 1024 10 8-16 GB 8 254df9ecabaSChristoph Lameter * 125 1024 10 16-32 GB 9 2552244b95aSChristoph Lameter */ 256df9ecabaSChristoph Lameter 2579705bea5SArun KS mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT); 258df9ecabaSChristoph Lameter 259df9ecabaSChristoph Lameter threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem)); 260df9ecabaSChristoph Lameter 261df9ecabaSChristoph Lameter /* 262df9ecabaSChristoph Lameter * Maximum threshold is 125 263df9ecabaSChristoph Lameter */ 264df9ecabaSChristoph Lameter threshold = min(125, threshold); 265df9ecabaSChristoph Lameter 266df9ecabaSChristoph Lameter return threshold; 267df9ecabaSChristoph Lameter } 268df9ecabaSChristoph Lameter 269df9ecabaSChristoph Lameter /* 270df9ecabaSChristoph Lameter * Refresh the thresholds for each zone. 
271df9ecabaSChristoph Lameter */ 272a6cccdc3SKOSAKI Motohiro void refresh_zone_stat_thresholds(void) 2732244b95aSChristoph Lameter { 27475ef7184SMel Gorman struct pglist_data *pgdat; 275df9ecabaSChristoph Lameter struct zone *zone; 276df9ecabaSChristoph Lameter int cpu; 277df9ecabaSChristoph Lameter int threshold; 278df9ecabaSChristoph Lameter 27975ef7184SMel Gorman /* Zero current pgdat thresholds */ 28075ef7184SMel Gorman for_each_online_pgdat(pgdat) { 28175ef7184SMel Gorman for_each_online_cpu(cpu) { 28275ef7184SMel Gorman per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0; 28375ef7184SMel Gorman } 28475ef7184SMel Gorman } 28575ef7184SMel Gorman 286ee99c71cSKOSAKI Motohiro for_each_populated_zone(zone) { 28775ef7184SMel Gorman struct pglist_data *pgdat = zone->zone_pgdat; 288aa454840SChristoph Lameter unsigned long max_drift, tolerate_drift; 289aa454840SChristoph Lameter 290b44129b3SMel Gorman threshold = calculate_normal_threshold(zone); 291df9ecabaSChristoph Lameter 29275ef7184SMel Gorman for_each_online_cpu(cpu) { 29375ef7184SMel Gorman int pgdat_threshold; 29475ef7184SMel Gorman 29528f836b6SMel Gorman per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold 29699dcc3e5SChristoph Lameter = threshold; 2971d90ca89SKemi Wang 29875ef7184SMel Gorman /* Base nodestat threshold on the largest populated zone. 
*/ 29975ef7184SMel Gorman pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold; 30075ef7184SMel Gorman per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold 30175ef7184SMel Gorman = max(threshold, pgdat_threshold); 30275ef7184SMel Gorman } 30375ef7184SMel Gorman 304aa454840SChristoph Lameter /* 305aa454840SChristoph Lameter * Only set percpu_drift_mark if there is a danger that 306aa454840SChristoph Lameter * NR_FREE_PAGES reports the low watermark is ok when in fact 307aa454840SChristoph Lameter * the min watermark could be breached by an allocation 308aa454840SChristoph Lameter */ 309aa454840SChristoph Lameter tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone); 310aa454840SChristoph Lameter max_drift = num_online_cpus() * threshold; 311aa454840SChristoph Lameter if (max_drift > tolerate_drift) 312aa454840SChristoph Lameter zone->percpu_drift_mark = high_wmark_pages(zone) + 313aa454840SChristoph Lameter max_drift; 314df9ecabaSChristoph Lameter } 3152244b95aSChristoph Lameter } 3162244b95aSChristoph Lameter 317b44129b3SMel Gorman void set_pgdat_percpu_threshold(pg_data_t *pgdat, 318b44129b3SMel Gorman int (*calculate_pressure)(struct zone *)) 31988f5acf8SMel Gorman { 32088f5acf8SMel Gorman struct zone *zone; 32188f5acf8SMel Gorman int cpu; 32288f5acf8SMel Gorman int threshold; 32388f5acf8SMel Gorman int i; 32488f5acf8SMel Gorman 32588f5acf8SMel Gorman for (i = 0; i < pgdat->nr_zones; i++) { 32688f5acf8SMel Gorman zone = &pgdat->node_zones[i]; 32788f5acf8SMel Gorman if (!zone->percpu_drift_mark) 32888f5acf8SMel Gorman continue; 32988f5acf8SMel Gorman 330b44129b3SMel Gorman threshold = (*calculate_pressure)(zone); 3311d90ca89SKemi Wang for_each_online_cpu(cpu) 33228f836b6SMel Gorman per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold 33388f5acf8SMel Gorman = threshold; 33488f5acf8SMel Gorman } 33588f5acf8SMel Gorman } 33688f5acf8SMel Gorman 3372244b95aSChristoph Lameter /* 338bea04b07SJianyu Zhan * For use when we know 
that interrupts are disabled, 339bea04b07SJianyu Zhan * or when we know that preemption is disabled and that 340bea04b07SJianyu Zhan * particular counter cannot be updated from interrupt context. 3412244b95aSChristoph Lameter */ 3422244b95aSChristoph Lameter void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, 3436cdb18adSHeiko Carstens long delta) 3442244b95aSChristoph Lameter { 34528f836b6SMel Gorman struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats; 34612938a92SChristoph Lameter s8 __percpu *p = pcp->vm_stat_diff + item; 3472244b95aSChristoph Lameter long x; 34812938a92SChristoph Lameter long t; 3492244b95aSChristoph Lameter 350c68ed794SIngo Molnar /* 351c68ed794SIngo Molnar * Accurate vmstat updates require a RMW. On !PREEMPT_RT kernels, 352c68ed794SIngo Molnar * atomicity is provided by IRQs being disabled -- either explicitly 353c68ed794SIngo Molnar * or via local_lock_irq. On PREEMPT_RT, local_lock_irq only disables 354c68ed794SIngo Molnar * CPU migrations and preemption potentially corrupts a counter so 355c68ed794SIngo Molnar * disable preemption. 
356c68ed794SIngo Molnar */ 357c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 358c68ed794SIngo Molnar preempt_disable(); 359c68ed794SIngo Molnar 36012938a92SChristoph Lameter x = delta + __this_cpu_read(*p); 3612244b95aSChristoph Lameter 36212938a92SChristoph Lameter t = __this_cpu_read(pcp->stat_threshold); 36312938a92SChristoph Lameter 36440610076SMiaohe Lin if (unlikely(abs(x) > t)) { 3652244b95aSChristoph Lameter zone_page_state_add(x, zone, item); 3662244b95aSChristoph Lameter x = 0; 3672244b95aSChristoph Lameter } 36812938a92SChristoph Lameter __this_cpu_write(*p, x); 369c68ed794SIngo Molnar 370c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 371c68ed794SIngo Molnar preempt_enable(); 3722244b95aSChristoph Lameter } 3732244b95aSChristoph Lameter EXPORT_SYMBOL(__mod_zone_page_state); 3742244b95aSChristoph Lameter 37575ef7184SMel Gorman void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, 37675ef7184SMel Gorman long delta) 37775ef7184SMel Gorman { 37875ef7184SMel Gorman struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats; 37975ef7184SMel Gorman s8 __percpu *p = pcp->vm_node_stat_diff + item; 38075ef7184SMel Gorman long x; 38175ef7184SMel Gorman long t; 38275ef7184SMel Gorman 383ea426c2aSRoman Gushchin if (vmstat_item_in_bytes(item)) { 384629484aeSJohannes Weiner /* 385629484aeSJohannes Weiner * Only cgroups use subpage accounting right now; at 386629484aeSJohannes Weiner * the global level, these items still change in 387629484aeSJohannes Weiner * multiples of whole pages. Store them as pages 388629484aeSJohannes Weiner * internally to keep the per-cpu counters compact. 
389629484aeSJohannes Weiner */ 390ea426c2aSRoman Gushchin VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1)); 391ea426c2aSRoman Gushchin delta >>= PAGE_SHIFT; 392ea426c2aSRoman Gushchin } 393ea426c2aSRoman Gushchin 394c68ed794SIngo Molnar /* See __mod_node_page_state */ 395c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 396c68ed794SIngo Molnar preempt_disable(); 397c68ed794SIngo Molnar 39875ef7184SMel Gorman x = delta + __this_cpu_read(*p); 39975ef7184SMel Gorman 40075ef7184SMel Gorman t = __this_cpu_read(pcp->stat_threshold); 40175ef7184SMel Gorman 40240610076SMiaohe Lin if (unlikely(abs(x) > t)) { 40375ef7184SMel Gorman node_page_state_add(x, pgdat, item); 40475ef7184SMel Gorman x = 0; 40575ef7184SMel Gorman } 40675ef7184SMel Gorman __this_cpu_write(*p, x); 407c68ed794SIngo Molnar 408c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 409c68ed794SIngo Molnar preempt_enable(); 41075ef7184SMel Gorman } 41175ef7184SMel Gorman EXPORT_SYMBOL(__mod_node_page_state); 41275ef7184SMel Gorman 4132244b95aSChristoph Lameter /* 4142244b95aSChristoph Lameter * Optimized increment and decrement functions. 4152244b95aSChristoph Lameter * 4162244b95aSChristoph Lameter * These are only for a single page and therefore can take a struct page * 4172244b95aSChristoph Lameter * argument instead of struct zone *. This allows the inclusion of the code 4182244b95aSChristoph Lameter * generated for page_zone(page) into the optimized functions. 4192244b95aSChristoph Lameter * 4202244b95aSChristoph Lameter * No overflow check is necessary and therefore the differential can be 4212244b95aSChristoph Lameter * incremented or decremented in place which may allow the compilers to 4222244b95aSChristoph Lameter * generate better code. 4232244b95aSChristoph Lameter * The increment or decrement is known and therefore one boundary check can 4242244b95aSChristoph Lameter * be omitted. 4252244b95aSChristoph Lameter * 426df9ecabaSChristoph Lameter * NOTE: These functions are very performance sensitive. 
Change only 427df9ecabaSChristoph Lameter * with care. 428df9ecabaSChristoph Lameter * 4292244b95aSChristoph Lameter * Some processors have inc/dec instructions that are atomic vs an interrupt. 4302244b95aSChristoph Lameter * However, the code must first determine the differential location in a zone 4312244b95aSChristoph Lameter * based on the processor number and then inc/dec the counter. There is no 4322244b95aSChristoph Lameter * guarantee without disabling preemption that the processor will not change 4332244b95aSChristoph Lameter * in between and therefore the atomicity vs. interrupt cannot be exploited 4342244b95aSChristoph Lameter * in a useful way here. 4352244b95aSChristoph Lameter */ 436c8785385SChristoph Lameter void __inc_zone_state(struct zone *zone, enum zone_stat_item item) 4372244b95aSChristoph Lameter { 43828f836b6SMel Gorman struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats; 43912938a92SChristoph Lameter s8 __percpu *p = pcp->vm_stat_diff + item; 44012938a92SChristoph Lameter s8 v, t; 4412244b95aSChristoph Lameter 442c68ed794SIngo Molnar /* See __mod_node_page_state */ 443c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 444c68ed794SIngo Molnar preempt_disable(); 445c68ed794SIngo Molnar 446908ee0f1SChristoph Lameter v = __this_cpu_inc_return(*p); 44712938a92SChristoph Lameter t = __this_cpu_read(pcp->stat_threshold); 44812938a92SChristoph Lameter if (unlikely(v > t)) { 44912938a92SChristoph Lameter s8 overstep = t >> 1; 4502244b95aSChristoph Lameter 45112938a92SChristoph Lameter zone_page_state_add(v + overstep, zone, item); 45212938a92SChristoph Lameter __this_cpu_write(*p, -overstep); 4532244b95aSChristoph Lameter } 454c68ed794SIngo Molnar 455c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 456c68ed794SIngo Molnar preempt_enable(); 4572244b95aSChristoph Lameter } 458ca889e6cSChristoph Lameter 45975ef7184SMel Gorman void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) 46075ef7184SMel Gorman { 
46175ef7184SMel Gorman struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats; 46275ef7184SMel Gorman s8 __percpu *p = pcp->vm_node_stat_diff + item; 46375ef7184SMel Gorman s8 v, t; 46475ef7184SMel Gorman 465ea426c2aSRoman Gushchin VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); 466ea426c2aSRoman Gushchin 467c68ed794SIngo Molnar /* See __mod_node_page_state */ 468c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 469c68ed794SIngo Molnar preempt_disable(); 470c68ed794SIngo Molnar 47175ef7184SMel Gorman v = __this_cpu_inc_return(*p); 47275ef7184SMel Gorman t = __this_cpu_read(pcp->stat_threshold); 47375ef7184SMel Gorman if (unlikely(v > t)) { 47475ef7184SMel Gorman s8 overstep = t >> 1; 47575ef7184SMel Gorman 47675ef7184SMel Gorman node_page_state_add(v + overstep, pgdat, item); 47775ef7184SMel Gorman __this_cpu_write(*p, -overstep); 47875ef7184SMel Gorman } 479c68ed794SIngo Molnar 480c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 481c68ed794SIngo Molnar preempt_enable(); 48275ef7184SMel Gorman } 48375ef7184SMel Gorman 484ca889e6cSChristoph Lameter void __inc_zone_page_state(struct page *page, enum zone_stat_item item) 485ca889e6cSChristoph Lameter { 486ca889e6cSChristoph Lameter __inc_zone_state(page_zone(page), item); 487ca889e6cSChristoph Lameter } 4882244b95aSChristoph Lameter EXPORT_SYMBOL(__inc_zone_page_state); 4892244b95aSChristoph Lameter 49075ef7184SMel Gorman void __inc_node_page_state(struct page *page, enum node_stat_item item) 49175ef7184SMel Gorman { 49275ef7184SMel Gorman __inc_node_state(page_pgdat(page), item); 49375ef7184SMel Gorman } 49475ef7184SMel Gorman EXPORT_SYMBOL(__inc_node_page_state); 49575ef7184SMel Gorman 496c8785385SChristoph Lameter void __dec_zone_state(struct zone *zone, enum zone_stat_item item) 4972244b95aSChristoph Lameter { 49828f836b6SMel Gorman struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats; 49912938a92SChristoph Lameter s8 __percpu *p = pcp->vm_stat_diff + item; 50012938a92SChristoph 
Lameter s8 v, t; 5012244b95aSChristoph Lameter 502c68ed794SIngo Molnar /* See __mod_node_page_state */ 503c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 504c68ed794SIngo Molnar preempt_disable(); 505c68ed794SIngo Molnar 506908ee0f1SChristoph Lameter v = __this_cpu_dec_return(*p); 50712938a92SChristoph Lameter t = __this_cpu_read(pcp->stat_threshold); 50812938a92SChristoph Lameter if (unlikely(v < - t)) { 50912938a92SChristoph Lameter s8 overstep = t >> 1; 5102244b95aSChristoph Lameter 51112938a92SChristoph Lameter zone_page_state_add(v - overstep, zone, item); 51212938a92SChristoph Lameter __this_cpu_write(*p, overstep); 5132244b95aSChristoph Lameter } 514c68ed794SIngo Molnar 515c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 516c68ed794SIngo Molnar preempt_enable(); 5172244b95aSChristoph Lameter } 518c8785385SChristoph Lameter 51975ef7184SMel Gorman void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) 52075ef7184SMel Gorman { 52175ef7184SMel Gorman struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats; 52275ef7184SMel Gorman s8 __percpu *p = pcp->vm_node_stat_diff + item; 52375ef7184SMel Gorman s8 v, t; 52475ef7184SMel Gorman 525ea426c2aSRoman Gushchin VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); 526ea426c2aSRoman Gushchin 527c68ed794SIngo Molnar /* See __mod_node_page_state */ 528c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 529c68ed794SIngo Molnar preempt_disable(); 530c68ed794SIngo Molnar 53175ef7184SMel Gorman v = __this_cpu_dec_return(*p); 53275ef7184SMel Gorman t = __this_cpu_read(pcp->stat_threshold); 53375ef7184SMel Gorman if (unlikely(v < - t)) { 53475ef7184SMel Gorman s8 overstep = t >> 1; 53575ef7184SMel Gorman 53675ef7184SMel Gorman node_page_state_add(v - overstep, pgdat, item); 53775ef7184SMel Gorman __this_cpu_write(*p, overstep); 53875ef7184SMel Gorman } 539c68ed794SIngo Molnar 540c68ed794SIngo Molnar if (IS_ENABLED(CONFIG_PREEMPT_RT)) 541c68ed794SIngo Molnar preempt_enable(); 
54275ef7184SMel Gorman } 54375ef7184SMel Gorman 544c8785385SChristoph Lameter void __dec_zone_page_state(struct page *page, enum zone_stat_item item) 545c8785385SChristoph Lameter { 546c8785385SChristoph Lameter __dec_zone_state(page_zone(page), item); 547c8785385SChristoph Lameter } 5482244b95aSChristoph Lameter EXPORT_SYMBOL(__dec_zone_page_state); 5492244b95aSChristoph Lameter 55075ef7184SMel Gorman void __dec_node_page_state(struct page *page, enum node_stat_item item) 55175ef7184SMel Gorman { 55275ef7184SMel Gorman __dec_node_state(page_pgdat(page), item); 55375ef7184SMel Gorman } 55475ef7184SMel Gorman EXPORT_SYMBOL(__dec_node_page_state); 55575ef7184SMel Gorman 5564156153cSHeiko Carstens #ifdef CONFIG_HAVE_CMPXCHG_LOCAL 5577c839120SChristoph Lameter /* 5587c839120SChristoph Lameter * If we have cmpxchg_local support then we do not need to incur the overhead 5597c839120SChristoph Lameter * that comes with local_irq_save/restore if we use this_cpu_cmpxchg. 5607c839120SChristoph Lameter * 5617c839120SChristoph Lameter * mod_state() modifies the zone counter state through atomic per cpu 5627c839120SChristoph Lameter * operations. 
5637c839120SChristoph Lameter * 5647c839120SChristoph Lameter * Overstep mode specifies how overstep should handled: 5657c839120SChristoph Lameter * 0 No overstepping 5667c839120SChristoph Lameter * 1 Overstepping half of threshold 5677c839120SChristoph Lameter * -1 Overstepping minus half of threshold 5687c839120SChristoph Lameter */ 56975ef7184SMel Gorman static inline void mod_zone_state(struct zone *zone, 57075ef7184SMel Gorman enum zone_stat_item item, long delta, int overstep_mode) 5717c839120SChristoph Lameter { 57228f836b6SMel Gorman struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats; 5737c839120SChristoph Lameter s8 __percpu *p = pcp->vm_stat_diff + item; 5747c839120SChristoph Lameter long o, n, t, z; 5757c839120SChristoph Lameter 5767c839120SChristoph Lameter do { 5777c839120SChristoph Lameter z = 0; /* overflow to zone counters */ 5787c839120SChristoph Lameter 5797c839120SChristoph Lameter /* 5807c839120SChristoph Lameter * The fetching of the stat_threshold is racy. We may apply 5817c839120SChristoph Lameter * a counter threshold to the wrong the cpu if we get 582d3bc2367SChristoph Lameter * rescheduled while executing here. However, the next 583d3bc2367SChristoph Lameter * counter update will apply the threshold again and 584d3bc2367SChristoph Lameter * therefore bring the counter under the threshold again. 585d3bc2367SChristoph Lameter * 586d3bc2367SChristoph Lameter * Most of the time the thresholds are the same anyways 587d3bc2367SChristoph Lameter * for all cpus in a zone. 
5887c839120SChristoph Lameter */ 5897c839120SChristoph Lameter t = this_cpu_read(pcp->stat_threshold); 5907c839120SChristoph Lameter 5917c839120SChristoph Lameter o = this_cpu_read(*p); 5927c839120SChristoph Lameter n = delta + o; 5937c839120SChristoph Lameter 59440610076SMiaohe Lin if (abs(n) > t) { 5957c839120SChristoph Lameter int os = overstep_mode * (t >> 1) ; 5967c839120SChristoph Lameter 5977c839120SChristoph Lameter /* Overflow must be added to zone counters */ 5987c839120SChristoph Lameter z = n + os; 5997c839120SChristoph Lameter n = -os; 6007c839120SChristoph Lameter } 6017c839120SChristoph Lameter } while (this_cpu_cmpxchg(*p, o, n) != o); 6027c839120SChristoph Lameter 6037c839120SChristoph Lameter if (z) 6047c839120SChristoph Lameter zone_page_state_add(z, zone, item); 6057c839120SChristoph Lameter } 6067c839120SChristoph Lameter 6077c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, 6086cdb18adSHeiko Carstens long delta) 6097c839120SChristoph Lameter { 61075ef7184SMel Gorman mod_zone_state(zone, item, delta, 0); 6117c839120SChristoph Lameter } 6127c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state); 6137c839120SChristoph Lameter 6147c839120SChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item) 6157c839120SChristoph Lameter { 61675ef7184SMel Gorman mod_zone_state(page_zone(page), item, 1, 1); 6177c839120SChristoph Lameter } 6187c839120SChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state); 6197c839120SChristoph Lameter 6207c839120SChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item) 6217c839120SChristoph Lameter { 62275ef7184SMel Gorman mod_zone_state(page_zone(page), item, -1, -1); 6237c839120SChristoph Lameter } 6247c839120SChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state); 62575ef7184SMel Gorman 62675ef7184SMel Gorman static inline void mod_node_state(struct pglist_data *pgdat, 62775ef7184SMel Gorman enum node_stat_item item, 
int delta, int overstep_mode) 62875ef7184SMel Gorman { 62975ef7184SMel Gorman struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats; 63075ef7184SMel Gorman s8 __percpu *p = pcp->vm_node_stat_diff + item; 63175ef7184SMel Gorman long o, n, t, z; 63275ef7184SMel Gorman 633ea426c2aSRoman Gushchin if (vmstat_item_in_bytes(item)) { 634629484aeSJohannes Weiner /* 635629484aeSJohannes Weiner * Only cgroups use subpage accounting right now; at 636629484aeSJohannes Weiner * the global level, these items still change in 637629484aeSJohannes Weiner * multiples of whole pages. Store them as pages 638629484aeSJohannes Weiner * internally to keep the per-cpu counters compact. 639629484aeSJohannes Weiner */ 640ea426c2aSRoman Gushchin VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1)); 641ea426c2aSRoman Gushchin delta >>= PAGE_SHIFT; 642ea426c2aSRoman Gushchin } 643ea426c2aSRoman Gushchin 64475ef7184SMel Gorman do { 64575ef7184SMel Gorman z = 0; /* overflow to node counters */ 64675ef7184SMel Gorman 64775ef7184SMel Gorman /* 64875ef7184SMel Gorman * The fetching of the stat_threshold is racy. We may apply 64975ef7184SMel Gorman * a counter threshold to the wrong the cpu if we get 65075ef7184SMel Gorman * rescheduled while executing here. However, the next 65175ef7184SMel Gorman * counter update will apply the threshold again and 65275ef7184SMel Gorman * therefore bring the counter under the threshold again. 65375ef7184SMel Gorman * 65475ef7184SMel Gorman * Most of the time the thresholds are the same anyways 65575ef7184SMel Gorman * for all cpus in a node. 
65675ef7184SMel Gorman */ 65775ef7184SMel Gorman t = this_cpu_read(pcp->stat_threshold); 65875ef7184SMel Gorman 65975ef7184SMel Gorman o = this_cpu_read(*p); 66075ef7184SMel Gorman n = delta + o; 66175ef7184SMel Gorman 66240610076SMiaohe Lin if (abs(n) > t) { 66375ef7184SMel Gorman int os = overstep_mode * (t >> 1) ; 66475ef7184SMel Gorman 66575ef7184SMel Gorman /* Overflow must be added to node counters */ 66675ef7184SMel Gorman z = n + os; 66775ef7184SMel Gorman n = -os; 66875ef7184SMel Gorman } 66975ef7184SMel Gorman } while (this_cpu_cmpxchg(*p, o, n) != o); 67075ef7184SMel Gorman 67175ef7184SMel Gorman if (z) 67275ef7184SMel Gorman node_page_state_add(z, pgdat, item); 67375ef7184SMel Gorman } 67475ef7184SMel Gorman 67575ef7184SMel Gorman void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, 67675ef7184SMel Gorman long delta) 67775ef7184SMel Gorman { 67875ef7184SMel Gorman mod_node_state(pgdat, item, delta, 0); 67975ef7184SMel Gorman } 68075ef7184SMel Gorman EXPORT_SYMBOL(mod_node_page_state); 68175ef7184SMel Gorman 68275ef7184SMel Gorman void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) 68375ef7184SMel Gorman { 68475ef7184SMel Gorman mod_node_state(pgdat, item, 1, 1); 68575ef7184SMel Gorman } 68675ef7184SMel Gorman 68775ef7184SMel Gorman void inc_node_page_state(struct page *page, enum node_stat_item item) 68875ef7184SMel Gorman { 68975ef7184SMel Gorman mod_node_state(page_pgdat(page), item, 1, 1); 69075ef7184SMel Gorman } 69175ef7184SMel Gorman EXPORT_SYMBOL(inc_node_page_state); 69275ef7184SMel Gorman 69375ef7184SMel Gorman void dec_node_page_state(struct page *page, enum node_stat_item item) 69475ef7184SMel Gorman { 69575ef7184SMel Gorman mod_node_state(page_pgdat(page), item, -1, -1); 69675ef7184SMel Gorman } 69775ef7184SMel Gorman EXPORT_SYMBOL(dec_node_page_state); 6987c839120SChristoph Lameter #else 6997c839120SChristoph Lameter /* 7007c839120SChristoph Lameter * Use interrupt disable to serialize counter 
updates 7017c839120SChristoph Lameter */ 7027c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, 7036cdb18adSHeiko Carstens long delta) 7047c839120SChristoph Lameter { 7057c839120SChristoph Lameter unsigned long flags; 7067c839120SChristoph Lameter 7077c839120SChristoph Lameter local_irq_save(flags); 7087c839120SChristoph Lameter __mod_zone_page_state(zone, item, delta); 7097c839120SChristoph Lameter local_irq_restore(flags); 7107c839120SChristoph Lameter } 7117c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state); 7127c839120SChristoph Lameter 7132244b95aSChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item) 7142244b95aSChristoph Lameter { 7152244b95aSChristoph Lameter unsigned long flags; 7162244b95aSChristoph Lameter struct zone *zone; 7172244b95aSChristoph Lameter 7182244b95aSChristoph Lameter zone = page_zone(page); 7192244b95aSChristoph Lameter local_irq_save(flags); 720ca889e6cSChristoph Lameter __inc_zone_state(zone, item); 7212244b95aSChristoph Lameter local_irq_restore(flags); 7222244b95aSChristoph Lameter } 7232244b95aSChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state); 7242244b95aSChristoph Lameter 7252244b95aSChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item) 7262244b95aSChristoph Lameter { 7272244b95aSChristoph Lameter unsigned long flags; 7282244b95aSChristoph Lameter 7292244b95aSChristoph Lameter local_irq_save(flags); 730a302eb4eSChristoph Lameter __dec_zone_page_state(page, item); 7312244b95aSChristoph Lameter local_irq_restore(flags); 7322244b95aSChristoph Lameter } 7332244b95aSChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state); 7342244b95aSChristoph Lameter 73575ef7184SMel Gorman void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) 73675ef7184SMel Gorman { 73775ef7184SMel Gorman unsigned long flags; 73875ef7184SMel Gorman 73975ef7184SMel Gorman local_irq_save(flags); 74075ef7184SMel Gorman 
__inc_node_state(pgdat, item); 74175ef7184SMel Gorman local_irq_restore(flags); 74275ef7184SMel Gorman } 74375ef7184SMel Gorman EXPORT_SYMBOL(inc_node_state); 74475ef7184SMel Gorman 74575ef7184SMel Gorman void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, 74675ef7184SMel Gorman long delta) 74775ef7184SMel Gorman { 74875ef7184SMel Gorman unsigned long flags; 74975ef7184SMel Gorman 75075ef7184SMel Gorman local_irq_save(flags); 75175ef7184SMel Gorman __mod_node_page_state(pgdat, item, delta); 75275ef7184SMel Gorman local_irq_restore(flags); 75375ef7184SMel Gorman } 75475ef7184SMel Gorman EXPORT_SYMBOL(mod_node_page_state); 75575ef7184SMel Gorman 75675ef7184SMel Gorman void inc_node_page_state(struct page *page, enum node_stat_item item) 75775ef7184SMel Gorman { 75875ef7184SMel Gorman unsigned long flags; 75975ef7184SMel Gorman struct pglist_data *pgdat; 76075ef7184SMel Gorman 76175ef7184SMel Gorman pgdat = page_pgdat(page); 76275ef7184SMel Gorman local_irq_save(flags); 76375ef7184SMel Gorman __inc_node_state(pgdat, item); 76475ef7184SMel Gorman local_irq_restore(flags); 76575ef7184SMel Gorman } 76675ef7184SMel Gorman EXPORT_SYMBOL(inc_node_page_state); 76775ef7184SMel Gorman 76875ef7184SMel Gorman void dec_node_page_state(struct page *page, enum node_stat_item item) 76975ef7184SMel Gorman { 77075ef7184SMel Gorman unsigned long flags; 77175ef7184SMel Gorman 77275ef7184SMel Gorman local_irq_save(flags); 77375ef7184SMel Gorman __dec_node_page_state(page, item); 77475ef7184SMel Gorman local_irq_restore(flags); 77575ef7184SMel Gorman } 77675ef7184SMel Gorman EXPORT_SYMBOL(dec_node_page_state); 77775ef7184SMel Gorman #endif 7787cc36bbdSChristoph Lameter 7797cc36bbdSChristoph Lameter /* 7807cc36bbdSChristoph Lameter * Fold a differential into the global counters. 7817cc36bbdSChristoph Lameter * Returns the number of counters updated. 
7827cc36bbdSChristoph Lameter */ 78375ef7184SMel Gorman static int fold_diff(int *zone_diff, int *node_diff) 7844edb0748SChristoph Lameter { 7854edb0748SChristoph Lameter int i; 7867cc36bbdSChristoph Lameter int changes = 0; 7874edb0748SChristoph Lameter 7884edb0748SChristoph Lameter for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 78975ef7184SMel Gorman if (zone_diff[i]) { 79075ef7184SMel Gorman atomic_long_add(zone_diff[i], &vm_zone_stat[i]); 79175ef7184SMel Gorman changes++; 79275ef7184SMel Gorman } 79375ef7184SMel Gorman 79475ef7184SMel Gorman for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) 79575ef7184SMel Gorman if (node_diff[i]) { 79675ef7184SMel Gorman atomic_long_add(node_diff[i], &vm_node_stat[i]); 7977cc36bbdSChristoph Lameter changes++; 7987cc36bbdSChristoph Lameter } 7997cc36bbdSChristoph Lameter return changes; 8004edb0748SChristoph Lameter } 801f19298b9SMel Gorman 8022244b95aSChristoph Lameter /* 8032bb921e5SChristoph Lameter * Update the zone counters for the current cpu. 804a7f75e25SChristoph Lameter * 8054037d452SChristoph Lameter * Note that refresh_cpu_vm_stats strives to only access 8064037d452SChristoph Lameter * node local memory. The per cpu pagesets on remote zones are placed 8074037d452SChristoph Lameter * in the memory local to the processor using that pageset. So the 8084037d452SChristoph Lameter * loop over all zones will access a series of cachelines local to 8094037d452SChristoph Lameter * the processor. 8104037d452SChristoph Lameter * 8114037d452SChristoph Lameter * The call to zone_page_state_add updates the cachelines with the 8124037d452SChristoph Lameter * statistics in the remote zone struct as well as the global cachelines 8134037d452SChristoph Lameter * with the global counters. These could cause remote node cache line 8144037d452SChristoph Lameter * bouncing and will have to be only done when necessary. 8157cc36bbdSChristoph Lameter * 8167cc36bbdSChristoph Lameter * The function returns the number of global counters updated. 
8172244b95aSChristoph Lameter */ 8180eb77e98SChristoph Lameter static int refresh_cpu_vm_stats(bool do_pagesets) 8192244b95aSChristoph Lameter { 82075ef7184SMel Gorman struct pglist_data *pgdat; 8212244b95aSChristoph Lameter struct zone *zone; 8222244b95aSChristoph Lameter int i; 82375ef7184SMel Gorman int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; 82475ef7184SMel Gorman int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; 8257cc36bbdSChristoph Lameter int changes = 0; 8262244b95aSChristoph Lameter 827ee99c71cSKOSAKI Motohiro for_each_populated_zone(zone) { 82828f836b6SMel Gorman struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; 82928f836b6SMel Gorman #ifdef CONFIG_NUMA 83028f836b6SMel Gorman struct per_cpu_pages __percpu *pcp = zone->per_cpu_pageset; 83128f836b6SMel Gorman #endif 8322244b95aSChristoph Lameter 833fbc2edb0SChristoph Lameter for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { 834a7f75e25SChristoph Lameter int v; 835a7f75e25SChristoph Lameter 83628f836b6SMel Gorman v = this_cpu_xchg(pzstats->vm_stat_diff[i], 0); 837fbc2edb0SChristoph Lameter if (v) { 838fbc2edb0SChristoph Lameter 839a7f75e25SChristoph Lameter atomic_long_add(v, &zone->vm_stat[i]); 84075ef7184SMel Gorman global_zone_diff[i] += v; 8414037d452SChristoph Lameter #ifdef CONFIG_NUMA 8424037d452SChristoph Lameter /* 3 seconds idle till flush */ 84328f836b6SMel Gorman __this_cpu_write(pcp->expire, 3); 8444037d452SChristoph Lameter #endif 8452244b95aSChristoph Lameter } 846fbc2edb0SChristoph Lameter } 8474037d452SChristoph Lameter #ifdef CONFIG_NUMA 8483a321d2aSKemi Wang 8490eb77e98SChristoph Lameter if (do_pagesets) { 8500eb77e98SChristoph Lameter cond_resched(); 8514037d452SChristoph Lameter /* 8524037d452SChristoph Lameter * Deal with draining the remote pageset of this 8534037d452SChristoph Lameter * processor 8544037d452SChristoph Lameter * 8554037d452SChristoph Lameter * Check if there are pages remaining in this pageset 8564037d452SChristoph Lameter * if not then 
there is nothing to expire. 8574037d452SChristoph Lameter */ 85828f836b6SMel Gorman if (!__this_cpu_read(pcp->expire) || 85928f836b6SMel Gorman !__this_cpu_read(pcp->count)) 8604037d452SChristoph Lameter continue; 8614037d452SChristoph Lameter 8624037d452SChristoph Lameter /* 8634037d452SChristoph Lameter * We never drain zones local to this processor. 8644037d452SChristoph Lameter */ 8654037d452SChristoph Lameter if (zone_to_nid(zone) == numa_node_id()) { 86628f836b6SMel Gorman __this_cpu_write(pcp->expire, 0); 8674037d452SChristoph Lameter continue; 8684037d452SChristoph Lameter } 8694037d452SChristoph Lameter 87028f836b6SMel Gorman if (__this_cpu_dec_return(pcp->expire)) 8714037d452SChristoph Lameter continue; 8724037d452SChristoph Lameter 87328f836b6SMel Gorman if (__this_cpu_read(pcp->count)) { 87428f836b6SMel Gorman drain_zone_pages(zone, this_cpu_ptr(pcp)); 8757cc36bbdSChristoph Lameter changes++; 8767cc36bbdSChristoph Lameter } 8770eb77e98SChristoph Lameter } 8784037d452SChristoph Lameter #endif 8792244b95aSChristoph Lameter } 88075ef7184SMel Gorman 88175ef7184SMel Gorman for_each_online_pgdat(pgdat) { 88275ef7184SMel Gorman struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats; 88375ef7184SMel Gorman 88475ef7184SMel Gorman for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { 88575ef7184SMel Gorman int v; 88675ef7184SMel Gorman 88775ef7184SMel Gorman v = this_cpu_xchg(p->vm_node_stat_diff[i], 0); 88875ef7184SMel Gorman if (v) { 88975ef7184SMel Gorman atomic_long_add(v, &pgdat->vm_stat[i]); 89075ef7184SMel Gorman global_node_diff[i] += v; 89175ef7184SMel Gorman } 89275ef7184SMel Gorman } 89375ef7184SMel Gorman } 89475ef7184SMel Gorman 89575ef7184SMel Gorman changes += fold_diff(global_zone_diff, global_node_diff); 8967cc36bbdSChristoph Lameter return changes; 8972244b95aSChristoph Lameter } 8982244b95aSChristoph Lameter 89940f4b1eaSCody P Schafer /* 9002bb921e5SChristoph Lameter * Fold the data for an offline cpu into the global array. 
9012bb921e5SChristoph Lameter * There cannot be any access by the offline cpu and therefore 9022bb921e5SChristoph Lameter * synchronization is simplified. 9032bb921e5SChristoph Lameter */ 9042bb921e5SChristoph Lameter void cpu_vm_stats_fold(int cpu) 9052bb921e5SChristoph Lameter { 90675ef7184SMel Gorman struct pglist_data *pgdat; 9072bb921e5SChristoph Lameter struct zone *zone; 9082bb921e5SChristoph Lameter int i; 90975ef7184SMel Gorman int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; 91075ef7184SMel Gorman int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; 9112bb921e5SChristoph Lameter 9122bb921e5SChristoph Lameter for_each_populated_zone(zone) { 91328f836b6SMel Gorman struct per_cpu_zonestat *pzstats; 9142bb921e5SChristoph Lameter 91528f836b6SMel Gorman pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); 9162bb921e5SChristoph Lameter 917f19298b9SMel Gorman for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { 91828f836b6SMel Gorman if (pzstats->vm_stat_diff[i]) { 9192bb921e5SChristoph Lameter int v; 9202bb921e5SChristoph Lameter 92128f836b6SMel Gorman v = pzstats->vm_stat_diff[i]; 92228f836b6SMel Gorman pzstats->vm_stat_diff[i] = 0; 9232bb921e5SChristoph Lameter atomic_long_add(v, &zone->vm_stat[i]); 92475ef7184SMel Gorman global_zone_diff[i] += v; 9252bb921e5SChristoph Lameter } 926f19298b9SMel Gorman } 9273a321d2aSKemi Wang #ifdef CONFIG_NUMA 928f19298b9SMel Gorman for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) { 929f19298b9SMel Gorman if (pzstats->vm_numa_event[i]) { 930f19298b9SMel Gorman unsigned long v; 9313a321d2aSKemi Wang 932f19298b9SMel Gorman v = pzstats->vm_numa_event[i]; 933f19298b9SMel Gorman pzstats->vm_numa_event[i] = 0; 934f19298b9SMel Gorman zone_numa_event_add(v, zone, i); 935f19298b9SMel Gorman } 9363a321d2aSKemi Wang } 9373a321d2aSKemi Wang #endif 9382bb921e5SChristoph Lameter } 9392bb921e5SChristoph Lameter 94075ef7184SMel Gorman for_each_online_pgdat(pgdat) { 94175ef7184SMel Gorman struct per_cpu_nodestat *p; 94275ef7184SMel Gorman 
94375ef7184SMel Gorman p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu); 94475ef7184SMel Gorman 94575ef7184SMel Gorman for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) 94675ef7184SMel Gorman if (p->vm_node_stat_diff[i]) { 94775ef7184SMel Gorman int v; 94875ef7184SMel Gorman 94975ef7184SMel Gorman v = p->vm_node_stat_diff[i]; 95075ef7184SMel Gorman p->vm_node_stat_diff[i] = 0; 95175ef7184SMel Gorman atomic_long_add(v, &pgdat->vm_stat[i]); 95275ef7184SMel Gorman global_node_diff[i] += v; 95375ef7184SMel Gorman } 95475ef7184SMel Gorman } 95575ef7184SMel Gorman 95675ef7184SMel Gorman fold_diff(global_zone_diff, global_node_diff); 9572bb921e5SChristoph Lameter } 9582bb921e5SChristoph Lameter 9592bb921e5SChristoph Lameter /* 96040f4b1eaSCody P Schafer * this is only called if !populated_zone(zone), which implies no other users of 961f0953a1bSIngo Molnar * pset->vm_stat_diff[] exist. 96240f4b1eaSCody P Schafer */ 96328f836b6SMel Gorman void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats) 9645a883813SMinchan Kim { 965f19298b9SMel Gorman unsigned long v; 9665a883813SMinchan Kim int i; 9675a883813SMinchan Kim 968f19298b9SMel Gorman for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { 96928f836b6SMel Gorman if (pzstats->vm_stat_diff[i]) { 970f19298b9SMel Gorman v = pzstats->vm_stat_diff[i]; 97128f836b6SMel Gorman pzstats->vm_stat_diff[i] = 0; 972f19298b9SMel Gorman zone_page_state_add(v, zone, i); 973f19298b9SMel Gorman } 9745a883813SMinchan Kim } 9753a321d2aSKemi Wang 9763a321d2aSKemi Wang #ifdef CONFIG_NUMA 977f19298b9SMel Gorman for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) { 978f19298b9SMel Gorman if (pzstats->vm_numa_event[i]) { 979f19298b9SMel Gorman v = pzstats->vm_numa_event[i]; 980f19298b9SMel Gorman pzstats->vm_numa_event[i] = 0; 981f19298b9SMel Gorman zone_numa_event_add(v, zone, i); 982f19298b9SMel Gorman } 9833a321d2aSKemi Wang } 9843a321d2aSKemi Wang #endif 9855a883813SMinchan Kim } 9862244b95aSChristoph Lameter #endif 9872244b95aSChristoph Lameter 
988ca889e6cSChristoph Lameter #ifdef CONFIG_NUMA 989ca889e6cSChristoph Lameter /* 99075ef7184SMel Gorman * Determine the per node value of a stat item. This function 99175ef7184SMel Gorman * is called frequently in a NUMA machine, so try to be as 99275ef7184SMel Gorman * frugal as possible. 993c2d42c16SAndrew Morton */ 99475ef7184SMel Gorman unsigned long sum_zone_node_page_state(int node, 99575ef7184SMel Gorman enum zone_stat_item item) 996c2d42c16SAndrew Morton { 997c2d42c16SAndrew Morton struct zone *zones = NODE_DATA(node)->node_zones; 998e87d59f7SJoonsoo Kim int i; 999e87d59f7SJoonsoo Kim unsigned long count = 0; 1000c2d42c16SAndrew Morton 1001e87d59f7SJoonsoo Kim for (i = 0; i < MAX_NR_ZONES; i++) 1002e87d59f7SJoonsoo Kim count += zone_page_state(zones + i, item); 1003e87d59f7SJoonsoo Kim 1004e87d59f7SJoonsoo Kim return count; 1005c2d42c16SAndrew Morton } 1006c2d42c16SAndrew Morton 1007f19298b9SMel Gorman /* Determine the per node value of a numa stat item. */ 1008f19298b9SMel Gorman unsigned long sum_zone_numa_event_state(int node, 10093a321d2aSKemi Wang enum numa_stat_item item) 10103a321d2aSKemi Wang { 10113a321d2aSKemi Wang struct zone *zones = NODE_DATA(node)->node_zones; 10123a321d2aSKemi Wang unsigned long count = 0; 1013f19298b9SMel Gorman int i; 10143a321d2aSKemi Wang 10153a321d2aSKemi Wang for (i = 0; i < MAX_NR_ZONES; i++) 1016f19298b9SMel Gorman count += zone_numa_event_state(zones + i, item); 10173a321d2aSKemi Wang 10183a321d2aSKemi Wang return count; 10193a321d2aSKemi Wang } 10203a321d2aSKemi Wang 102175ef7184SMel Gorman /* 102275ef7184SMel Gorman * Determine the per node value of a stat item. 
102375ef7184SMel Gorman */ 1024ea426c2aSRoman Gushchin unsigned long node_page_state_pages(struct pglist_data *pgdat, 102575ef7184SMel Gorman enum node_stat_item item) 102675ef7184SMel Gorman { 102775ef7184SMel Gorman long x = atomic_long_read(&pgdat->vm_stat[item]); 102875ef7184SMel Gorman #ifdef CONFIG_SMP 102975ef7184SMel Gorman if (x < 0) 103075ef7184SMel Gorman x = 0; 103175ef7184SMel Gorman #endif 103275ef7184SMel Gorman return x; 103375ef7184SMel Gorman } 1034ea426c2aSRoman Gushchin 1035ea426c2aSRoman Gushchin unsigned long node_page_state(struct pglist_data *pgdat, 1036ea426c2aSRoman Gushchin enum node_stat_item item) 1037ea426c2aSRoman Gushchin { 1038ea426c2aSRoman Gushchin VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); 1039ea426c2aSRoman Gushchin 1040ea426c2aSRoman Gushchin return node_page_state_pages(pgdat, item); 1041ea426c2aSRoman Gushchin } 1042ca889e6cSChristoph Lameter #endif 1043ca889e6cSChristoph Lameter 1044d7a5752cSMel Gorman #ifdef CONFIG_COMPACTION 104536deb0beSNamhyung Kim 1046d7a5752cSMel Gorman struct contig_page_info { 1047d7a5752cSMel Gorman unsigned long free_pages; 1048d7a5752cSMel Gorman unsigned long free_blocks_total; 1049d7a5752cSMel Gorman unsigned long free_blocks_suitable; 1050d7a5752cSMel Gorman }; 1051d7a5752cSMel Gorman 1052d7a5752cSMel Gorman /* 1053d7a5752cSMel Gorman * Calculate the number of free pages in a zone, how many contiguous 1054d7a5752cSMel Gorman * pages are free and how many are large enough to satisfy an allocation of 1055d7a5752cSMel Gorman * the target size. Note that this function makes no attempt to estimate 1056d7a5752cSMel Gorman * how many suitable free blocks there *might* be if MOVABLE pages were 1057d7a5752cSMel Gorman * migrated. 
Calculating that is possible, but expensive and can be 1058d7a5752cSMel Gorman * figured out from userspace 1059d7a5752cSMel Gorman */ 1060d7a5752cSMel Gorman static void fill_contig_page_info(struct zone *zone, 1061d7a5752cSMel Gorman unsigned int suitable_order, 1062d7a5752cSMel Gorman struct contig_page_info *info) 1063d7a5752cSMel Gorman { 1064d7a5752cSMel Gorman unsigned int order; 1065d7a5752cSMel Gorman 1066d7a5752cSMel Gorman info->free_pages = 0; 1067d7a5752cSMel Gorman info->free_blocks_total = 0; 1068d7a5752cSMel Gorman info->free_blocks_suitable = 0; 1069d7a5752cSMel Gorman 1070d7a5752cSMel Gorman for (order = 0; order < MAX_ORDER; order++) { 1071d7a5752cSMel Gorman unsigned long blocks; 1072d7a5752cSMel Gorman 1073af1c31acSLiu Shixin /* 1074af1c31acSLiu Shixin * Count number of free blocks. 1075af1c31acSLiu Shixin * 1076af1c31acSLiu Shixin * Access to nr_free is lockless as nr_free is used only for 1077af1c31acSLiu Shixin * diagnostic purposes. Use data_race to avoid KCSAN warning. 1078af1c31acSLiu Shixin */ 1079af1c31acSLiu Shixin blocks = data_race(zone->free_area[order].nr_free); 1080d7a5752cSMel Gorman info->free_blocks_total += blocks; 1081d7a5752cSMel Gorman 1082d7a5752cSMel Gorman /* Count free base pages */ 1083d7a5752cSMel Gorman info->free_pages += blocks << order; 1084d7a5752cSMel Gorman 1085d7a5752cSMel Gorman /* Count the suitable free blocks */ 1086d7a5752cSMel Gorman if (order >= suitable_order) 1087d7a5752cSMel Gorman info->free_blocks_suitable += blocks << 1088d7a5752cSMel Gorman (order - suitable_order); 1089d7a5752cSMel Gorman } 1090d7a5752cSMel Gorman } 1091f1a5ab12SMel Gorman 1092f1a5ab12SMel Gorman /* 1093f1a5ab12SMel Gorman * A fragmentation index only makes sense if an allocation of a requested 1094f1a5ab12SMel Gorman * size would fail. If that is true, the fragmentation index indicates 1095f1a5ab12SMel Gorman * whether external fragmentation or a lack of memory was the problem. 
1096f1a5ab12SMel Gorman * The value can be used to determine if page reclaim or compaction 1097f1a5ab12SMel Gorman * should be used 1098f1a5ab12SMel Gorman */ 109956de7263SMel Gorman static int __fragmentation_index(unsigned int order, struct contig_page_info *info) 1100f1a5ab12SMel Gorman { 1101f1a5ab12SMel Gorman unsigned long requested = 1UL << order; 1102f1a5ab12SMel Gorman 110388d6ac40SWen Yang if (WARN_ON_ONCE(order >= MAX_ORDER)) 110488d6ac40SWen Yang return 0; 110588d6ac40SWen Yang 1106f1a5ab12SMel Gorman if (!info->free_blocks_total) 1107f1a5ab12SMel Gorman return 0; 1108f1a5ab12SMel Gorman 1109f1a5ab12SMel Gorman /* Fragmentation index only makes sense when a request would fail */ 1110f1a5ab12SMel Gorman if (info->free_blocks_suitable) 1111f1a5ab12SMel Gorman return -1000; 1112f1a5ab12SMel Gorman 1113f1a5ab12SMel Gorman /* 1114f1a5ab12SMel Gorman * Index is between 0 and 1 so return within 3 decimal places 1115f1a5ab12SMel Gorman * 1116f1a5ab12SMel Gorman * 0 => allocation would fail due to lack of memory 1117f1a5ab12SMel Gorman * 1 => allocation would fail due to fragmentation 1118f1a5ab12SMel Gorman */ 1119f1a5ab12SMel Gorman return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total); 1120f1a5ab12SMel Gorman } 112156de7263SMel Gorman 1122facdaa91SNitin Gupta /* 1123facdaa91SNitin Gupta * Calculates external fragmentation within a zone wrt the given order. 1124facdaa91SNitin Gupta * It is defined as the percentage of pages found in blocks of size 1125facdaa91SNitin Gupta * less than 1 << order. It returns values in range [0, 100]. 
1126facdaa91SNitin Gupta */ 1127d34c0a75SNitin Gupta unsigned int extfrag_for_order(struct zone *zone, unsigned int order) 1128facdaa91SNitin Gupta { 1129facdaa91SNitin Gupta struct contig_page_info info; 1130facdaa91SNitin Gupta 1131facdaa91SNitin Gupta fill_contig_page_info(zone, order, &info); 1132facdaa91SNitin Gupta if (info.free_pages == 0) 1133facdaa91SNitin Gupta return 0; 1134facdaa91SNitin Gupta 1135facdaa91SNitin Gupta return div_u64((info.free_pages - 1136facdaa91SNitin Gupta (info.free_blocks_suitable << order)) * 100, 1137facdaa91SNitin Gupta info.free_pages); 1138facdaa91SNitin Gupta } 1139facdaa91SNitin Gupta 114056de7263SMel Gorman /* Same as __fragmentation index but allocs contig_page_info on stack */ 114156de7263SMel Gorman int fragmentation_index(struct zone *zone, unsigned int order) 114256de7263SMel Gorman { 114356de7263SMel Gorman struct contig_page_info info; 114456de7263SMel Gorman 114556de7263SMel Gorman fill_contig_page_info(zone, order, &info); 114656de7263SMel Gorman return __fragmentation_index(order, &info); 114756de7263SMel Gorman } 1148d7a5752cSMel Gorman #endif 1149d7a5752cSMel Gorman 1150ebc5d83dSKonstantin Khlebnikov #if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \ 1151ebc5d83dSKonstantin Khlebnikov defined(CONFIG_NUMA) || defined(CONFIG_MEMCG) 1152fa25c503SKOSAKI Motohiro #ifdef CONFIG_ZONE_DMA 1153fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA(xx) xx "_dma", 1154fa25c503SKOSAKI Motohiro #else 1155fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA(xx) 1156fa25c503SKOSAKI Motohiro #endif 1157fa25c503SKOSAKI Motohiro 1158fa25c503SKOSAKI Motohiro #ifdef CONFIG_ZONE_DMA32 1159fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA32(xx) xx "_dma32", 1160fa25c503SKOSAKI Motohiro #else 1161fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA32(xx) 1162fa25c503SKOSAKI Motohiro #endif 1163fa25c503SKOSAKI Motohiro 1164fa25c503SKOSAKI Motohiro #ifdef CONFIG_HIGHMEM 1165fa25c503SKOSAKI Motohiro #define TEXT_FOR_HIGHMEM(xx) xx "_high", 
1166fa25c503SKOSAKI Motohiro #else 1167fa25c503SKOSAKI Motohiro #define TEXT_FOR_HIGHMEM(xx) 1168fa25c503SKOSAKI Motohiro #endif 1169fa25c503SKOSAKI Motohiro 1170fa25c503SKOSAKI Motohiro #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \ 1171fa25c503SKOSAKI Motohiro TEXT_FOR_HIGHMEM(xx) xx "_movable", 1172fa25c503SKOSAKI Motohiro 1173fa25c503SKOSAKI Motohiro const char * const vmstat_text[] = { 11748d92890bSNeilBrown /* enum zone_stat_item counters */ 1175fa25c503SKOSAKI Motohiro "nr_free_pages", 117671c799f4SMinchan Kim "nr_zone_inactive_anon", 117771c799f4SMinchan Kim "nr_zone_active_anon", 117871c799f4SMinchan Kim "nr_zone_inactive_file", 117971c799f4SMinchan Kim "nr_zone_active_file", 118071c799f4SMinchan Kim "nr_zone_unevictable", 11815a1c84b4SMel Gorman "nr_zone_write_pending", 1182fa25c503SKOSAKI Motohiro "nr_mlock", 1183fa25c503SKOSAKI Motohiro "nr_bounce", 118491537feeSMinchan Kim #if IS_ENABLED(CONFIG_ZSMALLOC) 118591537feeSMinchan Kim "nr_zspages", 118691537feeSMinchan Kim #endif 11873a321d2aSKemi Wang "nr_free_cma", 11883a321d2aSKemi Wang 11893a321d2aSKemi Wang /* enum numa_stat_item counters */ 1190fa25c503SKOSAKI Motohiro #ifdef CONFIG_NUMA 1191fa25c503SKOSAKI Motohiro "numa_hit", 1192fa25c503SKOSAKI Motohiro "numa_miss", 1193fa25c503SKOSAKI Motohiro "numa_foreign", 1194fa25c503SKOSAKI Motohiro "numa_interleave", 1195fa25c503SKOSAKI Motohiro "numa_local", 1196fa25c503SKOSAKI Motohiro "numa_other", 1197fa25c503SKOSAKI Motohiro #endif 119809316c09SKonstantin Khlebnikov 11999d7ea9a2SKonstantin Khlebnikov /* enum node_stat_item counters */ 1200599d0c95SMel Gorman "nr_inactive_anon", 1201599d0c95SMel Gorman "nr_active_anon", 1202599d0c95SMel Gorman "nr_inactive_file", 1203599d0c95SMel Gorman "nr_active_file", 1204599d0c95SMel Gorman "nr_unevictable", 1205385386cfSJohannes Weiner "nr_slab_reclaimable", 1206385386cfSJohannes Weiner "nr_slab_unreclaimable", 1207599d0c95SMel Gorman "nr_isolated_anon", 1208599d0c95SMel Gorman 
"nr_isolated_file", 120968d48e6aSJohannes Weiner "workingset_nodes", 1210170b04b7SJoonsoo Kim "workingset_refault_anon", 1211170b04b7SJoonsoo Kim "workingset_refault_file", 1212170b04b7SJoonsoo Kim "workingset_activate_anon", 1213170b04b7SJoonsoo Kim "workingset_activate_file", 1214170b04b7SJoonsoo Kim "workingset_restore_anon", 1215170b04b7SJoonsoo Kim "workingset_restore_file", 12161e6b1085SMel Gorman "workingset_nodereclaim", 121750658e2eSMel Gorman "nr_anon_pages", 121850658e2eSMel Gorman "nr_mapped", 121911fb9989SMel Gorman "nr_file_pages", 122011fb9989SMel Gorman "nr_dirty", 122111fb9989SMel Gorman "nr_writeback", 122211fb9989SMel Gorman "nr_writeback_temp", 122311fb9989SMel Gorman "nr_shmem", 122411fb9989SMel Gorman "nr_shmem_hugepages", 122511fb9989SMel Gorman "nr_shmem_pmdmapped", 122660fbf0abSSong Liu "nr_file_hugepages", 122760fbf0abSSong Liu "nr_file_pmdmapped", 122811fb9989SMel Gorman "nr_anon_transparent_hugepages", 1229c4a25635SMel Gorman "nr_vmscan_write", 1230c4a25635SMel Gorman "nr_vmscan_immediate_reclaim", 1231c4a25635SMel Gorman "nr_dirtied", 1232c4a25635SMel Gorman "nr_written", 12338cd7c588SMel Gorman "nr_throttled_written", 1234b29940c1SVlastimil Babka "nr_kernel_misc_reclaimable", 12351970dc6fSJohn Hubbard "nr_foll_pin_acquired", 12361970dc6fSJohn Hubbard "nr_foll_pin_released", 1237991e7673SShakeel Butt "nr_kernel_stack", 1238991e7673SShakeel Butt #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) 1239991e7673SShakeel Butt "nr_shadow_call_stack", 1240991e7673SShakeel Butt #endif 1241f0c0c115SShakeel Butt "nr_page_table_pages", 1242b6038942SShakeel Butt #ifdef CONFIG_SWAP 1243b6038942SShakeel Butt "nr_swapcached", 1244b6038942SShakeel Butt #endif 1245599d0c95SMel Gorman 124609316c09SKonstantin Khlebnikov /* enum writeback_stat_item counters */ 1247fa25c503SKOSAKI Motohiro "nr_dirty_threshold", 1248fa25c503SKOSAKI Motohiro "nr_dirty_background_threshold", 1249fa25c503SKOSAKI Motohiro 1250ebc5d83dSKonstantin Khlebnikov #if 
defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) 125109316c09SKonstantin Khlebnikov /* enum vm_event_item counters */ 1252fa25c503SKOSAKI Motohiro "pgpgin", 1253fa25c503SKOSAKI Motohiro "pgpgout", 1254fa25c503SKOSAKI Motohiro "pswpin", 1255fa25c503SKOSAKI Motohiro "pswpout", 1256fa25c503SKOSAKI Motohiro 1257fa25c503SKOSAKI Motohiro TEXTS_FOR_ZONES("pgalloc") 12587cc30fcfSMel Gorman TEXTS_FOR_ZONES("allocstall") 12597cc30fcfSMel Gorman TEXTS_FOR_ZONES("pgskip") 1260fa25c503SKOSAKI Motohiro 1261fa25c503SKOSAKI Motohiro "pgfree", 1262fa25c503SKOSAKI Motohiro "pgactivate", 1263fa25c503SKOSAKI Motohiro "pgdeactivate", 1264f7ad2a6cSShaohua Li "pglazyfree", 1265fa25c503SKOSAKI Motohiro 1266fa25c503SKOSAKI Motohiro "pgfault", 1267fa25c503SKOSAKI Motohiro "pgmajfault", 1268854e9ed0SMinchan Kim "pglazyfreed", 1269fa25c503SKOSAKI Motohiro 1270599d0c95SMel Gorman "pgrefill", 1271798a6b87SPeter Xu "pgreuse", 1272599d0c95SMel Gorman "pgsteal_kswapd", 1273599d0c95SMel Gorman "pgsteal_direct", 1274668e4147SYang Shi "pgdemote_kswapd", 1275668e4147SYang Shi "pgdemote_direct", 1276599d0c95SMel Gorman "pgscan_kswapd", 1277599d0c95SMel Gorman "pgscan_direct", 127868243e76SMel Gorman "pgscan_direct_throttle", 1279497a6c1bSJohannes Weiner "pgscan_anon", 1280497a6c1bSJohannes Weiner "pgscan_file", 1281497a6c1bSJohannes Weiner "pgsteal_anon", 1282497a6c1bSJohannes Weiner "pgsteal_file", 1283fa25c503SKOSAKI Motohiro 1284fa25c503SKOSAKI Motohiro #ifdef CONFIG_NUMA 1285fa25c503SKOSAKI Motohiro "zone_reclaim_failed", 1286fa25c503SKOSAKI Motohiro #endif 1287fa25c503SKOSAKI Motohiro "pginodesteal", 1288fa25c503SKOSAKI Motohiro "slabs_scanned", 1289fa25c503SKOSAKI Motohiro "kswapd_inodesteal", 1290fa25c503SKOSAKI Motohiro "kswapd_low_wmark_hit_quickly", 1291fa25c503SKOSAKI Motohiro "kswapd_high_wmark_hit_quickly", 1292fa25c503SKOSAKI Motohiro "pageoutrun", 1293fa25c503SKOSAKI Motohiro 1294fa25c503SKOSAKI Motohiro "pgrotated", 1295fa25c503SKOSAKI Motohiro 12965509a5d2SDave Hansen 
"drop_pagecache", 12975509a5d2SDave Hansen "drop_slab", 12988e675f7aSKonstantin Khlebnikov "oom_kill", 12995509a5d2SDave Hansen 130003c5a6e1SMel Gorman #ifdef CONFIG_NUMA_BALANCING 130103c5a6e1SMel Gorman "numa_pte_updates", 130272403b4aSMel Gorman "numa_huge_pte_updates", 130303c5a6e1SMel Gorman "numa_hint_faults", 130403c5a6e1SMel Gorman "numa_hint_faults_local", 130503c5a6e1SMel Gorman "numa_pages_migrated", 130603c5a6e1SMel Gorman #endif 13075647bc29SMel Gorman #ifdef CONFIG_MIGRATION 13085647bc29SMel Gorman "pgmigrate_success", 13095647bc29SMel Gorman "pgmigrate_fail", 13101a5bae25SAnshuman Khandual "thp_migration_success", 13111a5bae25SAnshuman Khandual "thp_migration_fail", 13121a5bae25SAnshuman Khandual "thp_migration_split", 13135647bc29SMel Gorman #endif 1314fa25c503SKOSAKI Motohiro #ifdef CONFIG_COMPACTION 1315397487dbSMel Gorman "compact_migrate_scanned", 1316397487dbSMel Gorman "compact_free_scanned", 1317397487dbSMel Gorman "compact_isolated", 1318fa25c503SKOSAKI Motohiro "compact_stall", 1319fa25c503SKOSAKI Motohiro "compact_fail", 1320fa25c503SKOSAKI Motohiro "compact_success", 1321698b1b30SVlastimil Babka "compact_daemon_wake", 13227f354a54SDavid Rientjes "compact_daemon_migrate_scanned", 13237f354a54SDavid Rientjes "compact_daemon_free_scanned", 1324fa25c503SKOSAKI Motohiro #endif 1325fa25c503SKOSAKI Motohiro 1326fa25c503SKOSAKI Motohiro #ifdef CONFIG_HUGETLB_PAGE 1327fa25c503SKOSAKI Motohiro "htlb_buddy_alloc_success", 1328fa25c503SKOSAKI Motohiro "htlb_buddy_alloc_fail", 1329fa25c503SKOSAKI Motohiro #endif 1330bbb26920SMinchan Kim #ifdef CONFIG_CMA 1331bbb26920SMinchan Kim "cma_alloc_success", 1332bbb26920SMinchan Kim "cma_alloc_fail", 1333bbb26920SMinchan Kim #endif 1334fa25c503SKOSAKI Motohiro "unevictable_pgs_culled", 1335fa25c503SKOSAKI Motohiro "unevictable_pgs_scanned", 1336fa25c503SKOSAKI Motohiro "unevictable_pgs_rescued", 1337fa25c503SKOSAKI Motohiro "unevictable_pgs_mlocked", 1338fa25c503SKOSAKI Motohiro "unevictable_pgs_munlocked", 
1339fa25c503SKOSAKI Motohiro "unevictable_pgs_cleared", 1340fa25c503SKOSAKI Motohiro "unevictable_pgs_stranded", 1341fa25c503SKOSAKI Motohiro 1342fa25c503SKOSAKI Motohiro #ifdef CONFIG_TRANSPARENT_HUGEPAGE 1343fa25c503SKOSAKI Motohiro "thp_fault_alloc", 1344fa25c503SKOSAKI Motohiro "thp_fault_fallback", 134585b9f46eSDavid Rientjes "thp_fault_fallback_charge", 1346fa25c503SKOSAKI Motohiro "thp_collapse_alloc", 1347fa25c503SKOSAKI Motohiro "thp_collapse_alloc_failed", 134895ecedcdSKirill A. Shutemov "thp_file_alloc", 1349dcdf11eeSDavid Rientjes "thp_file_fallback", 135085b9f46eSDavid Rientjes "thp_file_fallback_charge", 135195ecedcdSKirill A. Shutemov "thp_file_mapped", 1352122afea9SKirill A. Shutemov "thp_split_page", 1353122afea9SKirill A. Shutemov "thp_split_page_failed", 1354f9719a03SKirill A. Shutemov "thp_deferred_split_page", 1355122afea9SKirill A. Shutemov "thp_split_pmd", 1356*e9ea874aSYang Yang "thp_scan_exceed_none_pte", 1357*e9ea874aSYang Yang "thp_scan_exceed_swap_pte", 1358*e9ea874aSYang Yang "thp_scan_exceed_share_pte", 1359ce9311cfSYisheng Xie #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD 1360ce9311cfSYisheng Xie "thp_split_pud", 1361ce9311cfSYisheng Xie #endif 1362d8a8e1f0SKirill A. Shutemov "thp_zero_page_alloc", 1363d8a8e1f0SKirill A. 
	"thp_zero_page_alloc_failed",
	"thp_swpout",
	"thp_swpout_fallback",
#endif
#ifdef CONFIG_MEMORY_BALLOON
	"balloon_inflate",
	"balloon_deflate",
#ifdef CONFIG_BALLOON_COMPACTION
	"balloon_migrate",
#endif
#endif /* CONFIG_MEMORY_BALLOON */
#ifdef CONFIG_DEBUG_TLBFLUSH
	"nr_tlb_remote_flush",
	"nr_tlb_remote_flush_received",
	"nr_tlb_local_flush_all",
	"nr_tlb_local_flush_one",
#endif /* CONFIG_DEBUG_TLBFLUSH */

#ifdef CONFIG_DEBUG_VM_VMACACHE
	"vmacache_find_calls",
	"vmacache_find_hits",
#endif
#ifdef CONFIG_SWAP
	"swap_ra",
	"swap_ra_hit",
#endif
#ifdef CONFIG_X86
	"direct_map_level2_splits",
	"direct_map_level3_splits",
#endif
#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
};
#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */

#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
     defined(CONFIG_PROC_FS)
/*
 * seq_file iterator start: treat *pos as an index into the list of
 * online nodes and return the pg_data_t at that index (or NULL when
 * *pos is past the last online node, which terminates the sequence).
 */
static void *frag_start(struct seq_file *m, loff_t *pos)
{
	pg_data_t *pgdat;
	loff_t node = *pos;

	for (pgdat = first_online_pgdat();
	     pgdat && node;
	     pgdat = next_online_pgdat(pgdat))
		--node;

	return pgdat;
}

/* seq_file iterator step: advance to the next online node. */
static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	(*pos)++;
	return next_online_pgdat(pgdat);
}

/* seq_file iterator stop: nothing to release. */
static void frag_stop(struct seq_file *m, void *arg)
{
}

/*
 * Walk zones in a node and print using a callback.
 * If @assert_populated is true, only use callback for zones that are populated.
 * If @nolock is false, each zone's spinlock is held (IRQs off) around the
 * callback; callers pass @nolock = true when the callback manages locking
 * itself or does not need it.
 */
static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
		bool assert_populated, bool nolock,
		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
{
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;
	unsigned long flags;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (assert_populated && !populated_zone(zone))
			continue;

		if (!nolock)
			spin_lock_irqsave(&zone->lock, flags);
		print(m, pgdat, zone);
		if (!nolock)
			spin_unlock_irqrestore(&zone->lock, flags);
	}
}
#endif

#ifdef CONFIG_PROC_FS
/* /proc/buddyinfo body: one line per zone, free-block counts per order. */
static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
						struct zone *zone)
{
	int order;

	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (order = 0; order < MAX_ORDER; ++order)
		/*
		 * Access to nr_free is lockless as nr_free is used only for
		 * printing purposes. Use data_race to avoid KCSAN warning.
		 */
		seq_printf(m, "%6lu ", data_race(zone->free_area[order].nr_free));
	seq_putc(m, '\n');
}

/*
 * This walks the free areas for each zone.
 */
static int frag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, true, false, frag_show_print);
	return 0;
}

/*
 * /proc/pagetypeinfo free-count body: for each migratetype, count the
 * entries on every order's free list.  Called with zone->lock held by
 * walk_zones_in_node(); the lock is dropped briefly between orders so
 * a huge free list cannot stall other CPUs or trip the lockup detector.
 */
static void pagetypeinfo_showfree_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int order, mtype;

	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
		seq_printf(m, "Node %4d, zone %8s, type %12s ",
					pgdat->node_id,
					zone->name,
					migratetype_names[mtype]);
		for (order = 0; order < MAX_ORDER; ++order) {
			unsigned long freecount = 0;
			struct free_area *area;
			struct list_head *curr;
			bool overflow = false;

			area = &(zone->free_area[order]);

			list_for_each(curr, &area->free_list[mtype]) {
				/*
				 * Cap the free_list iteration because it might
				 * be really large and we are under a spinlock
				 * so a long time spent here could trigger a
				 * hard lockup detector. Anyway this is a
				 * debugging tool so knowing there is a handful
				 * of pages of this order should be more than
				 * sufficient.
				 */
				if (++freecount >= 100000) {
					overflow = true;
					break;
				}
			}
			seq_printf(m, "%s%6lu ", overflow ? ">" : "", freecount);
			/*
			 * Briefly drop the zone lock to give other lock
			 * waiters and the scheduler a chance; the counts
			 * are only a debugging snapshot anyway.
			 */
			spin_unlock_irq(&zone->lock);
			cond_resched();
			spin_lock_irq(&zone->lock);
		}
		seq_putc(m, '\n');
	}
}

/* Print out the free pages at each order for each migratetype */
static void pagetypeinfo_showfree(struct seq_file *m, void *arg)
{
	int order;
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* Print header */
	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6d ", order);
	seq_putc(m, '\n');

	walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
}

/*
 * Count pageblocks per migratetype by sampling one page per pageblock.
 * Offline or foreign-zone pfns (zones may overlap in pfn space) are
 * skipped.
 */
static void pagetypeinfo_showblockcount_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int mtype;
	unsigned long pfn;
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count[MIGRATE_TYPES] = { 0, };

	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		struct page *page;

		page = pfn_to_online_page(pfn);
		if (!page)
			continue;

		/* Watch for unexpected holes punched in the memmap */
		if (page_zone(page) != zone)
			continue;

		mtype = get_pageblock_migratetype(page);

		if (mtype < MIGRATE_TYPES)
			count[mtype]++;
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12lu ", count[mtype]);
	seq_putc(m, '\n');
}

/* Print out the number of pageblocks for each migratetype */
static void pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
{
	int mtype;
	pg_data_t *pgdat = (pg_data_t *)arg;

	seq_printf(m, "\n%-23s", "Number of blocks type ");
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12s ", migratetype_names[mtype]);
	seq_putc(m, '\n');
	walk_zones_in_node(m, pgdat, true, false,
		pagetypeinfo_showblockcount_print);
}

/*
 * Print out the number of pageblocks for each migratetype that contain pages
 * of other types. This gives an indication of how well fallbacks are being
 * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
 * to determine what is going on
 */
static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
{
#ifdef CONFIG_PAGE_OWNER
	int mtype;

	if (!static_branch_unlikely(&page_owner_inited))
		return;

	/* Flush pcp lists so pages are attributed to their pageblocks. */
	drain_all_pages(NULL);

	seq_printf(m, "\n%-23s", "Number of mixed blocks ");
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12s ", migratetype_names[mtype]);
	seq_putc(m, '\n');

	/* nolock: the print callback takes page_owner's own locking path */
	walk_zones_in_node(m, pgdat, true, true,
		pagetypeinfo_showmixedcount_print);
#endif /* CONFIG_PAGE_OWNER */
}

/*
 * This prints out statistics in relation to grouping pages by mobility.
 * It is expensive to collect so do not constantly read the file.
 */
static int pagetypeinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_MEMORY))
		return 0;

	seq_printf(m, "Page block order: %d\n", pageblock_order);
	seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
	seq_putc(m, '\n');
	pagetypeinfo_showfree(m, pgdat);
	pagetypeinfo_showblockcount(m, pgdat);
	pagetypeinfo_showmixedcount(m, pgdat);

	return 0;
}

/* /proc/buddyinfo: iterate nodes, one frag_show() per node. */
static const struct seq_operations fragmentation_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= frag_show,
};

/* /proc/pagetypeinfo: same node iteration, different show callback. */
static const struct seq_operations pagetypeinfo_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= pagetypeinfo_show,
};

/*
 * Return true iff @zone is the first populated zone of @pgdat; used so
 * zoneinfo prints the per-node stats exactly once per node.
 */
static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
{
	int zid;

	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
		struct zone *compare = &pgdat->node_zones[zid];

		if (populated_zone(compare))
			return zone == compare;
	}

	return false;
}

/*
 * /proc/zoneinfo body for one zone: watermarks, sizes, lowmem_reserve
 * protection, per-zone (and NUMA) counters, and per-cpu pageset state.
 * Unpopulated zones stop after the protection array since nothing else
 * is meaningful for them.
 */
static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
							struct zone *zone)
{
	int i;
	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
	if (is_zone_first_populated(pgdat, zone)) {
		seq_printf(m, "\n  per-node stats");
		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
			unsigned long pages = node_page_state_pages(pgdat, i);

			/* THP-backed counters are reported in base pages */
			if (vmstat_item_print_in_thp(i))
				pages /= HPAGE_PMD_NR;
			seq_printf(m, "\n      %-12s %lu", node_stat_name(i),
				   pages);
		}
	}
	seq_printf(m,
		   "\n  pages free     %lu"
		   "\n        boost    %lu"
		   "\n        min      %lu"
		   "\n        low      %lu"
		   "\n        high     %lu"
		   "\n        spanned  %lu"
		   "\n        present  %lu"
		   "\n        managed  %lu"
		   "\n        cma      %lu",
		   zone_page_state(zone, NR_FREE_PAGES),
		   zone->watermark_boost,
		   min_wmark_pages(zone),
		   low_wmark_pages(zone),
		   high_wmark_pages(zone),
		   zone->spanned_pages,
		   zone->present_pages,
		   zone_managed_pages(zone),
		   zone_cma_pages(zone));

	seq_printf(m,
		   "\n        protection: (%ld",
		   zone->lowmem_reserve[0]);
	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
		seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
	seq_putc(m, ')');

	/* If unpopulated, no other information is useful */
	if (!populated_zone(zone)) {
		seq_putc(m, '\n');
		return;
	}

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		seq_printf(m, "\n      %-12s %lu", zone_stat_name(i),
			   zone_page_state(zone, i));

#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
		seq_printf(m, "\n      %-12s %lu", numa_stat_name(i),
			   zone_numa_event_state(zone, i));
#endif

	seq_printf(m, "\n  pagesets");
	for_each_online_cpu(i) {
		struct per_cpu_pages *pcp;
		struct per_cpu_zonestat __maybe_unused *pzstats;

		pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
		seq_printf(m,
			   "\n    cpu: %i"
			   "\n              count: %i"
			   "\n              high:  %i"
			   "\n              batch: %i",
			   i,
			   pcp->count,
			   pcp->high,
			   pcp->batch);
#ifdef CONFIG_SMP
		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
		seq_printf(m, "\n  vm stats threshold: %d",
			   pzstats->stat_threshold);
#endif
	}
	seq_printf(m,
		   "\n  node_unreclaimable:  %u"
		   "\n  start_pfn:           %lu",
		   pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
		   zone->zone_start_pfn);
	seq_putc(m, '\n');
}

/*
 * Output information about zones in @pgdat.  All zones are printed regardless
 * of whether they are populated or not: lowmem_reserve_ratio operates on the
 * set of all zones and userspace would not be aware of such zones if they are
 * suppressed here (zoneinfo displays the effect of lowmem_reserve_ratio).
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
	return 0;
}

static const struct seq_operations zoneinfo_op = {
	.start	= frag_start, /* iterate over all zones. The same as in
			       * fragmentation. */
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= zoneinfo_show,
};

/* Total number of lines emitted by /proc/vmstat (event items are optional). */
#define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
			 NR_VM_NUMA_EVENT_ITEMS + \
			 NR_VM_NODE_STAT_ITEMS + \
			 NR_VM_WRITEBACK_STAT_ITEMS + \
			 (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
			  NR_VM_EVENT_ITEMS : 0))

/*
 * /proc/vmstat start: snapshot every counter group into one kmalloc'ed
 * array (freed in vmstat_stop()) so the file is internally consistent,
 * then return the element at *pos.
 */
static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
	unsigned long *v;
	int i;

	if (*pos >= NR_VMSTAT_ITEMS)
		return NULL;

	BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
	fold_vm_numa_events();
	v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
	m->private = v;
	if (!v)
		return ERR_PTR(-ENOMEM);
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		v[i] = global_zone_page_state(i);
	v += NR_VM_ZONE_STAT_ITEMS;

#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
		v[i] = global_numa_event_state(i);
	v += NR_VM_NUMA_EVENT_ITEMS;
#endif

	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
		v[i] = global_node_page_state_pages(i);
		/* THP-backed counters are reported in base pages */
		if (vmstat_item_print_in_thp(i))
			v[i] /= HPAGE_PMD_NR;
	}
	v += NR_VM_NODE_STAT_ITEMS;

	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
			    v + NR_DIRTY_THRESHOLD);
	v += NR_VM_WRITEBACK_STAT_ITEMS;

#ifdef CONFIG_VM_EVENT_COUNTERS
	all_vm_events(v);
	v[PGPGIN] /= 2;		/* sectors -> kbytes */
	v[PGPGOUT] /= 2;
#endif
	return (unsigned long *)m->private + *pos;
}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
	(*pos)++;
	if (*pos >= NR_VMSTAT_ITEMS)
		return NULL;
	return (unsigned long *)m->private + *pos;
}

/* Emit one "name value" line from the snapshot taken in vmstat_start(). */
static int vmstat_show(struct seq_file *m, void *arg)
{
	unsigned long *l = arg;
	unsigned long off = l - (unsigned long *)m->private;

	seq_puts(m, vmstat_text[off]);
	seq_put_decimal_ull(m, " ", *l);
	seq_putc(m, '\n');

	if (off == NR_VMSTAT_ITEMS - 1) {
		/*
		 * We've come to the end - add any deprecated counters to avoid
		 * breaking userspace which might depend on them being present.
		 */
		seq_puts(m, "nr_unstable 0\n");
	}
	return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
	kfree(m->private);
	m->private = NULL;
}

static const struct seq_operations vmstat_op = {
	.start	= vmstat_start,
	.next	= vmstat_next,
	.stop	= vmstat_stop,
	.show	= vmstat_show,
};
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;

#ifdef CONFIG_PROC_FS
/* Work callback run on each CPU by vmstat_refresh() below. */
static void refresh_vm_stats(struct work_struct *work)
{
	refresh_cpu_vm_stats(true);
}

int vmstat_refresh(struct ctl_table *table, int write,
		   void *buffer, size_t *lenp, loff_t *ppos)
{
	long val;
	int err;
	int i;

	/*
	 * The regular update, every sysctl_stat_interval, may come later
	 * than expected: leaving a significant amount in per_cpu buckets.
	 * This is particularly misleading when checking a quantity of HUGE
	 * pages, immediately after running a test. /proc/sys/vm/stat_refresh,
	 * which can equally be echo'ed to or cat'ted from (by root),
	 * can be used to update the stats just before reading them.
	 *
	 * Oh, and since global_zone_page_state() etc. are so careful to hide
	 * transiently negative values, report an error here if any of
	 * the stats is negative, so we know to go looking for imbalance.
	 */
	err = schedule_on_each_cpu(refresh_vm_stats);
	if (err)
		return err;
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
		/*
		 * Skip checking stats known to go negative occasionally.
		 */
		switch (i) {
		case NR_ZONE_WRITE_PENDING:
		case NR_FREE_CMA_PAGES:
			continue;
		}
		val = atomic_long_read(&vm_zone_stat[i]);
		if (val < 0) {
			pr_warn("%s: %s %ld\n",
				__func__, zone_stat_name(i), val);
		}
	}
	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
		/*
		 * Skip checking stats known to go negative occasionally.
		 */
		switch (i) {
		case NR_WRITEBACK:
			continue;
		}
		val = atomic_long_read(&vm_node_stat[i]);
		if (val < 0) {
			pr_warn("%s: %s %ld\n",
				__func__, node_stat_name(i), val);
		}
	}
	if (write)
		*ppos += *lenp;
	else
		*lenp = 0;
	return 0;
}
#endif /* CONFIG_PROC_FS */

/* Periodic per-cpu work: fold local diffs into the global counters. */
static void vmstat_update(struct work_struct *w)
{
	if (refresh_cpu_vm_stats(true)) {
		/*
		 * Counters were updated so we expect more updates
		 * to occur in the future. Keep on running the
		 * update worker thread.
		 */
		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
				this_cpu_ptr(&vmstat_work),
				round_jiffies_relative(sysctl_stat_interval));
	}
}

/*
 * Check if the diffs for a certain cpu indicate that
 * an update is needed.
 */
static bool need_update(int cpu)
{
	pg_data_t *last_pgdat = NULL;
	struct zone *zone;

	for_each_populated_zone(zone) {
		struct per_cpu_zonestat *pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
		struct per_cpu_nodestat *n;

		/*
		 * The fast way of checking if there are any vmstat diffs.
		 */
		if (memchr_inv(pzstats->vm_stat_diff, 0, sizeof(pzstats->vm_stat_diff)))
			return true;

		/* Node diffs are shared by the node's zones: check once */
		if (last_pgdat == zone->zone_pgdat)
			continue;
		last_pgdat = zone->zone_pgdat;
		n = per_cpu_ptr(zone->zone_pgdat->per_cpu_nodestats, cpu);
		if (memchr_inv(n->vm_node_stat_diff, 0, sizeof(n->vm_node_stat_diff)))
			return true;
	}
	return false;
}

/*
 * Switch off vmstat processing and then fold all the remaining differentials
 * until the diffs stay at zero. The function is used by NOHZ and can only be
 * invoked when tick processing is not active.
 */
void quiet_vmstat(void)
{
	if (system_state != SYSTEM_RUNNING)
		return;

	if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
		return;

	if (!need_update(smp_processor_id()))
		return;

	/*
	 * Just refresh counters and do not care about the pending delayed
	 * vmstat_update. It doesn't fire that often to matter and canceling
	 * it would be too expensive from this path.
	 * vmstat_shepherd will take care about that for us.
	 */
	refresh_cpu_vm_stats(false);
}

/*
 * Shepherd worker thread that checks the
 * differentials of processors that have their worker
 * threads for vm statistics updates disabled because of
 * inactivity.
19967cc36bbdSChristoph Lameter */ 19977cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w); 19987cc36bbdSChristoph Lameter 19990eb77e98SChristoph Lameter static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd); 20007cc36bbdSChristoph Lameter 20017cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w) 20027cc36bbdSChristoph Lameter { 20037cc36bbdSChristoph Lameter int cpu; 20047cc36bbdSChristoph Lameter 20057625eccdSSebastian Andrzej Siewior cpus_read_lock(); 20067cc36bbdSChristoph Lameter /* Check processors whose vmstat worker threads have been disabled */ 20077b8da4c7SChristoph Lameter for_each_online_cpu(cpu) { 2008f01f17d3SMichal Hocko struct delayed_work *dw = &per_cpu(vmstat_work, cpu); 20097cc36bbdSChristoph Lameter 20107b8da4c7SChristoph Lameter if (!delayed_work_pending(dw) && need_update(cpu)) 2011ce612879SMichal Hocko queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0); 2012fbcc8183SJiang Biao 2013fbcc8183SJiang Biao cond_resched(); 2014f01f17d3SMichal Hocko } 20157625eccdSSebastian Andrzej Siewior cpus_read_unlock(); 20167cc36bbdSChristoph Lameter 20177cc36bbdSChristoph Lameter schedule_delayed_work(&shepherd, 20187cc36bbdSChristoph Lameter round_jiffies_relative(sysctl_stat_interval)); 20197cc36bbdSChristoph Lameter } 20207cc36bbdSChristoph Lameter 20217cc36bbdSChristoph Lameter static void __init start_shepherd_timer(void) 20227cc36bbdSChristoph Lameter { 20237cc36bbdSChristoph Lameter int cpu; 20247cc36bbdSChristoph Lameter 20257cc36bbdSChristoph Lameter for_each_possible_cpu(cpu) 2026ccde8bd4SMichal Hocko INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu), 20277cc36bbdSChristoph Lameter vmstat_update); 20287cc36bbdSChristoph Lameter 20297cc36bbdSChristoph Lameter schedule_delayed_work(&shepherd, 20307cc36bbdSChristoph Lameter round_jiffies_relative(sysctl_stat_interval)); 2031d1187ed2SChristoph Lameter } 2032d1187ed2SChristoph Lameter 203303e86dbaSTim Chen static void __init init_cpu_node_state(void) 
203403e86dbaSTim Chen { 20354c501327SSebastian Andrzej Siewior int node; 203603e86dbaSTim Chen 20374c501327SSebastian Andrzej Siewior for_each_online_node(node) { 20384c501327SSebastian Andrzej Siewior if (cpumask_weight(cpumask_of_node(node)) > 0) 20394c501327SSebastian Andrzej Siewior node_set_state(node, N_CPU); 20404c501327SSebastian Andrzej Siewior } 204103e86dbaSTim Chen } 204203e86dbaSTim Chen 20435438da97SSebastian Andrzej Siewior static int vmstat_cpu_online(unsigned int cpu) 2044807a1bd2SToshi Kani { 20455ee28a44SKAMEZAWA Hiroyuki refresh_zone_stat_thresholds(); 2046ad596925SChristoph Lameter node_set_state(cpu_to_node(cpu), N_CPU); 20475438da97SSebastian Andrzej Siewior return 0; 2048df9ecabaSChristoph Lameter } 2049df9ecabaSChristoph Lameter 20505438da97SSebastian Andrzej Siewior static int vmstat_cpu_down_prep(unsigned int cpu) 20515438da97SSebastian Andrzej Siewior { 20525438da97SSebastian Andrzej Siewior cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu)); 20535438da97SSebastian Andrzej Siewior return 0; 20545438da97SSebastian Andrzej Siewior } 20555438da97SSebastian Andrzej Siewior 20565438da97SSebastian Andrzej Siewior static int vmstat_cpu_dead(unsigned int cpu) 20575438da97SSebastian Andrzej Siewior { 20585438da97SSebastian Andrzej Siewior const struct cpumask *node_cpus; 20595438da97SSebastian Andrzej Siewior int node; 20605438da97SSebastian Andrzej Siewior 20615438da97SSebastian Andrzej Siewior node = cpu_to_node(cpu); 20625438da97SSebastian Andrzej Siewior 20635438da97SSebastian Andrzej Siewior refresh_zone_stat_thresholds(); 20645438da97SSebastian Andrzej Siewior node_cpus = cpumask_of_node(node); 20655438da97SSebastian Andrzej Siewior if (cpumask_weight(node_cpus) > 0) 20665438da97SSebastian Andrzej Siewior return 0; 20675438da97SSebastian Andrzej Siewior 20685438da97SSebastian Andrzej Siewior node_clear_state(node, N_CPU); 20695438da97SSebastian Andrzej Siewior return 0; 20705438da97SSebastian Andrzej Siewior } 20715438da97SSebastian 
Andrzej Siewior 20728f32f7e5SAlexey Dobriyan #endif 2073df9ecabaSChristoph Lameter 2074ce612879SMichal Hocko struct workqueue_struct *mm_percpu_wq; 2075ce612879SMichal Hocko 2076597b7305SMichal Hocko void __init init_mm_internals(void) 2077df9ecabaSChristoph Lameter { 2078ce612879SMichal Hocko int ret __maybe_unused; 20795438da97SSebastian Andrzej Siewior 208080d136e1SMichal Hocko mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0); 2081ce612879SMichal Hocko 2082ce612879SMichal Hocko #ifdef CONFIG_SMP 20835438da97SSebastian Andrzej Siewior ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead", 20845438da97SSebastian Andrzej Siewior NULL, vmstat_cpu_dead); 20855438da97SSebastian Andrzej Siewior if (ret < 0) 20865438da97SSebastian Andrzej Siewior pr_err("vmstat: failed to register 'dead' hotplug state\n"); 20875438da97SSebastian Andrzej Siewior 20885438da97SSebastian Andrzej Siewior ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online", 20895438da97SSebastian Andrzej Siewior vmstat_cpu_online, 20905438da97SSebastian Andrzej Siewior vmstat_cpu_down_prep); 20915438da97SSebastian Andrzej Siewior if (ret < 0) 20925438da97SSebastian Andrzej Siewior pr_err("vmstat: failed to register 'online' hotplug state\n"); 20935438da97SSebastian Andrzej Siewior 20947625eccdSSebastian Andrzej Siewior cpus_read_lock(); 209503e86dbaSTim Chen init_cpu_node_state(); 20967625eccdSSebastian Andrzej Siewior cpus_read_unlock(); 2097d1187ed2SChristoph Lameter 20987cc36bbdSChristoph Lameter start_shepherd_timer(); 20998f32f7e5SAlexey Dobriyan #endif 21008f32f7e5SAlexey Dobriyan #ifdef CONFIG_PROC_FS 2101fddda2b7SChristoph Hellwig proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op); 2102abaed011SMichal Hocko proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op); 2103fddda2b7SChristoph Hellwig proc_create_seq("vmstat", 0444, NULL, &vmstat_op); 2104fddda2b7SChristoph Hellwig proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op); 
21058f32f7e5SAlexey Dobriyan #endif 2106df9ecabaSChristoph Lameter } 2107d7a5752cSMel Gorman 2108d7a5752cSMel Gorman #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION) 2109d7a5752cSMel Gorman 2110d7a5752cSMel Gorman /* 2111d7a5752cSMel Gorman * Return an index indicating how much of the available free memory is 2112d7a5752cSMel Gorman * unusable for an allocation of the requested size. 2113d7a5752cSMel Gorman */ 2114d7a5752cSMel Gorman static int unusable_free_index(unsigned int order, 2115d7a5752cSMel Gorman struct contig_page_info *info) 2116d7a5752cSMel Gorman { 2117d7a5752cSMel Gorman /* No free memory is interpreted as all free memory is unusable */ 2118d7a5752cSMel Gorman if (info->free_pages == 0) 2119d7a5752cSMel Gorman return 1000; 2120d7a5752cSMel Gorman 2121d7a5752cSMel Gorman /* 2122d7a5752cSMel Gorman * Index should be a value between 0 and 1. Return a value to 3 2123d7a5752cSMel Gorman * decimal places. 2124d7a5752cSMel Gorman * 2125d7a5752cSMel Gorman * 0 => no fragmentation 2126d7a5752cSMel Gorman * 1 => high fragmentation 2127d7a5752cSMel Gorman */ 2128d7a5752cSMel Gorman return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages); 2129d7a5752cSMel Gorman 2130d7a5752cSMel Gorman } 2131d7a5752cSMel Gorman 2132d7a5752cSMel Gorman static void unusable_show_print(struct seq_file *m, 2133d7a5752cSMel Gorman pg_data_t *pgdat, struct zone *zone) 2134d7a5752cSMel Gorman { 2135d7a5752cSMel Gorman unsigned int order; 2136d7a5752cSMel Gorman int index; 2137d7a5752cSMel Gorman struct contig_page_info info; 2138d7a5752cSMel Gorman 2139d7a5752cSMel Gorman seq_printf(m, "Node %d, zone %8s ", 2140d7a5752cSMel Gorman pgdat->node_id, 2141d7a5752cSMel Gorman zone->name); 2142d7a5752cSMel Gorman for (order = 0; order < MAX_ORDER; ++order) { 2143d7a5752cSMel Gorman fill_contig_page_info(zone, order, &info); 2144d7a5752cSMel Gorman index = unusable_free_index(order, &info); 2145d7a5752cSMel Gorman seq_printf(m, "%d.%03d 
", index / 1000, index % 1000); 2146d7a5752cSMel Gorman } 2147d7a5752cSMel Gorman 2148d7a5752cSMel Gorman seq_putc(m, '\n'); 2149d7a5752cSMel Gorman } 2150d7a5752cSMel Gorman 2151d7a5752cSMel Gorman /* 2152d7a5752cSMel Gorman * Display unusable free space index 2153d7a5752cSMel Gorman * 2154d7a5752cSMel Gorman * The unusable free space index measures how much of the available free 2155d7a5752cSMel Gorman * memory cannot be used to satisfy an allocation of a given size and is a 2156d7a5752cSMel Gorman * value between 0 and 1. The higher the value, the more of free memory is 2157d7a5752cSMel Gorman * unusable and by implication, the worse the external fragmentation is. This 2158d7a5752cSMel Gorman * can be expressed as a percentage by multiplying by 100. 2159d7a5752cSMel Gorman */ 2160d7a5752cSMel Gorman static int unusable_show(struct seq_file *m, void *arg) 2161d7a5752cSMel Gorman { 2162d7a5752cSMel Gorman pg_data_t *pgdat = (pg_data_t *)arg; 2163d7a5752cSMel Gorman 2164d7a5752cSMel Gorman /* check memoryless node */ 2165a47b53c5SLai Jiangshan if (!node_state(pgdat->node_id, N_MEMORY)) 2166d7a5752cSMel Gorman return 0; 2167d7a5752cSMel Gorman 2168727c080fSVinayak Menon walk_zones_in_node(m, pgdat, true, false, unusable_show_print); 2169d7a5752cSMel Gorman 2170d7a5752cSMel Gorman return 0; 2171d7a5752cSMel Gorman } 2172d7a5752cSMel Gorman 217301a99560SKefeng Wang static const struct seq_operations unusable_sops = { 2174d7a5752cSMel Gorman .start = frag_start, 2175d7a5752cSMel Gorman .next = frag_next, 2176d7a5752cSMel Gorman .stop = frag_stop, 2177d7a5752cSMel Gorman .show = unusable_show, 2178d7a5752cSMel Gorman }; 2179d7a5752cSMel Gorman 218001a99560SKefeng Wang DEFINE_SEQ_ATTRIBUTE(unusable); 2181d7a5752cSMel Gorman 2182f1a5ab12SMel Gorman static void extfrag_show_print(struct seq_file *m, 2183f1a5ab12SMel Gorman pg_data_t *pgdat, struct zone *zone) 2184f1a5ab12SMel Gorman { 2185f1a5ab12SMel Gorman unsigned int order; 2186f1a5ab12SMel Gorman int index; 
2187f1a5ab12SMel Gorman 2188f1a5ab12SMel Gorman /* Alloc on stack as interrupts are disabled for zone walk */ 2189f1a5ab12SMel Gorman struct contig_page_info info; 2190f1a5ab12SMel Gorman 2191f1a5ab12SMel Gorman seq_printf(m, "Node %d, zone %8s ", 2192f1a5ab12SMel Gorman pgdat->node_id, 2193f1a5ab12SMel Gorman zone->name); 2194f1a5ab12SMel Gorman for (order = 0; order < MAX_ORDER; ++order) { 2195f1a5ab12SMel Gorman fill_contig_page_info(zone, order, &info); 219656de7263SMel Gorman index = __fragmentation_index(order, &info); 2197a9970586SLin Feng seq_printf(m, "%2d.%03d ", index / 1000, index % 1000); 2198f1a5ab12SMel Gorman } 2199f1a5ab12SMel Gorman 2200f1a5ab12SMel Gorman seq_putc(m, '\n'); 2201f1a5ab12SMel Gorman } 2202f1a5ab12SMel Gorman 2203f1a5ab12SMel Gorman /* 2204f1a5ab12SMel Gorman * Display fragmentation index for orders that allocations would fail for 2205f1a5ab12SMel Gorman */ 2206f1a5ab12SMel Gorman static int extfrag_show(struct seq_file *m, void *arg) 2207f1a5ab12SMel Gorman { 2208f1a5ab12SMel Gorman pg_data_t *pgdat = (pg_data_t *)arg; 2209f1a5ab12SMel Gorman 2210727c080fSVinayak Menon walk_zones_in_node(m, pgdat, true, false, extfrag_show_print); 2211f1a5ab12SMel Gorman 2212f1a5ab12SMel Gorman return 0; 2213f1a5ab12SMel Gorman } 2214f1a5ab12SMel Gorman 221501a99560SKefeng Wang static const struct seq_operations extfrag_sops = { 2216f1a5ab12SMel Gorman .start = frag_start, 2217f1a5ab12SMel Gorman .next = frag_next, 2218f1a5ab12SMel Gorman .stop = frag_stop, 2219f1a5ab12SMel Gorman .show = extfrag_show, 2220f1a5ab12SMel Gorman }; 2221f1a5ab12SMel Gorman 222201a99560SKefeng Wang DEFINE_SEQ_ATTRIBUTE(extfrag); 2223f1a5ab12SMel Gorman 2224d7a5752cSMel Gorman static int __init extfrag_debug_init(void) 2225d7a5752cSMel Gorman { 2226bde8bd8aSSasikantha babu struct dentry *extfrag_debug_root; 2227bde8bd8aSSasikantha babu 2228d7a5752cSMel Gorman extfrag_debug_root = debugfs_create_dir("extfrag", NULL); 2229d7a5752cSMel Gorman 2230d9f7979cSGreg Kroah-Hartman 
debugfs_create_file("unusable_index", 0444, extfrag_debug_root, NULL, 223101a99560SKefeng Wang &unusable_fops); 2232d7a5752cSMel Gorman 2233d9f7979cSGreg Kroah-Hartman debugfs_create_file("extfrag_index", 0444, extfrag_debug_root, NULL, 223401a99560SKefeng Wang &extfrag_fops); 2235f1a5ab12SMel Gorman 2236d7a5752cSMel Gorman return 0; 2237d7a5752cSMel Gorman } 2238d7a5752cSMel Gorman 2239d7a5752cSMel Gorman module_init(extfrag_debug_init); 2240d7a5752cSMel Gorman #endif 2241