// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/mm/vmstat.c
 *
 * Manages VM statistics
 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * zoned VM statistics
 * Copyright (C) 2006 Silicon Graphics, Inc.,
 * Christoph Lameter <christoph@lameter.com>
 * Copyright (C) 2008-2014 Christoph Lameter
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/vmstat.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/writeback.h>
#include <linux/compaction.h>
#include <linux/mm_inline.h>
#include <linux/page_ext.h>
#include <linux/page_owner.h>

#include "internal.h"

/* Per-cpu NUMA diff counters are u16; leave headroom of 2 below U16_MAX. */
#define NUMA_STATS_THRESHOLD (U16_MAX - 2)

#ifdef CONFIG_NUMA
int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;

/* zero numa counters within a zone */
static void zero_zone_numa_counters(struct zone *zone)
{
	int item, cpu;

	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) {
		atomic_long_set(&zone->vm_numa_stat[item], 0);
		/*
		 * Also clear the per-cpu differentials so stale deltas are
		 * not folded back into the just-zeroed zone counter later.
		 */
		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item]
						= 0;
	}
}

/* zero numa counters of all the populated zones */
static void zero_zones_numa_counters(void)
{
	struct zone *zone;

	for_each_populated_zone(zone)
		zero_zone_numa_counters(zone);
}

/* zero global numa counters */
static void zero_global_numa_counters(void)
{
	int item;

	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++)
		atomic_long_set(&vm_numa_stat[item], 0);
}

/* Invalidate all NUMA statistics: per-zone, per-cpu and global counters. */
static void invalid_numa_statistics(void)
{
	zero_zones_numa_counters();
	zero_global_numa_counters();
}

/* Serializes concurrent writers of the vm.numa_stat sysctl below. */
static DEFINE_MUTEX(vm_numa_stat_lock);

/*
 * sysctl handler for vm.numa_stat.
 *
 * On a write that actually changes the value: enabling flips the
 * vm_numa_stat_key static branch on; disabling flips it off and clears
 * all NUMA counters (they would be stale/meaningless on re-enable).
 * Reads fall through to proc_dointvec_minmax() unchanged.
 * Returns 0 on success or the proc_dointvec_minmax() error.
 */
int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
		void *buffer, size_t *length, loff_t *ppos)
{
	int ret, oldval;

	mutex_lock(&vm_numa_stat_lock);
	if (write)
		oldval = sysctl_vm_numa_stat;
	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
	if (ret || !write)
		goto out;

	/* oldval is only initialized when write != 0; guarded by the test above. */
	if (oldval == sysctl_vm_numa_stat)
		goto out;
	else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) {
		static_branch_enable(&vm_numa_stat_key);
		pr_info("enable numa statistics\n");
	} else {
		static_branch_disable(&vm_numa_stat_key);
		invalid_numa_statistics();
		pr_info("disable numa statistics, and clear numa counters\n");
	}

out:
	mutex_unlock(&vm_numa_stat_lock);
	return ret;
}
#endif

#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

/*
 * Sum each vm event counter across all online CPUs into ret[].
 * Caller must hold the cpu hotplug read lock (see all_vm_events()).
 */
static void sum_vm_events(unsigned long *ret)
{
	int cpu;
	int i;

	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

	for_each_online_cpu(cpu) {
		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
			ret[i] += this->event[i];
	}
}

/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
void all_vm_events(unsigned long *ret)
{
	get_online_cpus();
	sum_vm_events(ret);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(all_vm_events);

/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * but keeps the global counts constant.
 */
void vm_events_fold_cpu(int cpu)
{
	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
	int i;

	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
		count_vm_events(i, fold_state->event[i]);
		fold_state->event[i] = 0;
	}
}

#endif /* CONFIG_VM_EVENT_COUNTERS */

/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_zone_stat);
EXPORT_SYMBOL(vm_numa_stat);
EXPORT_SYMBOL(vm_node_stat);

#ifdef CONFIG_SMP

/*
 * Compute a reduced per-cpu stat threshold for a zone under memory
 * pressure, sized so that worst-case drift across all online CPUs
 * cannot make NR_FREE_PAGES appear above the min watermark when it
 * has in fact been breached.  Returns a value in [1, 125].
 */
int calculate_pressure_threshold(struct zone *zone)
{
	int threshold;
	int watermark_distance;

	/*
	 * As vmstats are not up to date, there is drift between the estimated
	 * and real values. For high thresholds and a high number of CPUs, it
	 * is possible for the min watermark to be breached while the estimated
	 * value looks fine. The pressure threshold is a reduced value such
	 * that even the maximum amount of drift will not accidentally breach
	 * the min watermark
	 */
	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
	threshold = max(1, (int)(watermark_distance / num_online_cpus()));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

/*
 * Compute the normal (no-pressure) per-cpu stat threshold for a zone.
 * Returns a value capped at 125; see the table below for samples.
 */
int calculate_normal_threshold(struct zone *zone)
{
	int threshold;
	int mem;	/* memory in 128 MB units */

	/*
	 * The threshold scales with the number of processors and the amount
	 * of memory per zone. More memory means that we can defer updates for
	 * longer, more processors could lead to more contention.
	 * fls() is used to have a cheap way of logarithmic scaling.
	 *
	 * Some sample thresholds:
	 *
	 * Threshold	Processors	(fls)	Zonesize	fls(mem+1)
	 * ------------------------------------------------------------------
	 * 8		1		1	0.9-1 GB	4
	 * 16		2		2	0.9-1 GB	4
	 * 20		2		2	1-2 GB		5
	 * 24		2		2	2-4 GB		6
	 * 28		2		2	4-8 GB		7
	 * 32		2		2	8-16 GB		8
	 * 4		2		2	<128M		1
	 * 30		4		3	2-4 GB		5
	 * 48		4		3	8-16 GB		8
	 * 32		8		4	1-2 GB		4
	 * 32		8		4	0.9-1GB		4
	 * 10		16		5	<128M		1
	 * 40		16		5	900M		4
	 * 70		64		7	2-4 GB		5
	 * 84		64		7	4-8 GB		6
	 * 108		512		9	4-8 GB		6
	 * 125		1024		10	8-16 GB		8
	 * 125		1024		10	16-32 GB	9
	 */

	/* 27 - PAGE_SHIFT converts pages to 128MB units. */
	mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);

	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

/*
 * Refresh the thresholds for each zone.
 *
 * Recomputes the per-cpu stat_threshold for every populated zone and,
 * derived from that, for every online node (a node's threshold is the
 * max over its zones).  Also updates zone->percpu_drift_mark where the
 * aggregate per-cpu drift could mask a min-watermark breach.
 * Called when the set of online CPUs or the amount of memory changes.
 */
void refresh_zone_stat_thresholds(void)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int cpu;
	int threshold;

	/* Zero current pgdat thresholds */
	for_each_online_pgdat(pgdat) {
		for_each_online_cpu(cpu) {
			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
		}
	}

	for_each_populated_zone(zone) {
		/* Intentionally shadows the outer pgdat iterator above. */
		struct pglist_data *pgdat = zone->zone_pgdat;
		unsigned long max_drift, tolerate_drift;

		threshold = calculate_normal_threshold(zone);

		for_each_online_cpu(cpu) {
			int pgdat_threshold;

			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;

			/* Base nodestat threshold on the largest populated zone. */
			pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
				= max(threshold, pgdat_threshold);
		}

		/*
		 * Only set percpu_drift_mark if there is a danger that
		 * NR_FREE_PAGES reports the low watermark is ok when in fact
		 * the min watermark could be breached by an allocation
		 */
		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
		max_drift = num_online_cpus() * threshold;
		if (max_drift > tolerate_drift)
			zone->percpu_drift_mark = high_wmark_pages(zone) +
					max_drift;
	}
}

/*
 * Apply a caller-computed threshold (e.g. calculate_pressure_threshold)
 * to every zone of @pgdat that has a percpu_drift_mark set; zones
 * without a drift mark keep their normal threshold.
 */
void set_pgdat_percpu_threshold(pg_data_t *pgdat,
				int (*calculate_pressure)(struct zone *))
{
	struct zone *zone;
	int cpu;
	int threshold;
	int i;

	for (i = 0; i < pgdat->nr_zones; i++) {
		zone = &pgdat->node_zones[i];
		if (!zone->percpu_drift_mark)
			continue;

		threshold = (*calculate_pressure)(zone);
		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;
	}
}

/*
 * For use when we know that interrupts are disabled,
 * or when we know that preemption is disabled and that
 * particular counter cannot be updated from interrupt context.
 *
 * The per-cpu differential is updated; once |diff| exceeds the per-cpu
 * stat_threshold it is folded into the global zone counter and reset.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			   long delta)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long x;
	long t;

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(abs(x) > t)) {
		zone_page_state_add(x, zone, item);
		x = 0;
	}
	__this_cpu_write(*p, x);
}
EXPORT_SYMBOL(__mod_zone_page_state);

/*
 * Node counterpart of __mod_zone_page_state(); same preemption/irq
 * requirements.  Byte-based items have delta converted to pages first.
 */
void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
				long delta)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	long x;
	long t;

	if (vmstat_item_in_bytes(item)) {
		/* Byte-counted items must arrive in whole-page multiples. */
		VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
		delta >>= PAGE_SHIFT;
	}

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(abs(x) > t)) {
		node_page_state_add(x, pgdat, item);
		x = 0;
	}
	__this_cpu_write(*p, x);
}
EXPORT_SYMBOL(__mod_node_page_state);

/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * NOTE: These functions are very performance sensitive. Change only
 * with care.
 *
 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 * However, the code must first determine the differential location in a zone
 * based on the processor number and then inc/dec the counter. There is no
 * guarantee without disabling preemption that the processor will not change
 * in between and therefore the atomicity vs. interrupt cannot be exploited
 * in a useful way here.
 */
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		/* Overstep by t/2 so the next fold is pushed further out. */
		s8 overstep = t >> 1;

		zone_page_state_add(v + overstep, zone, item);
		__this_cpu_write(*p, -overstep);
	}
}

/* Node counterpart of __inc_zone_state(); page-counted items only. */
void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	s8 v, t;

	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		node_page_state_add(v + overstep, pgdat, item);
		__this_cpu_write(*p, -overstep);
	}
}

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __inc_node_page_state(struct page *page, enum node_stat_item item)
{
	__inc_node_state(page_pgdat(page), item);
}
EXPORT_SYMBOL(__inc_node_page_state);

/* Mirror of __inc_zone_state() for decrement; oversteps downwards. */
void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_dec_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v < - t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v - overstep, zone, item);
		__this_cpu_write(*p, overstep);
	}
}

/* Node counterpart of __dec_zone_state(); page-counted items only. */
void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	s8 v, t;

	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));

	v = __this_cpu_dec_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v < - t)) {
		s8 overstep = t >> 1;

		node_page_state_add(v - overstep, pgdat, item);
		__this_cpu_write(*p, overstep);
	}
}

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__dec_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__dec_zone_page_state);

void __dec_node_page_state(struct page *page, enum node_stat_item item)
{
	__dec_node_state(page_pgdat(page), item);
}
EXPORT_SYMBOL(__dec_node_page_state);

#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
/*
 * If we have cmpxchg_local support then we do not need to incur the overhead
 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
 *
 * mod_state() modifies the zone counter state through atomic per cpu
 * operations.
 *
 * Overstep mode specifies how overstep should handled:
 *     0       No overstepping
 *     1       Overstepping half of threshold
 *     -1      Overstepping minus half of threshold
*/
static inline void mod_zone_state(struct zone *zone,
       enum zone_stat_item item, long delta, int overstep_mode)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long o, n, t, z;

	do {
		z = 0;  /* overflow to zone counters */

		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong the cpu if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyways
		 * for all cpus in a zone.
		 */
		t = this_cpu_read(pcp->stat_threshold);

		o = this_cpu_read(*p);
		n = delta + o;

		if (abs(n) > t) {
			int os = overstep_mode * (t >> 1) ;

			/* Overflow must be added to zone counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		zone_page_state_add(z, zone, item);
}

void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{
	mod_zone_state(zone, item, delta, 0);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_zone_state(page_zone(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_zone_state(page_zone(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_zone_page_state);

/*
 * Node counterpart of mod_zone_state(): lock-less per-cpu update via
 * this_cpu_cmpxchg, folding into the node counter on threshold overflow.
 *
 * NOTE(review): delta is declared int here while callers such as
 * mod_node_page_state() pass a long — a very large delta would be
 * silently truncated.  Confirm whether this narrowing is intended.
 */
static inline void mod_node_state(struct pglist_data *pgdat,
       enum node_stat_item item, int delta, int overstep_mode)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	long o, n, t, z;

	if (vmstat_item_in_bytes(item)) {
		/* Byte-counted items must arrive in whole-page multiples. */
		VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
		delta >>= PAGE_SHIFT;
	}

	do {
		z = 0;  /* overflow to node counters */

		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong the cpu if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyways
		 * for all cpus in a node.
		 */
		t = this_cpu_read(pcp->stat_threshold);

		o = this_cpu_read(*p);
		n = delta + o;

		if (abs(n) > t) {
			int os = overstep_mode * (t >> 1) ;

			/* Overflow must be added to node counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		node_page_state_add(z, pgdat, item);
}

void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
					long delta)
{
	mod_node_state(pgdat, item, delta, 0);
}
EXPORT_SYMBOL(mod_node_page_state);

void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	mod_node_state(pgdat, item, 1, 1);
}

void inc_node_page_state(struct page *page, enum node_stat_item item)
{
	mod_node_state(page_pgdat(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_node_page_state);

void dec_node_page_state(struct page *page, enum node_stat_item item)
{
	mod_node_state(page_pgdat(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_node_page_state);
#else
/*
 * Use interrupt disable to serialize counter
updates 6157c839120SChristoph Lameter */ 6167c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, 6176cdb18adSHeiko Carstens long delta) 6187c839120SChristoph Lameter { 6197c839120SChristoph Lameter unsigned long flags; 6207c839120SChristoph Lameter 6217c839120SChristoph Lameter local_irq_save(flags); 6227c839120SChristoph Lameter __mod_zone_page_state(zone, item, delta); 6237c839120SChristoph Lameter local_irq_restore(flags); 6247c839120SChristoph Lameter } 6257c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state); 6267c839120SChristoph Lameter 6272244b95aSChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item) 6282244b95aSChristoph Lameter { 6292244b95aSChristoph Lameter unsigned long flags; 6302244b95aSChristoph Lameter struct zone *zone; 6312244b95aSChristoph Lameter 6322244b95aSChristoph Lameter zone = page_zone(page); 6332244b95aSChristoph Lameter local_irq_save(flags); 634ca889e6cSChristoph Lameter __inc_zone_state(zone, item); 6352244b95aSChristoph Lameter local_irq_restore(flags); 6362244b95aSChristoph Lameter } 6372244b95aSChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state); 6382244b95aSChristoph Lameter 6392244b95aSChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item) 6402244b95aSChristoph Lameter { 6412244b95aSChristoph Lameter unsigned long flags; 6422244b95aSChristoph Lameter 6432244b95aSChristoph Lameter local_irq_save(flags); 644a302eb4eSChristoph Lameter __dec_zone_page_state(page, item); 6452244b95aSChristoph Lameter local_irq_restore(flags); 6462244b95aSChristoph Lameter } 6472244b95aSChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state); 6482244b95aSChristoph Lameter 64975ef7184SMel Gorman void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) 65075ef7184SMel Gorman { 65175ef7184SMel Gorman unsigned long flags; 65275ef7184SMel Gorman 65375ef7184SMel Gorman local_irq_save(flags); 65475ef7184SMel Gorman 
__inc_node_state(pgdat, item); 65575ef7184SMel Gorman local_irq_restore(flags); 65675ef7184SMel Gorman } 65775ef7184SMel Gorman EXPORT_SYMBOL(inc_node_state); 65875ef7184SMel Gorman 65975ef7184SMel Gorman void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, 66075ef7184SMel Gorman long delta) 66175ef7184SMel Gorman { 66275ef7184SMel Gorman unsigned long flags; 66375ef7184SMel Gorman 66475ef7184SMel Gorman local_irq_save(flags); 66575ef7184SMel Gorman __mod_node_page_state(pgdat, item, delta); 66675ef7184SMel Gorman local_irq_restore(flags); 66775ef7184SMel Gorman } 66875ef7184SMel Gorman EXPORT_SYMBOL(mod_node_page_state); 66975ef7184SMel Gorman 67075ef7184SMel Gorman void inc_node_page_state(struct page *page, enum node_stat_item item) 67175ef7184SMel Gorman { 67275ef7184SMel Gorman unsigned long flags; 67375ef7184SMel Gorman struct pglist_data *pgdat; 67475ef7184SMel Gorman 67575ef7184SMel Gorman pgdat = page_pgdat(page); 67675ef7184SMel Gorman local_irq_save(flags); 67775ef7184SMel Gorman __inc_node_state(pgdat, item); 67875ef7184SMel Gorman local_irq_restore(flags); 67975ef7184SMel Gorman } 68075ef7184SMel Gorman EXPORT_SYMBOL(inc_node_page_state); 68175ef7184SMel Gorman 68275ef7184SMel Gorman void dec_node_page_state(struct page *page, enum node_stat_item item) 68375ef7184SMel Gorman { 68475ef7184SMel Gorman unsigned long flags; 68575ef7184SMel Gorman 68675ef7184SMel Gorman local_irq_save(flags); 68775ef7184SMel Gorman __dec_node_page_state(page, item); 68875ef7184SMel Gorman local_irq_restore(flags); 68975ef7184SMel Gorman } 69075ef7184SMel Gorman EXPORT_SYMBOL(dec_node_page_state); 69175ef7184SMel Gorman #endif 6927cc36bbdSChristoph Lameter 6937cc36bbdSChristoph Lameter /* 6947cc36bbdSChristoph Lameter * Fold a differential into the global counters. 6957cc36bbdSChristoph Lameter * Returns the number of counters updated. 
6967cc36bbdSChristoph Lameter */ 6973a321d2aSKemi Wang #ifdef CONFIG_NUMA 6983a321d2aSKemi Wang static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff) 6993a321d2aSKemi Wang { 7003a321d2aSKemi Wang int i; 7013a321d2aSKemi Wang int changes = 0; 7023a321d2aSKemi Wang 7033a321d2aSKemi Wang for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 7043a321d2aSKemi Wang if (zone_diff[i]) { 7053a321d2aSKemi Wang atomic_long_add(zone_diff[i], &vm_zone_stat[i]); 7063a321d2aSKemi Wang changes++; 7073a321d2aSKemi Wang } 7083a321d2aSKemi Wang 7093a321d2aSKemi Wang for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) 7103a321d2aSKemi Wang if (numa_diff[i]) { 7113a321d2aSKemi Wang atomic_long_add(numa_diff[i], &vm_numa_stat[i]); 7123a321d2aSKemi Wang changes++; 7133a321d2aSKemi Wang } 7143a321d2aSKemi Wang 7153a321d2aSKemi Wang for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) 7163a321d2aSKemi Wang if (node_diff[i]) { 7173a321d2aSKemi Wang atomic_long_add(node_diff[i], &vm_node_stat[i]); 7183a321d2aSKemi Wang changes++; 7193a321d2aSKemi Wang } 7203a321d2aSKemi Wang return changes; 7213a321d2aSKemi Wang } 7223a321d2aSKemi Wang #else 72375ef7184SMel Gorman static int fold_diff(int *zone_diff, int *node_diff) 7244edb0748SChristoph Lameter { 7254edb0748SChristoph Lameter int i; 7267cc36bbdSChristoph Lameter int changes = 0; 7274edb0748SChristoph Lameter 7284edb0748SChristoph Lameter for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 72975ef7184SMel Gorman if (zone_diff[i]) { 73075ef7184SMel Gorman atomic_long_add(zone_diff[i], &vm_zone_stat[i]); 73175ef7184SMel Gorman changes++; 73275ef7184SMel Gorman } 73375ef7184SMel Gorman 73475ef7184SMel Gorman for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) 73575ef7184SMel Gorman if (node_diff[i]) { 73675ef7184SMel Gorman atomic_long_add(node_diff[i], &vm_node_stat[i]); 7377cc36bbdSChristoph Lameter changes++; 7387cc36bbdSChristoph Lameter } 7397cc36bbdSChristoph Lameter return changes; 7404edb0748SChristoph Lameter } 7413a321d2aSKemi Wang #endif /* CONFIG_NUMA */ 
7424edb0748SChristoph Lameter 7432244b95aSChristoph Lameter /* 7442bb921e5SChristoph Lameter * Update the zone counters for the current cpu. 745a7f75e25SChristoph Lameter * 7464037d452SChristoph Lameter * Note that refresh_cpu_vm_stats strives to only access 7474037d452SChristoph Lameter * node local memory. The per cpu pagesets on remote zones are placed 7484037d452SChristoph Lameter * in the memory local to the processor using that pageset. So the 7494037d452SChristoph Lameter * loop over all zones will access a series of cachelines local to 7504037d452SChristoph Lameter * the processor. 7514037d452SChristoph Lameter * 7524037d452SChristoph Lameter * The call to zone_page_state_add updates the cachelines with the 7534037d452SChristoph Lameter * statistics in the remote zone struct as well as the global cachelines 7544037d452SChristoph Lameter * with the global counters. These could cause remote node cache line 7554037d452SChristoph Lameter * bouncing and will have to be only done when necessary. 7567cc36bbdSChristoph Lameter * 7577cc36bbdSChristoph Lameter * The function returns the number of global counters updated. 
7582244b95aSChristoph Lameter */ 7590eb77e98SChristoph Lameter static int refresh_cpu_vm_stats(bool do_pagesets) 7602244b95aSChristoph Lameter { 76175ef7184SMel Gorman struct pglist_data *pgdat; 7622244b95aSChristoph Lameter struct zone *zone; 7632244b95aSChristoph Lameter int i; 76475ef7184SMel Gorman int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; 7653a321d2aSKemi Wang #ifdef CONFIG_NUMA 7663a321d2aSKemi Wang int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, }; 7673a321d2aSKemi Wang #endif 76875ef7184SMel Gorman int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; 7697cc36bbdSChristoph Lameter int changes = 0; 7702244b95aSChristoph Lameter 771ee99c71cSKOSAKI Motohiro for_each_populated_zone(zone) { 772fbc2edb0SChristoph Lameter struct per_cpu_pageset __percpu *p = zone->pageset; 7732244b95aSChristoph Lameter 774fbc2edb0SChristoph Lameter for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { 775a7f75e25SChristoph Lameter int v; 776a7f75e25SChristoph Lameter 777fbc2edb0SChristoph Lameter v = this_cpu_xchg(p->vm_stat_diff[i], 0); 778fbc2edb0SChristoph Lameter if (v) { 779fbc2edb0SChristoph Lameter 780a7f75e25SChristoph Lameter atomic_long_add(v, &zone->vm_stat[i]); 78175ef7184SMel Gorman global_zone_diff[i] += v; 7824037d452SChristoph Lameter #ifdef CONFIG_NUMA 7834037d452SChristoph Lameter /* 3 seconds idle till flush */ 784fbc2edb0SChristoph Lameter __this_cpu_write(p->expire, 3); 7854037d452SChristoph Lameter #endif 7862244b95aSChristoph Lameter } 787fbc2edb0SChristoph Lameter } 7884037d452SChristoph Lameter #ifdef CONFIG_NUMA 7893a321d2aSKemi Wang for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) { 7903a321d2aSKemi Wang int v; 7913a321d2aSKemi Wang 7923a321d2aSKemi Wang v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0); 7933a321d2aSKemi Wang if (v) { 7943a321d2aSKemi Wang 7953a321d2aSKemi Wang atomic_long_add(v, &zone->vm_numa_stat[i]); 7963a321d2aSKemi Wang global_numa_diff[i] += v; 7973a321d2aSKemi Wang __this_cpu_write(p->expire, 3); 7983a321d2aSKemi Wang } 
7993a321d2aSKemi Wang } 8003a321d2aSKemi Wang 8010eb77e98SChristoph Lameter if (do_pagesets) { 8020eb77e98SChristoph Lameter cond_resched(); 8034037d452SChristoph Lameter /* 8044037d452SChristoph Lameter * Deal with draining the remote pageset of this 8054037d452SChristoph Lameter * processor 8064037d452SChristoph Lameter * 8074037d452SChristoph Lameter * Check if there are pages remaining in this pageset 8084037d452SChristoph Lameter * if not then there is nothing to expire. 8094037d452SChristoph Lameter */ 810fbc2edb0SChristoph Lameter if (!__this_cpu_read(p->expire) || 811fbc2edb0SChristoph Lameter !__this_cpu_read(p->pcp.count)) 8124037d452SChristoph Lameter continue; 8134037d452SChristoph Lameter 8144037d452SChristoph Lameter /* 8154037d452SChristoph Lameter * We never drain zones local to this processor. 8164037d452SChristoph Lameter */ 8174037d452SChristoph Lameter if (zone_to_nid(zone) == numa_node_id()) { 818fbc2edb0SChristoph Lameter __this_cpu_write(p->expire, 0); 8194037d452SChristoph Lameter continue; 8204037d452SChristoph Lameter } 8214037d452SChristoph Lameter 822fbc2edb0SChristoph Lameter if (__this_cpu_dec_return(p->expire)) 8234037d452SChristoph Lameter continue; 8244037d452SChristoph Lameter 8257cc36bbdSChristoph Lameter if (__this_cpu_read(p->pcp.count)) { 8267c8e0181SChristoph Lameter drain_zone_pages(zone, this_cpu_ptr(&p->pcp)); 8277cc36bbdSChristoph Lameter changes++; 8287cc36bbdSChristoph Lameter } 8290eb77e98SChristoph Lameter } 8304037d452SChristoph Lameter #endif 8312244b95aSChristoph Lameter } 83275ef7184SMel Gorman 83375ef7184SMel Gorman for_each_online_pgdat(pgdat) { 83475ef7184SMel Gorman struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats; 83575ef7184SMel Gorman 83675ef7184SMel Gorman for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { 83775ef7184SMel Gorman int v; 83875ef7184SMel Gorman 83975ef7184SMel Gorman v = this_cpu_xchg(p->vm_node_stat_diff[i], 0); 84075ef7184SMel Gorman if (v) { 84175ef7184SMel Gorman 
atomic_long_add(v, &pgdat->vm_stat[i]); 84275ef7184SMel Gorman global_node_diff[i] += v; 84375ef7184SMel Gorman } 84475ef7184SMel Gorman } 84575ef7184SMel Gorman } 84675ef7184SMel Gorman 8473a321d2aSKemi Wang #ifdef CONFIG_NUMA 8483a321d2aSKemi Wang changes += fold_diff(global_zone_diff, global_numa_diff, 8493a321d2aSKemi Wang global_node_diff); 8503a321d2aSKemi Wang #else 85175ef7184SMel Gorman changes += fold_diff(global_zone_diff, global_node_diff); 8523a321d2aSKemi Wang #endif 8537cc36bbdSChristoph Lameter return changes; 8542244b95aSChristoph Lameter } 8552244b95aSChristoph Lameter 85640f4b1eaSCody P Schafer /* 8572bb921e5SChristoph Lameter * Fold the data for an offline cpu into the global array. 8582bb921e5SChristoph Lameter * There cannot be any access by the offline cpu and therefore 8592bb921e5SChristoph Lameter * synchronization is simplified. 8602bb921e5SChristoph Lameter */ 8612bb921e5SChristoph Lameter void cpu_vm_stats_fold(int cpu) 8622bb921e5SChristoph Lameter { 86375ef7184SMel Gorman struct pglist_data *pgdat; 8642bb921e5SChristoph Lameter struct zone *zone; 8652bb921e5SChristoph Lameter int i; 86675ef7184SMel Gorman int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; 8673a321d2aSKemi Wang #ifdef CONFIG_NUMA 8683a321d2aSKemi Wang int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, }; 8693a321d2aSKemi Wang #endif 87075ef7184SMel Gorman int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; 8712bb921e5SChristoph Lameter 8722bb921e5SChristoph Lameter for_each_populated_zone(zone) { 8732bb921e5SChristoph Lameter struct per_cpu_pageset *p; 8742bb921e5SChristoph Lameter 8752bb921e5SChristoph Lameter p = per_cpu_ptr(zone->pageset, cpu); 8762bb921e5SChristoph Lameter 8772bb921e5SChristoph Lameter for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 8782bb921e5SChristoph Lameter if (p->vm_stat_diff[i]) { 8792bb921e5SChristoph Lameter int v; 8802bb921e5SChristoph Lameter 8812bb921e5SChristoph Lameter v = p->vm_stat_diff[i]; 8822bb921e5SChristoph Lameter 
p->vm_stat_diff[i] = 0; 8832bb921e5SChristoph Lameter atomic_long_add(v, &zone->vm_stat[i]); 88475ef7184SMel Gorman global_zone_diff[i] += v; 8852bb921e5SChristoph Lameter } 8863a321d2aSKemi Wang 8873a321d2aSKemi Wang #ifdef CONFIG_NUMA 8883a321d2aSKemi Wang for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) 8893a321d2aSKemi Wang if (p->vm_numa_stat_diff[i]) { 8903a321d2aSKemi Wang int v; 8913a321d2aSKemi Wang 8923a321d2aSKemi Wang v = p->vm_numa_stat_diff[i]; 8933a321d2aSKemi Wang p->vm_numa_stat_diff[i] = 0; 8943a321d2aSKemi Wang atomic_long_add(v, &zone->vm_numa_stat[i]); 8953a321d2aSKemi Wang global_numa_diff[i] += v; 8963a321d2aSKemi Wang } 8973a321d2aSKemi Wang #endif 8982bb921e5SChristoph Lameter } 8992bb921e5SChristoph Lameter 90075ef7184SMel Gorman for_each_online_pgdat(pgdat) { 90175ef7184SMel Gorman struct per_cpu_nodestat *p; 90275ef7184SMel Gorman 90375ef7184SMel Gorman p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu); 90475ef7184SMel Gorman 90575ef7184SMel Gorman for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) 90675ef7184SMel Gorman if (p->vm_node_stat_diff[i]) { 90775ef7184SMel Gorman int v; 90875ef7184SMel Gorman 90975ef7184SMel Gorman v = p->vm_node_stat_diff[i]; 91075ef7184SMel Gorman p->vm_node_stat_diff[i] = 0; 91175ef7184SMel Gorman atomic_long_add(v, &pgdat->vm_stat[i]); 91275ef7184SMel Gorman global_node_diff[i] += v; 91375ef7184SMel Gorman } 91475ef7184SMel Gorman } 91575ef7184SMel Gorman 9163a321d2aSKemi Wang #ifdef CONFIG_NUMA 9173a321d2aSKemi Wang fold_diff(global_zone_diff, global_numa_diff, global_node_diff); 9183a321d2aSKemi Wang #else 91975ef7184SMel Gorman fold_diff(global_zone_diff, global_node_diff); 9203a321d2aSKemi Wang #endif 9212bb921e5SChristoph Lameter } 9222bb921e5SChristoph Lameter 9232bb921e5SChristoph Lameter /* 92440f4b1eaSCody P Schafer * this is only called if !populated_zone(zone), which implies no other users of 92540f4b1eaSCody P Schafer * pset->vm_stat_diff[] exsist. 
92640f4b1eaSCody P Schafer */ 9275a883813SMinchan Kim void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset) 9285a883813SMinchan Kim { 9295a883813SMinchan Kim int i; 9305a883813SMinchan Kim 9315a883813SMinchan Kim for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 9325a883813SMinchan Kim if (pset->vm_stat_diff[i]) { 9335a883813SMinchan Kim int v = pset->vm_stat_diff[i]; 9345a883813SMinchan Kim pset->vm_stat_diff[i] = 0; 9355a883813SMinchan Kim atomic_long_add(v, &zone->vm_stat[i]); 93675ef7184SMel Gorman atomic_long_add(v, &vm_zone_stat[i]); 9375a883813SMinchan Kim } 9383a321d2aSKemi Wang 9393a321d2aSKemi Wang #ifdef CONFIG_NUMA 9403a321d2aSKemi Wang for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) 9413a321d2aSKemi Wang if (pset->vm_numa_stat_diff[i]) { 9423a321d2aSKemi Wang int v = pset->vm_numa_stat_diff[i]; 9433a321d2aSKemi Wang 9443a321d2aSKemi Wang pset->vm_numa_stat_diff[i] = 0; 9453a321d2aSKemi Wang atomic_long_add(v, &zone->vm_numa_stat[i]); 9463a321d2aSKemi Wang atomic_long_add(v, &vm_numa_stat[i]); 9473a321d2aSKemi Wang } 9483a321d2aSKemi Wang #endif 9495a883813SMinchan Kim } 9502244b95aSChristoph Lameter #endif 9512244b95aSChristoph Lameter 952ca889e6cSChristoph Lameter #ifdef CONFIG_NUMA 9533a321d2aSKemi Wang void __inc_numa_state(struct zone *zone, 9543a321d2aSKemi Wang enum numa_stat_item item) 9553a321d2aSKemi Wang { 9563a321d2aSKemi Wang struct per_cpu_pageset __percpu *pcp = zone->pageset; 9571d90ca89SKemi Wang u16 __percpu *p = pcp->vm_numa_stat_diff + item; 9581d90ca89SKemi Wang u16 v; 9593a321d2aSKemi Wang 9603a321d2aSKemi Wang v = __this_cpu_inc_return(*p); 9613a321d2aSKemi Wang 9621d90ca89SKemi Wang if (unlikely(v > NUMA_STATS_THRESHOLD)) { 9631d90ca89SKemi Wang zone_numa_state_add(v, zone, item); 9641d90ca89SKemi Wang __this_cpu_write(*p, 0); 9653a321d2aSKemi Wang } 9663a321d2aSKemi Wang } 9673a321d2aSKemi Wang 968ca889e6cSChristoph Lameter /* 96975ef7184SMel Gorman * Determine the per node value of a stat item. 
This function 97075ef7184SMel Gorman * is called frequently in a NUMA machine, so try to be as 97175ef7184SMel Gorman * frugal as possible. 972c2d42c16SAndrew Morton */ 97375ef7184SMel Gorman unsigned long sum_zone_node_page_state(int node, 97475ef7184SMel Gorman enum zone_stat_item item) 975c2d42c16SAndrew Morton { 976c2d42c16SAndrew Morton struct zone *zones = NODE_DATA(node)->node_zones; 977e87d59f7SJoonsoo Kim int i; 978e87d59f7SJoonsoo Kim unsigned long count = 0; 979c2d42c16SAndrew Morton 980e87d59f7SJoonsoo Kim for (i = 0; i < MAX_NR_ZONES; i++) 981e87d59f7SJoonsoo Kim count += zone_page_state(zones + i, item); 982e87d59f7SJoonsoo Kim 983e87d59f7SJoonsoo Kim return count; 984c2d42c16SAndrew Morton } 985c2d42c16SAndrew Morton 98663803222SKemi Wang /* 98763803222SKemi Wang * Determine the per node value of a numa stat item. To avoid deviation, 98863803222SKemi Wang * the per cpu stat number in vm_numa_stat_diff[] is also included. 98963803222SKemi Wang */ 9903a321d2aSKemi Wang unsigned long sum_zone_numa_state(int node, 9913a321d2aSKemi Wang enum numa_stat_item item) 9923a321d2aSKemi Wang { 9933a321d2aSKemi Wang struct zone *zones = NODE_DATA(node)->node_zones; 9943a321d2aSKemi Wang int i; 9953a321d2aSKemi Wang unsigned long count = 0; 9963a321d2aSKemi Wang 9973a321d2aSKemi Wang for (i = 0; i < MAX_NR_ZONES; i++) 99863803222SKemi Wang count += zone_numa_state_snapshot(zones + i, item); 9993a321d2aSKemi Wang 10003a321d2aSKemi Wang return count; 10013a321d2aSKemi Wang } 10023a321d2aSKemi Wang 100375ef7184SMel Gorman /* 100475ef7184SMel Gorman * Determine the per node value of a stat item. 
100575ef7184SMel Gorman */ 1006ea426c2aSRoman Gushchin unsigned long node_page_state_pages(struct pglist_data *pgdat, 100775ef7184SMel Gorman enum node_stat_item item) 100875ef7184SMel Gorman { 100975ef7184SMel Gorman long x = atomic_long_read(&pgdat->vm_stat[item]); 101075ef7184SMel Gorman #ifdef CONFIG_SMP 101175ef7184SMel Gorman if (x < 0) 101275ef7184SMel Gorman x = 0; 101375ef7184SMel Gorman #endif 101475ef7184SMel Gorman return x; 101575ef7184SMel Gorman } 1016ea426c2aSRoman Gushchin 1017ea426c2aSRoman Gushchin unsigned long node_page_state(struct pglist_data *pgdat, 1018ea426c2aSRoman Gushchin enum node_stat_item item) 1019ea426c2aSRoman Gushchin { 1020ea426c2aSRoman Gushchin VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); 1021ea426c2aSRoman Gushchin 1022ea426c2aSRoman Gushchin return node_page_state_pages(pgdat, item); 1023ea426c2aSRoman Gushchin } 1024ca889e6cSChristoph Lameter #endif 1025ca889e6cSChristoph Lameter 1026d7a5752cSMel Gorman #ifdef CONFIG_COMPACTION 102736deb0beSNamhyung Kim 1028d7a5752cSMel Gorman struct contig_page_info { 1029d7a5752cSMel Gorman unsigned long free_pages; 1030d7a5752cSMel Gorman unsigned long free_blocks_total; 1031d7a5752cSMel Gorman unsigned long free_blocks_suitable; 1032d7a5752cSMel Gorman }; 1033d7a5752cSMel Gorman 1034d7a5752cSMel Gorman /* 1035d7a5752cSMel Gorman * Calculate the number of free pages in a zone, how many contiguous 1036d7a5752cSMel Gorman * pages are free and how many are large enough to satisfy an allocation of 1037d7a5752cSMel Gorman * the target size. Note that this function makes no attempt to estimate 1038d7a5752cSMel Gorman * how many suitable free blocks there *might* be if MOVABLE pages were 1039d7a5752cSMel Gorman * migrated. 
Calculating that is possible, but expensive and can be 1040d7a5752cSMel Gorman * figured out from userspace 1041d7a5752cSMel Gorman */ 1042d7a5752cSMel Gorman static void fill_contig_page_info(struct zone *zone, 1043d7a5752cSMel Gorman unsigned int suitable_order, 1044d7a5752cSMel Gorman struct contig_page_info *info) 1045d7a5752cSMel Gorman { 1046d7a5752cSMel Gorman unsigned int order; 1047d7a5752cSMel Gorman 1048d7a5752cSMel Gorman info->free_pages = 0; 1049d7a5752cSMel Gorman info->free_blocks_total = 0; 1050d7a5752cSMel Gorman info->free_blocks_suitable = 0; 1051d7a5752cSMel Gorman 1052d7a5752cSMel Gorman for (order = 0; order < MAX_ORDER; order++) { 1053d7a5752cSMel Gorman unsigned long blocks; 1054d7a5752cSMel Gorman 1055d7a5752cSMel Gorman /* Count number of free blocks */ 1056d7a5752cSMel Gorman blocks = zone->free_area[order].nr_free; 1057d7a5752cSMel Gorman info->free_blocks_total += blocks; 1058d7a5752cSMel Gorman 1059d7a5752cSMel Gorman /* Count free base pages */ 1060d7a5752cSMel Gorman info->free_pages += blocks << order; 1061d7a5752cSMel Gorman 1062d7a5752cSMel Gorman /* Count the suitable free blocks */ 1063d7a5752cSMel Gorman if (order >= suitable_order) 1064d7a5752cSMel Gorman info->free_blocks_suitable += blocks << 1065d7a5752cSMel Gorman (order - suitable_order); 1066d7a5752cSMel Gorman } 1067d7a5752cSMel Gorman } 1068f1a5ab12SMel Gorman 1069f1a5ab12SMel Gorman /* 1070f1a5ab12SMel Gorman * A fragmentation index only makes sense if an allocation of a requested 1071f1a5ab12SMel Gorman * size would fail. If that is true, the fragmentation index indicates 1072f1a5ab12SMel Gorman * whether external fragmentation or a lack of memory was the problem. 
1073f1a5ab12SMel Gorman * The value can be used to determine if page reclaim or compaction 1074f1a5ab12SMel Gorman * should be used 1075f1a5ab12SMel Gorman */ 107656de7263SMel Gorman static int __fragmentation_index(unsigned int order, struct contig_page_info *info) 1077f1a5ab12SMel Gorman { 1078f1a5ab12SMel Gorman unsigned long requested = 1UL << order; 1079f1a5ab12SMel Gorman 108088d6ac40SWen Yang if (WARN_ON_ONCE(order >= MAX_ORDER)) 108188d6ac40SWen Yang return 0; 108288d6ac40SWen Yang 1083f1a5ab12SMel Gorman if (!info->free_blocks_total) 1084f1a5ab12SMel Gorman return 0; 1085f1a5ab12SMel Gorman 1086f1a5ab12SMel Gorman /* Fragmentation index only makes sense when a request would fail */ 1087f1a5ab12SMel Gorman if (info->free_blocks_suitable) 1088f1a5ab12SMel Gorman return -1000; 1089f1a5ab12SMel Gorman 1090f1a5ab12SMel Gorman /* 1091f1a5ab12SMel Gorman * Index is between 0 and 1 so return within 3 decimal places 1092f1a5ab12SMel Gorman * 1093f1a5ab12SMel Gorman * 0 => allocation would fail due to lack of memory 1094f1a5ab12SMel Gorman * 1 => allocation would fail due to fragmentation 1095f1a5ab12SMel Gorman */ 1096f1a5ab12SMel Gorman return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total); 1097f1a5ab12SMel Gorman } 109856de7263SMel Gorman 1099facdaa91SNitin Gupta /* 1100facdaa91SNitin Gupta * Calculates external fragmentation within a zone wrt the given order. 1101facdaa91SNitin Gupta * It is defined as the percentage of pages found in blocks of size 1102facdaa91SNitin Gupta * less than 1 << order. It returns values in range [0, 100]. 
1103facdaa91SNitin Gupta */ 1104d34c0a75SNitin Gupta unsigned int extfrag_for_order(struct zone *zone, unsigned int order) 1105facdaa91SNitin Gupta { 1106facdaa91SNitin Gupta struct contig_page_info info; 1107facdaa91SNitin Gupta 1108facdaa91SNitin Gupta fill_contig_page_info(zone, order, &info); 1109facdaa91SNitin Gupta if (info.free_pages == 0) 1110facdaa91SNitin Gupta return 0; 1111facdaa91SNitin Gupta 1112facdaa91SNitin Gupta return div_u64((info.free_pages - 1113facdaa91SNitin Gupta (info.free_blocks_suitable << order)) * 100, 1114facdaa91SNitin Gupta info.free_pages); 1115facdaa91SNitin Gupta } 1116facdaa91SNitin Gupta 111756de7263SMel Gorman /* Same as __fragmentation index but allocs contig_page_info on stack */ 111856de7263SMel Gorman int fragmentation_index(struct zone *zone, unsigned int order) 111956de7263SMel Gorman { 112056de7263SMel Gorman struct contig_page_info info; 112156de7263SMel Gorman 112256de7263SMel Gorman fill_contig_page_info(zone, order, &info); 112356de7263SMel Gorman return __fragmentation_index(order, &info); 112456de7263SMel Gorman } 1125d7a5752cSMel Gorman #endif 1126d7a5752cSMel Gorman 1127ebc5d83dSKonstantin Khlebnikov #if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \ 1128ebc5d83dSKonstantin Khlebnikov defined(CONFIG_NUMA) || defined(CONFIG_MEMCG) 1129fa25c503SKOSAKI Motohiro #ifdef CONFIG_ZONE_DMA 1130fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA(xx) xx "_dma", 1131fa25c503SKOSAKI Motohiro #else 1132fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA(xx) 1133fa25c503SKOSAKI Motohiro #endif 1134fa25c503SKOSAKI Motohiro 1135fa25c503SKOSAKI Motohiro #ifdef CONFIG_ZONE_DMA32 1136fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA32(xx) xx "_dma32", 1137fa25c503SKOSAKI Motohiro #else 1138fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA32(xx) 1139fa25c503SKOSAKI Motohiro #endif 1140fa25c503SKOSAKI Motohiro 1141fa25c503SKOSAKI Motohiro #ifdef CONFIG_HIGHMEM 1142fa25c503SKOSAKI Motohiro #define TEXT_FOR_HIGHMEM(xx) xx "_high", 
#else
#define TEXT_FOR_HIGHMEM(xx)
#endif

#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
					TEXT_FOR_HIGHMEM(xx) xx "_movable",

/*
 * Names for the counters exported through /proc/vmstat (and reused by
 * /proc/zoneinfo and memcg stat reporting).
 *
 * NOTE(review): the entry order must exactly mirror the enums this table is
 * indexed by (zone_stat_item, numa_stat_item, node_stat_item,
 * writeback_stat_item, vm_event_item), including every #ifdef block.
 * Adding, removing or reordering an entry without the matching enum change
 * shifts the name of every subsequent counter.
 */
const char * const vmstat_text[] = {
	/* enum zone_stat_item counters */
	"nr_free_pages",
	"nr_zone_inactive_anon",
	"nr_zone_active_anon",
	"nr_zone_inactive_file",
	"nr_zone_active_file",
	"nr_zone_unevictable",
	"nr_zone_write_pending",
	"nr_mlock",
	"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
	"nr_zspages",
#endif
	"nr_free_cma",

	/* enum numa_stat_item counters */
#ifdef CONFIG_NUMA
	"numa_hit",
	"numa_miss",
	"numa_foreign",
	"numa_interleave",
	"numa_local",
	"numa_other",
#endif

	/* enum node_stat_item counters */
	"nr_inactive_anon",
	"nr_active_anon",
	"nr_inactive_file",
	"nr_active_file",
	"nr_unevictable",
	"nr_slab_reclaimable",
	"nr_slab_unreclaimable",
	"nr_isolated_anon",
	"nr_isolated_file",
	"workingset_nodes",
	"workingset_refault_anon",
	"workingset_refault_file",
	"workingset_activate_anon",
	"workingset_activate_file",
	"workingset_restore_anon",
	"workingset_restore_file",
	"workingset_nodereclaim",
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_writeback_temp",
	"nr_shmem",
	"nr_shmem_hugepages",
	"nr_shmem_pmdmapped",
	"nr_file_hugepages",
	"nr_file_pmdmapped",
	"nr_anon_transparent_hugepages",
	"nr_vmscan_write",
	"nr_vmscan_immediate_reclaim",
	"nr_dirtied",
	"nr_written",
	"nr_kernel_misc_reclaimable",
	"nr_foll_pin_acquired",
	"nr_foll_pin_released",
	"nr_kernel_stack",
#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
	"nr_shadow_call_stack",
#endif
	"nr_page_table_pages",
#ifdef CONFIG_SWAP
	"nr_swapcached",
#endif

	/* enum writeback_stat_item counters */
	"nr_dirty_threshold",
	"nr_dirty_background_threshold",

#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
	/* enum vm_event_item counters */
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	/* TEXTS_FOR_ZONES expands to one name per zone type */
	TEXTS_FOR_ZONES("pgalloc")
	TEXTS_FOR_ZONES("allocstall")
	TEXTS_FOR_ZONES("pgskip")

	"pgfree",
	"pgactivate",
	"pgdeactivate",
	"pglazyfree",

	"pgfault",
	"pgmajfault",
	"pglazyfreed",

	"pgrefill",
	"pgreuse",
	"pgsteal_kswapd",
	"pgsteal_direct",
	"pgscan_kswapd",
	"pgscan_direct",
	"pgscan_direct_throttle",
	"pgscan_anon",
	"pgscan_file",
	"pgsteal_anon",
	"pgsteal_file",

#ifdef CONFIG_NUMA
	"zone_reclaim_failed",
#endif
	"pginodesteal",
	"slabs_scanned",
	"kswapd_inodesteal",
	"kswapd_low_wmark_hit_quickly",
	"kswapd_high_wmark_hit_quickly",
	"pageoutrun",

	"pgrotated",

	"drop_pagecache",
	"drop_slab",
	"oom_kill",

#ifdef CONFIG_NUMA_BALANCING
	"numa_pte_updates",
	"numa_huge_pte_updates",
	"numa_hint_faults",
	"numa_hint_faults_local",
	"numa_pages_migrated",
#endif
#ifdef CONFIG_MIGRATION
	"pgmigrate_success",
	"pgmigrate_fail",
	"thp_migration_success",
	"thp_migration_fail",
	"thp_migration_split",
#endif
#ifdef CONFIG_COMPACTION
	"compact_migrate_scanned",
	"compact_free_scanned",
	"compact_isolated",
	"compact_stall",
	"compact_fail",
	"compact_success",
	"compact_daemon_wake",
	"compact_daemon_migrate_scanned",
	"compact_daemon_free_scanned",
#endif

#ifdef CONFIG_HUGETLB_PAGE
	"htlb_buddy_alloc_success",
	"htlb_buddy_alloc_fail",
#endif
	"unevictable_pgs_culled",
	"unevictable_pgs_scanned",
	"unevictable_pgs_rescued",
	"unevictable_pgs_mlocked",
	"unevictable_pgs_munlocked",
	"unevictable_pgs_cleared",
	"unevictable_pgs_stranded",

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	"thp_fault_alloc",
	"thp_fault_fallback",
	"thp_fault_fallback_charge",
	"thp_collapse_alloc",
	"thp_collapse_alloc_failed",
	"thp_file_alloc",
	"thp_file_fallback",
	"thp_file_fallback_charge",
	"thp_file_mapped",
	"thp_split_page",
	"thp_split_page_failed",
	"thp_deferred_split_page",
	"thp_split_pmd",
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
	"thp_split_pud",
#endif
	"thp_zero_page_alloc",
	"thp_zero_page_alloc_failed",
	"thp_swpout",
	"thp_swpout_fallback",
#endif
#ifdef CONFIG_MEMORY_BALLOON
	"balloon_inflate",
	"balloon_deflate",
#ifdef CONFIG_BALLOON_COMPACTION
	"balloon_migrate",
#endif
#endif /* CONFIG_MEMORY_BALLOON */
#ifdef CONFIG_DEBUG_TLBFLUSH
	"nr_tlb_remote_flush",
	"nr_tlb_remote_flush_received",
	"nr_tlb_local_flush_all",
	"nr_tlb_local_flush_one",
#endif /* CONFIG_DEBUG_TLBFLUSH */

#ifdef CONFIG_DEBUG_VM_VMACACHE
	"vmacache_find_calls",
	"vmacache_find_hits",
#endif
#ifdef CONFIG_SWAP
	"swap_ra",
	"swap_ra_hit",
#endif
#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
};
#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */

#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
     defined(CONFIG_PROC_FS)
/*
 * seq_file ->start callback shared by several /proc files: advance to the
 * *pos'th online node and return its pg_data_t, or NULL once *pos walks
 * past the last online node.
 */
static void *frag_start(struct seq_file *m, loff_t *pos)
{
	pg_data_t *pgdat;
	loff_t node = *pos;

	for (pgdat = first_online_pgdat();
	     pgdat && node;
	     pgdat = next_online_pgdat(pgdat))
		--node;

	return pgdat;
}

/* seq_file ->next callback: step the iterator to the next online node. */
static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	(*pos)++;
	return next_online_pgdat(pgdat);
}

/* seq_file ->stop callback: nothing to release for the node iterator. */
static void frag_stop(struct seq_file *m, void *arg)
{
}

/*
 * Walk zones in a node and print using a callback.
 * If @assert_populated is true, only use callback for zones that are populated.
1390b2bd8598SDavid Rientjes */ 13913c486871SAndrew Morton static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat, 1392727c080fSVinayak Menon bool assert_populated, bool nolock, 13933c486871SAndrew Morton void (*print)(struct seq_file *m, pg_data_t *, struct zone *)) 13943c486871SAndrew Morton { 13953c486871SAndrew Morton struct zone *zone; 13963c486871SAndrew Morton struct zone *node_zones = pgdat->node_zones; 13973c486871SAndrew Morton unsigned long flags; 13983c486871SAndrew Morton 13993c486871SAndrew Morton for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { 1400b2bd8598SDavid Rientjes if (assert_populated && !populated_zone(zone)) 14013c486871SAndrew Morton continue; 14023c486871SAndrew Morton 1403727c080fSVinayak Menon if (!nolock) 14043c486871SAndrew Morton spin_lock_irqsave(&zone->lock, flags); 14053c486871SAndrew Morton print(m, pgdat, zone); 1406727c080fSVinayak Menon if (!nolock) 14073c486871SAndrew Morton spin_unlock_irqrestore(&zone->lock, flags); 14083c486871SAndrew Morton } 14093c486871SAndrew Morton } 14103c486871SAndrew Morton #endif 14113c486871SAndrew Morton 1412d7a5752cSMel Gorman #ifdef CONFIG_PROC_FS 1413467c996cSMel Gorman static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, 1414467c996cSMel Gorman struct zone *zone) 1415467c996cSMel Gorman { 1416467c996cSMel Gorman int order; 1417467c996cSMel Gorman 1418f6ac2354SChristoph Lameter seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); 1419f6ac2354SChristoph Lameter for (order = 0; order < MAX_ORDER; ++order) 1420f6ac2354SChristoph Lameter seq_printf(m, "%6lu ", zone->free_area[order].nr_free); 1421f6ac2354SChristoph Lameter seq_putc(m, '\n'); 1422f6ac2354SChristoph Lameter } 1423467c996cSMel Gorman 1424467c996cSMel Gorman /* 1425467c996cSMel Gorman * This walks the free areas for each zone. 
 */
/* seq_file ->show callback for /proc/buddyinfo. */
static int frag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, true, false, frag_show_print);
	return 0;
}

/*
 * Print, for one zone, the number of free blocks of each migratetype at
 * each buddy order. Called with zone->lock held by walk_zones_in_node().
 */
static void pagetypeinfo_showfree_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int order, mtype;

	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
		seq_printf(m, "Node %4d, zone %8s, type %12s ",
					pgdat->node_id,
					zone->name,
					migratetype_names[mtype]);
		for (order = 0; order < MAX_ORDER; ++order) {
			unsigned long freecount = 0;
			struct free_area *area;
			struct list_head *curr;
			bool overflow = false;

			area = &(zone->free_area[order]);

			list_for_each(curr, &area->free_list[mtype]) {
				/*
				 * Cap the free_list iteration because it might
				 * be really large and we are under a spinlock
				 * so a long time spent here could trigger a
				 * hard lockup detector. Anyway this is a
				 * debugging tool so knowing there is a handful
				 * of pages of this order should be more than
				 * sufficient.
				 */
				if (++freecount >= 100000) {
					overflow = true;
					break;
				}
			}
			seq_printf(m, "%s%6lu ", overflow ? ">" : "", freecount);
			/*
			 * Briefly drop the zone lock between orders so a huge
			 * free list cannot starve other lock waiters.
			 * NOTE(review): the lock was taken with
			 * spin_lock_irqsave() in walk_zones_in_node(); using
			 * the plain _irq variants here relies on this path
			 * only running in process context with IRQs enabled —
			 * confirm against walk_zones_in_node()'s callers.
			 */
			spin_unlock_irq(&zone->lock);
			cond_resched();
			spin_lock_irq(&zone->lock);
		}
		seq_putc(m, '\n');
	}
}

/* Print out the free pages at each order for each migratetype */
static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
{
	int order;
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* Print header */
	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6d ", order);
	seq_putc(m, '\n');

	walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);

	return 0;
}

/*
 * Count, for one zone, how many pageblocks currently carry each
 * migratetype, scanning one page per pageblock.
 */
static void pagetypeinfo_showblockcount_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int mtype;
	unsigned long pfn;
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count[MIGRATE_TYPES] = { 0, };

	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		struct page *page;

		page = pfn_to_online_page(pfn);
		if (!page)
			continue;

		/* Zones can overlap within a pageblock; skip foreign pages. */
		if (page_zone(page) != zone)
			continue;

		mtype = get_pageblock_migratetype(page);

		if (mtype < MIGRATE_TYPES)
			count[mtype]++;
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12lu ", count[mtype]);
	seq_putc(m, '\n');
}

/* Print out the number of pageblocks for each migratetype */
static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
{
	int mtype;
	pg_data_t *pgdat = (pg_data_t *)arg;

	seq_printf(m, "\n%-23s", "Number of blocks type ");
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12s ", migratetype_names[mtype]);
	seq_putc(m, '\n');
	walk_zones_in_node(m, pgdat, true, false,
		pagetypeinfo_showblockcount_print);

	return 0;
}

/*
 * Print out the number of pageblocks for each migratetype that contain pages
 * of other types.
This gives an indication of how well fallbacks are being 154448c96a36SJoonsoo Kim * contained by rmqueue_fallback(). It requires information from PAGE_OWNER 154548c96a36SJoonsoo Kim * to determine what is going on 154648c96a36SJoonsoo Kim */ 154748c96a36SJoonsoo Kim static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat) 154848c96a36SJoonsoo Kim { 154948c96a36SJoonsoo Kim #ifdef CONFIG_PAGE_OWNER 155048c96a36SJoonsoo Kim int mtype; 155148c96a36SJoonsoo Kim 15527dd80b8aSVlastimil Babka if (!static_branch_unlikely(&page_owner_inited)) 155348c96a36SJoonsoo Kim return; 155448c96a36SJoonsoo Kim 155548c96a36SJoonsoo Kim drain_all_pages(NULL); 155648c96a36SJoonsoo Kim 155748c96a36SJoonsoo Kim seq_printf(m, "\n%-23s", "Number of mixed blocks "); 155848c96a36SJoonsoo Kim for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) 155948c96a36SJoonsoo Kim seq_printf(m, "%12s ", migratetype_names[mtype]); 156048c96a36SJoonsoo Kim seq_putc(m, '\n'); 156148c96a36SJoonsoo Kim 1562727c080fSVinayak Menon walk_zones_in_node(m, pgdat, true, true, 1563727c080fSVinayak Menon pagetypeinfo_showmixedcount_print); 156448c96a36SJoonsoo Kim #endif /* CONFIG_PAGE_OWNER */ 156548c96a36SJoonsoo Kim } 156648c96a36SJoonsoo Kim 1567467c996cSMel Gorman /* 1568467c996cSMel Gorman * This prints out statistics in relation to grouping pages by mobility. 1569467c996cSMel Gorman * It is expensive to collect so do not constantly read the file. 
 */
/* seq_file ->show callback for /proc/pagetypeinfo. */
static int pagetypeinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_MEMORY))
		return 0;

	seq_printf(m, "Page block order: %d\n", pageblock_order);
	seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
	seq_putc(m, '\n');
	pagetypeinfo_showfree(m, pgdat);
	pagetypeinfo_showblockcount(m, pgdat);
	pagetypeinfo_showmixedcount(m, pgdat);

	return 0;
}

static const struct seq_operations fragmentation_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= frag_show,
};

static const struct seq_operations pagetypeinfo_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= pagetypeinfo_show,
};

/*
 * True when @zone is the first populated zone of @pgdat — used so the
 * per-node stats are printed only once per node in /proc/zoneinfo.
 */
static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
{
	int zid;

	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
		struct zone *compare = &pgdat->node_zones[zid];

		if (populated_zone(compare))
			return zone == compare;
	}

	return false;
}

/* Emit the /proc/zoneinfo section for one zone. */
static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
							struct zone *zone)
{
	int i;
	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
	if (is_zone_first_populated(pgdat, zone)) {
		seq_printf(m, "\n  per-node stats");
		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
			unsigned long pages = node_page_state_pages(pgdat, i);

			/* THP-backed counters are reported in huge pages. */
			if (vmstat_item_print_in_thp(i))
				pages /= HPAGE_PMD_NR;
			seq_printf(m, "\n      %-12s %lu", node_stat_name(i),
				   pages);
		}
	}
	seq_printf(m,
		   "\n  pages free     %lu"
		   "\n        min      %lu"
		   "\n        low      %lu"
		   "\n        high     %lu"
		   "\n        spanned  %lu"
		   "\n        present  %lu"
		   "\n        managed  %lu"
		   "\n        cma      %lu",
		   zone_page_state(zone, NR_FREE_PAGES),
		   min_wmark_pages(zone),
		   low_wmark_pages(zone),
		   high_wmark_pages(zone),
		   zone->spanned_pages,
		   zone->present_pages,
		   zone_managed_pages(zone),
		   zone_cma_pages(zone));

	seq_printf(m,
		   "\n        protection: (%ld",
		   zone->lowmem_reserve[0]);
	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
		seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
	seq_putc(m, ')');

	/* If unpopulated, no other information is useful */
	if (!populated_zone(zone)) {
		seq_putc(m, '\n');
		return;
	}

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		seq_printf(m, "\n      %-12s %lu", zone_stat_name(i),
			   zone_page_state(zone, i));

#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
		seq_printf(m, "\n      %-12s %lu", numa_stat_name(i),
			   zone_numa_state_snapshot(zone, i));
#endif

	seq_printf(m, "\n  pagesets");
	for_each_online_cpu(i) {
		struct per_cpu_pageset *pageset;

		pageset = per_cpu_ptr(zone->pageset, i);
		seq_printf(m,
			   "\n    cpu: %i"
			   "\n              count: %i"
			   "\n              high:  %i"
			   "\n              batch: %i",
			   i,
			   pageset->pcp.count,
			   pageset->pcp.high,
			   pageset->pcp.batch);
#ifdef CONFIG_SMP
		seq_printf(m, "\n  vm stats threshold: %d",
				pageset->stat_threshold);
#endif
	}
	seq_printf(m,
		   "\n  node_unreclaimable:  %u"
		   "\n  start_pfn:           %lu",
		   pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
		   zone->zone_start_pfn);
	seq_putc(m, '\n');
}

/*
 * Output information about zones in @pgdat.  All zones are printed regardless
 * of whether they are populated or not: lowmem_reserve_ratio operates on the
 * set of all zones and userspace would not be aware of such zones if they are
 * suppressed here (zoneinfo displays the effect of lowmem_reserve_ratio).
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
	return 0;
}

static const struct seq_operations zoneinfo_op = {
	.start	= frag_start, /* iterate over all zones. The same as in
			       * fragmentation. */
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= zoneinfo_show,
};

/* Total number of lines /proc/vmstat produces; indexes into vmstat_text. */
#define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
			 NR_VM_NUMA_STAT_ITEMS + \
			 NR_VM_NODE_STAT_ITEMS + \
			 NR_VM_WRITEBACK_STAT_ITEMS + \
			 (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
			  NR_VM_EVENT_ITEMS : 0))

/*
 * seq_file ->start for /proc/vmstat: snapshot every counter into one
 * kmalloc'ed array (freed in vmstat_stop()) and return a cursor into it.
 */
static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
	unsigned long *v;
	int i;

	if (*pos >= NR_VMSTAT_ITEMS)
		return NULL;

	BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
	v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
	m->private = v;
	if (!v)
		return ERR_PTR(-ENOMEM);
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		v[i] = global_zone_page_state(i);
	v += NR_VM_ZONE_STAT_ITEMS;

#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
		v[i] = global_numa_state(i);
	v += NR_VM_NUMA_STAT_ITEMS;
#endif

	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
		v[i] = global_node_page_state_pages(i);
		/* THP-backed counters are reported in huge pages. */
		if (vmstat_item_print_in_thp(i))
			v[i] /= HPAGE_PMD_NR;
	}
	v += NR_VM_NODE_STAT_ITEMS;

	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
			    v + NR_DIRTY_THRESHOLD);
	v += NR_VM_WRITEBACK_STAT_ITEMS;

#ifdef CONFIG_VM_EVENT_COUNTERS
	all_vm_events(v);
	v[PGPGIN] /= 2;		/* sectors -> kbytes */
	v[PGPGOUT] /= 2;
#endif
	return (unsigned long *)m->private + *pos;
}

/* seq_file ->next: advance the cursor within the snapshot array. */
static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
	(*pos)++;
	if (*pos >= NR_VMSTAT_ITEMS)
		return NULL;
	return (unsigned long *)m->private + *pos;
}

/* seq_file ->show: print one "name value" line from the snapshot. */
static int vmstat_show(struct seq_file *m, void *arg)
{
	unsigned long *l = arg;
	unsigned long off = l - (unsigned long *)m->private;

	seq_puts(m, vmstat_text[off]);
	seq_put_decimal_ull(m, " ", *l);
	seq_putc(m, '\n');

	if (off == NR_VMSTAT_ITEMS - 1) {
		/*
		 * We've come to the end - add any deprecated counters to avoid
		 * breaking userspace which might depend on them being present.
		 */
		seq_puts(m, "nr_unstable 0\n");
	}
	return 0;
}

/* seq_file ->stop: release the snapshot allocated in vmstat_start(). */
static void vmstat_stop(struct seq_file *m, void *arg)
{
	kfree(m->private);
	m->private = NULL;
}

static const struct seq_operations vmstat_op = {
	.start	= vmstat_start,
	.next	= vmstat_next,
	.stop	= vmstat_stop,
	.show	= vmstat_show,
};
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;

#ifdef CONFIG_PROC_FS
/* Work callback run on each cpu by vmstat_refresh(). */
static void refresh_vm_stats(struct work_struct *work)
{
	refresh_cpu_vm_stats(true);
}

/* sysctl handler for /proc/sys/vm/stat_refresh. */
int vmstat_refresh(struct ctl_table *table, int write,
		   void *buffer, size_t *lenp, loff_t *ppos)
{
	long val;
	int err;
	int i;

	/*
	 * The regular update, every sysctl_stat_interval, may come later
	 * than expected: leaving a significant amount in per_cpu buckets.
	 * This is particularly misleading when checking a quantity of HUGE
	 * pages, immediately after running a test. /proc/sys/vm/stat_refresh,
	 * which can equally be echo'ed to or cat'ted from (by root),
	 * can be used to update the stats just before reading them.
	 *
	 * Oh, and since global_zone_page_state() etc. are so careful to hide
	 * transiently negative values, report an error here if any of
	 * the stats is negative, so we know to go looking for imbalance.
	 */
	err = schedule_on_each_cpu(refresh_vm_stats);
	if (err)
		return err;
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
		val = atomic_long_read(&vm_zone_stat[i]);
		if (val < 0) {
			pr_warn("%s: %s %ld\n",
				__func__, zone_stat_name(i), val);
			err = -EINVAL;
		}
	}
#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
		val = atomic_long_read(&vm_numa_stat[i]);
		if (val < 0) {
			pr_warn("%s: %s %ld\n",
				__func__, numa_stat_name(i), val);
			err = -EINVAL;
		}
	}
#endif
	if (err)
		return err;
	if (write)
		*ppos += *lenp;
	else
		*lenp = 0;
	return 0;
}
#endif
 /* CONFIG_PROC_FS */

/*
 * Per-cpu deferrable work: fold this CPU's vmstat differentials into the
 * global counters.  The work re-arms itself only while the counters keep
 * changing; once a pass folds nothing it simply does not requeue, and
 * vmstat_shepherd() restarts it when new differentials show up.
 */
static void vmstat_update(struct work_struct *w)
{
	if (refresh_cpu_vm_stats(true)) {
		/*
		 * Counters were updated so we expect more updates
		 * to occur in the future. Keep on running the
		 * update worker thread.
		 */
		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
				this_cpu_ptr(&vmstat_work),
				round_jiffies_relative(sysctl_stat_interval));
	}
}

/*
 * Check if the diffs for a certain cpu indicate that
 * an update is needed.
 */
static bool need_update(int cpu)
{
	struct zone *zone;

	for_each_populated_zone(zone) {
		struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);

		/*
		 * Pin the diff element sizes that the byte-granular
		 * memchr_inv() scans below rely on.
		 */
		BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
#ifdef CONFIG_NUMA
		BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 2);
#endif

		/*
		 * The fast way of checking if there are any vmstat diffs.
		 */
		if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
			       sizeof(p->vm_stat_diff[0])))
			return true;
#ifdef CONFIG_NUMA
		if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS *
			       sizeof(p->vm_numa_stat_diff[0])))
			return true;
#endif
	}
	return false;
}

/*
 * Switch off vmstat processing and then fold all the remaining differentials
 * until the diffs stay at zero. The function is used by NOHZ and can only be
 * invoked when tick processing is not active.
 */
void quiet_vmstat(void)
{
	if (system_state != SYSTEM_RUNNING)
		return;

	/* No vmstat_update pending means there is nothing to quiesce here. */
	if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
		return;

	/* Nothing accumulated on this CPU either - cheap early exit. */
	if (!need_update(smp_processor_id()))
		return;

	/*
	 * Just refresh counters and do not care about the pending delayed
	 * vmstat_update. It doesn't fire that often to matter and canceling
	 * it would be too expensive from this path.
	 * vmstat_shepherd will take care about that for us.
	 */
	refresh_cpu_vm_stats(false);
}

/*
 * Shepherd worker thread that checks the
 * differentials of processors that have their worker
 * threads for vm statistics updates disabled because of
 * inactivity.
 */
static void vmstat_shepherd(struct work_struct *w);

/* Forward declaration above is needed because the work references itself. */
static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);

static void vmstat_shepherd(struct work_struct *w)
{
	int cpu;

	get_online_cpus();
	/* Check processors whose vmstat worker threads have been disabled */
	for_each_online_cpu(cpu) {
		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);

		/* Restart only workers that went idle with diffs left over. */
		if (!delayed_work_pending(dw) && need_update(cpu))
			queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
	}
	put_online_cpus();

	/* The shepherd re-arms itself for the lifetime of the system. */
	schedule_delayed_work(&shepherd,
		round_jiffies_relative(sysctl_stat_interval));
}

/* Initialize the per-cpu vmstat works and kick off the shepherd. */
static void __init start_shepherd_timer(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
			vmstat_update);

	schedule_delayed_work(&shepherd,
		round_jiffies_relative(sysctl_stat_interval));
}

/* At boot, mark every node that already has at least one CPU with N_CPU. */
static void __init init_cpu_node_state(void)
{
	int node;

	for_each_online_node(node) {
		if (cpumask_weight(cpumask_of_node(node)) > 0)
			node_set_state(node, N_CPU);
	}
}

/*
 * CPU hotplug "online" callback: recompute the zone stat thresholds for the
 * new CPU count and mark the CPU's node as having a CPU.
 */
static int vmstat_cpu_online(unsigned int cpu)
{
	refresh_zone_stat_thresholds();
	node_set_state(cpu_to_node(cpu), N_CPU);
	return 0;
}

/*
 * CPU hotplug "down prepare" callback: make sure the outgoing CPU's vmstat
 * work is not running and cannot fire again.
 */
static int vmstat_cpu_down_prep(unsigned int cpu)
{
	cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
	return 0;
}

/*
 * CPU hotplug "dead" callback: recompute thresholds, and clear the node's
 * N_CPU state if this was the node's last CPU.
 */
static int vmstat_cpu_dead(unsigned int cpu)
{
	const struct cpumask *node_cpus;
	int node;

	node = cpu_to_node(cpu);

	refresh_zone_stat_thresholds();
	node_cpus = cpumask_of_node(node);
	if (cpumask_weight(node_cpus) > 0)
		return 0;

	node_clear_state(node, N_CPU);
	return 0;
}

#endif

/* WQ_MEM_RECLAIM workqueue shared by mm's per-cpu work items. */
struct workqueue_struct *mm_percpu_wq;

/*
 * One-time boot-time setup: create mm_percpu_wq, register the vmstat CPU
 * hotplug callbacks, start the shepherd and create the procfs files.
 */
void __init init_mm_internals(void)
{
	int ret __maybe_unused;

	mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);

#ifdef CONFIG_SMP
	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
					NULL, vmstat_cpu_dead);
	if (ret < 0)
		pr_err("vmstat: failed to register 'dead' hotplug state\n");

	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
					vmstat_cpu_online,
					vmstat_cpu_down_prep);
	if (ret < 0)
		pr_err("vmstat: failed to register 'online' hotplug state\n");

	/* Hold the hotplug lock so the online masks are stable for the scan. */
	get_online_cpus();
	init_cpu_node_state();
	put_online_cpus();

	start_shepherd_timer();
#endif
#ifdef CONFIG_PROC_FS
	proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
	proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
	proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
	proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
#endif
}

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)

/*
 * Unusable free space index for an allocation of 2^order pages, scaled
 * to three decimal places: 0 means none of the free memory is wasted for
 * this order, 1000 means all of it is.
 */
static int unusable_free_index(unsigned int order,
				struct contig_page_info *info)
{
	u64 unusable;

	/* With no free memory at all, everything counts as unusable. */
	if (info->free_pages == 0)
		return 1000;

	/* Free pages not sitting in blocks large enough for this order. */
	unusable = (info->free_pages -
			(info->free_blocks_suitable << order)) * 1000ULL;
	return div_u64(unusable, info->free_pages);
}

/* Emit one line of per-order unusable indexes for a single zone. */
static void unusable_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	struct contig_page_info info;
	unsigned int order;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; order++) {
		int idx;

		fill_contig_page_info(zone, order, &info);
		idx = unusable_free_index(order, &info);
		seq_printf(m, "%d.%03d ", idx / 1000, idx % 1000);
	}
	seq_putc(m, '\n');
}

/*
 * Display unusable free space index
 *
 * The unusable free space index measures how much of the available free
 * memory cannot be used to satisfy an allocation of a given size and is a
 * value between 0 and 1. The higher the value, the more of free memory is
 * unusable and by implication, the worse the external fragmentation is. This
 * can be expressed as a percentage by multiplying by 100.
 */
static int unusable_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = arg;

	/* Memoryless nodes have nothing to report. */
	if (!node_state(pgdat->node_id, N_MEMORY))
		return 0;

	walk_zones_in_node(m, pgdat, true, false, unusable_show_print);
	return 0;
}

static const struct seq_operations unusable_sops = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= unusable_show,
};

DEFINE_SEQ_ATTRIBUTE(unusable);

/* Emit one line of per-order fragmentation indexes for a single zone. */
static void extfrag_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	/* On stack: interrupts are disabled during the zone walk. */
	struct contig_page_info info;
	unsigned int order;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; order++) {
		int idx;

		fill_contig_page_info(zone, order, &info);
		idx = __fragmentation_index(order, &info);
		seq_printf(m, "%d.%03d ", idx / 1000, idx % 1000);
	}
	seq_putc(m, '\n');
}

/*
 * Display fragmentation index for orders that allocations would fail for
 */
static int extfrag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = arg;

	walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);
	return 0;
}

static const struct seq_operations extfrag_sops = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= extfrag_show,
};

DEFINE_SEQ_ATTRIBUTE(extfrag);

/* Create debugfs files exposing the two fragmentation views above. */
static int __init extfrag_debug_init(void)
{
	struct dentry *root = debugfs_create_dir("extfrag", NULL);

	debugfs_create_file("unusable_index", 0444, root, NULL,
			    &unusable_fops);
	debugfs_create_file("extfrag_index", 0444, root, NULL,
			    &extfrag_fops);
	return 0;
}

module_init(extfrag_debug_init);
#endif