/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  zoned VM statistics
 *  Copyright (C) 2006 Silicon Graphics, Inc.,
 *		Christoph Lameter <christoph@lameter.com>
 *  Copyright (C) 2008-2014 Christoph Lameter
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/vmstat.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/writeback.h>
#include <linux/compaction.h>
#include <linux/mm_inline.h>
#include <linux/page_ext.h>
#include <linux/page_owner.h>

#include "internal.h"

#define NUMA_STATS_THRESHOLD (U16_MAX - 2)

#ifdef CONFIG_NUMA
int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;

/* zero numa counters within a zone */
static void zero_zone_numa_counters(struct zone *zone)
{
	int item, cpu;

	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) {
		atomic_long_set(&zone->vm_numa_stat[item], 0);
		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item]
						= 0;
	}
}

/* zero numa counters of all the populated zones */
static void zero_zones_numa_counters(void)
{
	struct zone *zone;

	for_each_populated_zone(zone)
		zero_zone_numa_counters(zone);
}

/* zero global numa counters */
static void zero_global_numa_counters(void)
{
	int item;

	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++)
		atomic_long_set(&vm_numa_stat[item], 0);
}

static void invalid_numa_statistics(void)
{
	zero_zones_numa_counters();
	zero_global_numa_counters();
}

static DEFINE_MUTEX(vm_numa_stat_lock);
int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *length, loff_t *ppos)
{
	int ret, oldval;

	mutex_lock(&vm_numa_stat_lock);
	if (write)
		oldval = sysctl_vm_numa_stat;
	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
	if (ret || !write)
		goto out;

	if (oldval == sysctl_vm_numa_stat)
		goto out;
	else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) {
		static_branch_enable(&vm_numa_stat_key);
		pr_info("enable numa statistics\n");
	} else {
		static_branch_disable(&vm_numa_stat_key);
		invalid_numa_statistics();
		pr_info("disable numa statistics, and clear numa counters\n");
	}

out:
	mutex_unlock(&vm_numa_stat_lock);
	return ret;
}
#endif

#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

static void sum_vm_events(unsigned long *ret)
{
	int cpu;
	int i;

	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

	for_each_online_cpu(cpu) {
		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
			ret[i] += this->event[i];
	}
}

/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
void all_vm_events(unsigned long *ret)
{
	get_online_cpus();
	sum_vm_events(ret);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(all_vm_events);
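/*
 * Illustrative usage (a sketch of the read side, not a prescribed
 * contract): a consumer such as the /proc/vmstat code snapshots all
 * event counters at once into a caller-supplied array:
 *
 *	unsigned long events[NR_VM_EVENT_ITEMS];
 *
 *	all_vm_events(events);
 *
 * As noted above, the result is only approximate.
 */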
/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * but keeps the global counts constant.
 */
void vm_events_fold_cpu(int cpu)
{
	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
	int i;

	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
		count_vm_events(i, fold_state->event[i]);
		fold_state->event[i] = 0;
	}
}

#endif /* CONFIG_VM_EVENT_COUNTERS */

/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_zone_stat);
EXPORT_SYMBOL(vm_numa_stat);
EXPORT_SYMBOL(vm_node_stat);

#ifdef CONFIG_SMP

int calculate_pressure_threshold(struct zone *zone)
{
	int threshold;
	int watermark_distance;

	/*
	 * As vmstats are not up to date, there is drift between the estimated
	 * and real values. For high thresholds and a high number of CPUs, it
	 * is possible for the min watermark to be breached while the estimated
	 * value looks fine. The pressure threshold is a reduced value such
	 * that even the maximum amount of drift will not accidentally breach
	 * the min watermark
	 */
	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
	threshold = max(1, (int)(watermark_distance / num_online_cpus()));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}
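/*
 * Worked example (illustrative numbers only): with a low-to-min
 * watermark gap of 512 pages and 16 online CPUs, the pressure
 * threshold is max(1, 512 / 16) = 32, well under the 125 cap. Even if
 * every CPU holds a full differential, the worst-case drift of
 * 16 * 32 = 512 pages cannot silently breach the min watermark.
 */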
int calculate_normal_threshold(struct zone *zone)
{
	int threshold;
	int mem;	/* memory in 128 MB units */

	/*
	 * The threshold scales with the number of processors and the amount
	 * of memory per zone. More memory means that we can defer updates for
	 * longer, more processors could lead to more contention.
	 * fls() is used to have a cheap way of logarithmic scaling.
	 *
	 * Some sample thresholds:
	 *
	 * Threshold	Processors	(fls)	Zonesize	fls(mem+1)
	 * ------------------------------------------------------------------
	 * 8		1		1	0.9-1 GB	4
	 * 16		2		2	0.9-1 GB	4
	 * 20		2		2	1-2 GB		5
	 * 24		2		2	2-4 GB		6
	 * 28		2		2	4-8 GB		7
	 * 32		2		2	8-16 GB		8
	 * 4		2		2	<128M		1
	 * 30		4		3	2-4 GB		5
	 * 48		4		3	8-16 GB		8
	 * 32		8		4	1-2 GB		4
	 * 32		8		4	0.9-1GB		4
	 * 10		16		5	<128M		1
	 * 40		16		5	900M		4
	 * 70		64		7	2-4 GB		5
	 * 84		64		7	4-8 GB		6
	 * 108		512		9	4-8 GB		6
	 * 125		1024		10	8-16 GB		8
	 * 125		1024		10	16-32 GB	9
	 */

	mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);

	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}
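/*
 * Worked example (assumes PAGE_SHIFT == 12, i.e. 4KB pages): a zone
 * managing 1GB holds 262144 pages, so mem = 262144 >> 15 = 8 units of
 * 128MB. With 4 online CPUs the formula yields
 * threshold = 2 * fls(4) * (1 + fls(8)) = 2 * 3 * 5 = 30.
 */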
/*
 * Refresh the thresholds for each zone.
 */
void refresh_zone_stat_thresholds(void)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int cpu;
	int threshold;

	/* Zero current pgdat thresholds */
	for_each_online_pgdat(pgdat) {
		for_each_online_cpu(cpu) {
			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
		}
	}

	for_each_populated_zone(zone) {
		struct pglist_data *pgdat = zone->zone_pgdat;
		unsigned long max_drift, tolerate_drift;

		threshold = calculate_normal_threshold(zone);

		for_each_online_cpu(cpu) {
			int pgdat_threshold;

			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;

			/* Base nodestat threshold on the largest populated zone. */
			pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
				= max(threshold, pgdat_threshold);
		}

		/*
		 * Only set percpu_drift_mark if there is a danger that
		 * NR_FREE_PAGES reports the low watermark is ok when in fact
		 * the min watermark could be breached by an allocation
		 */
		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
		max_drift = num_online_cpus() * threshold;
		if (max_drift > tolerate_drift)
			zone->percpu_drift_mark = high_wmark_pages(zone) +
					max_drift;
	}
}
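/*
 * Drift illustration (hypothetical numbers): with 8 online CPUs and a
 * threshold of 32, the global NR_FREE_PAGES can lag reality by up to
 * 8 * 32 = 256 pages. If the low-to-min watermark gap is smaller than
 * that, percpu_drift_mark is set so that watermark checks near the
 * mark can fall back to a slower but exact per-cpu snapshot.
 */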
void set_pgdat_percpu_threshold(pg_data_t *pgdat,
				int (*calculate_pressure)(struct zone *))
{
	struct zone *zone;
	int cpu;
	int threshold;
	int i;

	for (i = 0; i < pgdat->nr_zones; i++) {
		zone = &pgdat->node_zones[i];
		if (!zone->percpu_drift_mark)
			continue;

		threshold = (*calculate_pressure)(zone);
		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;
	}
}

/*
 * For use when we know that interrupts are disabled,
 * or when we know that preemption is disabled and that
 * particular counter cannot be updated from interrupt context.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			   long delta)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long x;
	long t;

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(x > t || x < -t)) {
		zone_page_state_add(x, zone, item);
		x = 0;
	}
	__this_cpu_write(*p, x);
}
EXPORT_SYMBOL(__mod_zone_page_state);
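/*
 * Typical call pattern (illustrative; the callers live elsewhere in
 * mm/): the page allocator adjusts NR_FREE_PAGES from paths where
 * interrupts are already disabled, e.g.
 *
 *	__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
 *
 * which is why the cheap non-atomic per-cpu update above is safe.
 */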
void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
			   long delta)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	long x;
	long t;

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(x > t || x < -t)) {
		node_page_state_add(x, pgdat, item);
		x = 0;
	}
	__this_cpu_write(*p, x);
}
EXPORT_SYMBOL(__mod_node_page_state);

/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * NOTE: These functions are very performance sensitive. Change only
 * with care.
 *
 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 * However, the code must first determine the differential location in a zone
 * based on the processor number and then inc/dec the counter. There is no
 * guarantee without disabling preemption that the processor will not change
 * in between and therefore the atomicity vs. interrupt cannot be exploited
 * in a useful way here.
 */
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v + overstep, zone, item);
		__this_cpu_write(*p, -overstep);
	}
}
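/*
 * Overstep illustration (hypothetical threshold): with t == 32, the
 * increment that takes the per-cpu differential to 33 folds
 * v + t/2 == 49 pages into the global counter and resets the
 * differential to -16. Restarting half a threshold below zero means a
 * counter oscillating near the threshold does not fold on every update.
 */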
void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	s8 v, t;

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		node_page_state_add(v + overstep, pgdat, item);
		__this_cpu_write(*p, -overstep);
	}
}

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __inc_node_page_state(struct page *page, enum node_stat_item item)
{
	__inc_node_state(page_pgdat(page), item);
}
EXPORT_SYMBOL(__inc_node_page_state);

void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_dec_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v < -t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v - overstep, zone, item);
		__this_cpu_write(*p, overstep);
	}
}

void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	s8 v, t;

	v = __this_cpu_dec_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v < -t)) {
		s8 overstep = t >> 1;

		node_page_state_add(v - overstep, pgdat, item);
		__this_cpu_write(*p, overstep);
	}
}

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__dec_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__dec_zone_page_state);

void __dec_node_page_state(struct page *page, enum node_stat_item item)
{
	__dec_node_state(page_pgdat(page), item);
}
EXPORT_SYMBOL(__dec_node_page_state);

#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
/*
 * If we have cmpxchg_local support then we do not need to incur the overhead
 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
 *
 * mod_state() modifies the zone counter state through atomic per cpu
 * operations.
 *
 * Overstep mode specifies how overstep should be handled:
 *      0       No overstepping
 *      1       Overstepping half of threshold
 *      -1      Overstepping minus half of threshold
 */
static inline void mod_zone_state(struct zone *zone,
       enum zone_stat_item item, long delta, int overstep_mode)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long o, n, t, z;

	do {
		z = 0;  /* overflow to zone counters */

		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong cpu if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyway
		 * for all cpus in a zone.
		 */
		t = this_cpu_read(pcp->stat_threshold);

		o = this_cpu_read(*p);
		n = delta + o;

		if (n > t || n < -t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to zone counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		zone_page_state_add(z, zone, item);
}
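/*
 * A sketch of why the loop is safe (informal, not original commentary):
 * this_cpu_cmpxchg() only publishes the new differential if the old
 * value read above is still current on whatever cpu we ended up on. If
 * we migrated between the read and the cmpxchg, the compare fails and
 * the loop retries with freshly read values, so no update is lost even
 * though interrupts were never disabled.
 */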
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{
	mod_zone_state(zone, item, delta, 0);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_zone_state(page_zone(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_zone_state(page_zone(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_zone_page_state);

static inline void mod_node_state(struct pglist_data *pgdat,
       enum node_stat_item item, int delta, int overstep_mode)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	long o, n, t, z;

	do {
		z = 0;  /* overflow to node counters */

		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong cpu if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyway
		 * for all cpus in a node.
		 */
		t = this_cpu_read(pcp->stat_threshold);

		o = this_cpu_read(*p);
		n = delta + o;

		if (n > t || n < -t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to node counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		node_page_state_add(z, pgdat, item);
}
void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
			 long delta)
{
	mod_node_state(pgdat, item, delta, 0);
}
EXPORT_SYMBOL(mod_node_page_state);

void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	mod_node_state(pgdat, item, 1, 1);
}

void inc_node_page_state(struct page *page, enum node_stat_item item)
{
	mod_node_state(page_pgdat(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_node_page_state);

void dec_node_page_state(struct page *page, enum node_stat_item item)
{
	mod_node_state(page_pgdat(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_node_page_state);
#else
/*
 * Use interrupt disable to serialize counter updates
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;

	zone = page_zone(page);
	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_zone_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);
void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__inc_node_state(pgdat, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_node_state);

void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
			 long delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_node_page_state(pgdat, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_node_page_state);

void inc_node_page_state(struct page *page, enum node_stat_item item)
{
	unsigned long flags;
	struct pglist_data *pgdat;

	pgdat = page_pgdat(page);
	local_irq_save(flags);
	__inc_node_state(pgdat, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_node_page_state);

void dec_node_page_state(struct page *page, enum node_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_node_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_node_page_state);
#endif

/*
 * Fold a differential into the global counters.
 * Returns the number of counters updated.
 */
#ifdef CONFIG_NUMA
static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
{
	int i;
	int changes = 0;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (zone_diff[i]) {
			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
			changes++;
		}

	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
		if (numa_diff[i]) {
			atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
			changes++;
		}

	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
		if (node_diff[i]) {
			atomic_long_add(node_diff[i], &vm_node_stat[i]);
			changes++;
		}
	return changes;
}
#else
static int fold_diff(int *zone_diff, int *node_diff)
{
	int i;
	int changes = 0;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (zone_diff[i]) {
			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
			changes++;
		}

	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
		if (node_diff[i]) {
			atomic_long_add(node_diff[i], &vm_node_stat[i]);
			changes++;
		}
	return changes;
}
#endif /* CONFIG_NUMA */
/*
 * Update the zone and node counters for the current cpu.
 *
 * Note that refresh_cpu_vm_stats strives to access only
 * node local memory. The per cpu pagesets on remote zones are placed
 * in the memory local to the processor using that pageset. So the
 * loop over all zones will access a series of cachelines local to
 * the processor.
 *
 * The call to zone_page_state_add updates the cachelines with the
 * statistics in the remote zone struct as well as the global cachelines
 * with the global counters. These could cause remote node cache line
 * bouncing and will have to be only done when necessary.
 *
 * The function returns the number of global counters updated.
 */
static int refresh_cpu_vm_stats(bool do_pagesets)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int i;
	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
#ifdef CONFIG_NUMA
	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
#endif
	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
	int changes = 0;

	for_each_populated_zone(zone) {
		struct per_cpu_pageset __percpu *p = zone->pageset;

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
			int v;

			v = this_cpu_xchg(p->vm_stat_diff[i], 0);
			if (v) {

				atomic_long_add(v, &zone->vm_stat[i]);
				global_zone_diff[i] += v;
#ifdef CONFIG_NUMA
				/* 3 seconds idle till flush */
				__this_cpu_write(p->expire, 3);
#endif
			}
		}
#ifdef CONFIG_NUMA
		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
			int v;

			v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0);
			if (v) {

				atomic_long_add(v, &zone->vm_numa_stat[i]);
				global_numa_diff[i] += v;
				__this_cpu_write(p->expire, 3);
			}
		}

		if (do_pagesets) {
			cond_resched();
			/*
			 * Deal with draining the remote pageset of this
			 * processor
			 *
			 * Check if there are pages remaining in this pageset
			 * if not then there is nothing to expire.
			 */
			if (!__this_cpu_read(p->expire) ||
			       !__this_cpu_read(p->pcp.count))
				continue;

			/*
			 * We never drain zones local to this processor.
			 */
			if (zone_to_nid(zone) == numa_node_id()) {
				__this_cpu_write(p->expire, 0);
				continue;
			}

			if (__this_cpu_dec_return(p->expire))
				continue;

			if (__this_cpu_read(p->pcp.count)) {
				drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
				changes++;
			}
		}
#endif
	}

	for_each_online_pgdat(pgdat) {
		struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;

		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
			int v;

			v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
			if (v) {
				atomic_long_add(v, &pgdat->vm_stat[i]);
				global_node_diff[i] += v;
			}
		}
	}

#ifdef CONFIG_NUMA
	changes += fold_diff(global_zone_diff, global_numa_diff,
			     global_node_diff);
#else
	changes += fold_diff(global_zone_diff, global_node_diff);
#endif
	return changes;
}
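/*
 * Context note (a loose summary of behaviour implemented elsewhere in
 * this file, not authoritative): the periodic vmstat_update() worker is
 * the caller that passes do_pagesets == true, so remote pagesets are
 * only drained from that deferrable work, and only once the expire
 * countdown written above has ticked down to zero.
 */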
/*
 * Fold the data for an offline cpu into the global array.
 * There cannot be any access by the offline cpu and therefore
 * synchronization is simplified.
 */
void cpu_vm_stats_fold(int cpu)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int i;
	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
#ifdef CONFIG_NUMA
	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
#endif
	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };

	for_each_populated_zone(zone) {
		struct per_cpu_pageset *p;

		p = per_cpu_ptr(zone->pageset, cpu);

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
			if (p->vm_stat_diff[i]) {
				int v;

				v = p->vm_stat_diff[i];
				p->vm_stat_diff[i] = 0;
				atomic_long_add(v, &zone->vm_stat[i]);
				global_zone_diff[i] += v;
			}

#ifdef CONFIG_NUMA
		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
			if (p->vm_numa_stat_diff[i]) {
				int v;

				v = p->vm_numa_stat_diff[i];
				p->vm_numa_stat_diff[i] = 0;
				atomic_long_add(v, &zone->vm_numa_stat[i]);
				global_numa_diff[i] += v;
			}
#endif
	}

	for_each_online_pgdat(pgdat) {
		struct per_cpu_nodestat *p;

		p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);

		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
			if (p->vm_node_stat_diff[i]) {
				int v;

				v = p->vm_node_stat_diff[i];
				p->vm_node_stat_diff[i] = 0;
				atomic_long_add(v, &pgdat->vm_stat[i]);
				global_node_diff[i] += v;
			}
	}

#ifdef CONFIG_NUMA
	fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
#else
	fold_diff(global_zone_diff, global_node_diff);
#endif
}

/*
 * This is only called if !populated_zone(zone), which implies no other
 * users of pset->vm_stat_diff[] exist.
 */
void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
{
	int i;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (pset->vm_stat_diff[i]) {
			int v = pset->vm_stat_diff[i];
			pset->vm_stat_diff[i] = 0;
			atomic_long_add(v, &zone->vm_stat[i]);
			atomic_long_add(v, &vm_zone_stat[i]);
		}

#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
		if (pset->vm_numa_stat_diff[i]) {
			int v = pset->vm_numa_stat_diff[i];

			pset->vm_numa_stat_diff[i] = 0;
			atomic_long_add(v, &zone->vm_numa_stat[i]);
			atomic_long_add(v, &vm_numa_stat[i]);
		}
#endif
}
#endif

#ifdef CONFIG_NUMA
void __inc_numa_state(struct zone *zone,
				 enum numa_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	u16 __percpu *p = pcp->vm_numa_stat_diff + item;
	u16 v;

	v = __this_cpu_inc_return(*p);

	if (unlikely(v > NUMA_STATS_THRESHOLD)) {
		zone_numa_state_add(v, zone, item);
		__this_cpu_write(*p, 0);
	}
}
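/*
 * Design note (an inference from the code above, not original
 * commentary): NUMA event counters only ever increment, so they can use
 * unsigned 16-bit differentials with a far larger threshold than the s8
 * zone/node diffs. NUMA_STATS_THRESHOLD being U16_MAX - 2 leaves a
 * small margin so a racing increment cannot wrap the u16 before the
 * fold into zone_numa_state_add().
 */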
/*
 * Determine the per node value of a stat item. This function
 * is called frequently in a NUMA machine, so try to be as
 * frugal as possible.
 */
unsigned long sum_zone_node_page_state(int node,
				 enum zone_stat_item item)
{
	struct zone *zones = NODE_DATA(node)->node_zones;
	int i;
	unsigned long count = 0;

	for (i = 0; i < MAX_NR_ZONES; i++)
		count += zone_page_state(zones + i, item);

	return count;
}

/*
 * Determine the per node value of a numa stat item. To avoid deviation,
 * the per-cpu counters pending in vm_numa_stat_diff[] are included as well.
 */
unsigned long sum_zone_numa_state(int node,
				 enum numa_stat_item item)
{
	struct zone *zones = NODE_DATA(node)->node_zones;
	int i;
	unsigned long count = 0;

	for (i = 0; i < MAX_NR_ZONES; i++)
		count += zone_numa_state_snapshot(zones + i, item);

	return count;
}

/*
 * Determine the per node value of a stat item.
 */
unsigned long node_page_state(struct pglist_data *pgdat,
				enum node_stat_item item)
{
	long x = atomic_long_read(&pgdat->vm_stat[item]);
#ifdef CONFIG_SMP
	if (x < 0)
		x = 0;
#endif
	return x;
}
#endif

#ifdef CONFIG_COMPACTION

struct contig_page_info {
	unsigned long free_pages;
	unsigned long free_blocks_total;
	unsigned long free_blocks_suitable;
};

/*
 * Calculate the number of free pages in a zone, how many contiguous
 * pages are free and how many are large enough to satisfy an allocation of
 * the target size. Note that this function makes no attempt to estimate
 * how many suitable free blocks there *might* be if MOVABLE pages were
 * migrated. Calculating that is possible, but expensive and can be
 * figured out from userspace.
 */
static void fill_contig_page_info(struct zone *zone,
				unsigned int suitable_order,
				struct contig_page_info *info)
{
	unsigned int order;

	info->free_pages = 0;
	info->free_blocks_total = 0;
	info->free_blocks_suitable = 0;

	for (order = 0; order < MAX_ORDER; order++) {
		unsigned long blocks;

		/* Count number of free blocks */
		blocks = zone->free_area[order].nr_free;
		info->free_blocks_total += blocks;

		/* Count free base pages */
		info->free_pages += blocks << order;

		/* Count the suitable free blocks */
		if (order >= suitable_order)
			info->free_blocks_suitable += blocks <<
						(order - suitable_order);
	}
}
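/*
 * Worked example (made-up free lists): for suitable_order == 2 and a
 * zone with 10 order-0, 4 order-1 and 3 order-3 free blocks,
 * free_blocks_total is 17, free_pages is 10 + 4*2 + 3*8 = 42, and
 * free_blocks_suitable is 3 << (3 - 2) = 6, since each order-3 block
 * can be split into two order-2 blocks.
 */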
/*
 * A fragmentation index only makes sense if an allocation of a requested
 * size would fail. If that is true, the fragmentation index indicates
 * whether external fragmentation or a lack of memory was the problem.
 * The value can be used to determine if page reclaim or compaction
 * should be used.
 */
static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
{
	unsigned long requested = 1UL << order;

	if (WARN_ON_ONCE(order >= MAX_ORDER))
		return 0;

	if (!info->free_blocks_total)
		return 0;

	/* Fragmentation index only makes sense when a request would fail */
	if (info->free_blocks_suitable)
		return -1000;

	/*
	 * Index is between 0 and 1 so return within 3 decimal places
	 *
	 * 0 => allocation would fail due to lack of memory
	 * 1 => allocation would fail due to fragmentation
	 */
	return 1000 - div_u64((1000 + div_u64(info->free_pages * 1000ULL, requested)),
			      info->free_blocks_total);
}

/* Same as __fragmentation_index but allocs contig_page_info on stack */
int fragmentation_index(struct zone *zone, unsigned int order)
{
	struct contig_page_info info;

	fill_contig_page_info(zone, order, &info);
	return __fragmentation_index(order, &info);
}
#endif
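/*
 * Worked example (reusing the made-up free lists sketched before
 * fill_contig_page_info, but asking for order 4): no block is large
 * enough, so free_blocks_suitable == 0 and, with requested = 16,
 * free_pages = 42 and free_blocks_total = 17, the index is
 * 1000 - (1000 + 42 * 1000 / 16) / 17 = 1000 - 213 = 787, i.e. 0.787:
 * more free pages exist than were requested, just not contiguously,
 * so the failure is mostly a fragmentation problem and compaction is
 * the more promising remedy.
 */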
"nr_zone_write_pending", 1117fa25c503SKOSAKI Motohiro "nr_mlock", 1118fa25c503SKOSAKI Motohiro "nr_page_table_pages", 1119fa25c503SKOSAKI Motohiro "nr_kernel_stack", 1120fa25c503SKOSAKI Motohiro "nr_bounce", 112191537feeSMinchan Kim #if IS_ENABLED(CONFIG_ZSMALLOC) 112291537feeSMinchan Kim "nr_zspages", 112391537feeSMinchan Kim #endif 11243a321d2aSKemi Wang "nr_free_cma", 11253a321d2aSKemi Wang 11263a321d2aSKemi Wang /* enum numa_stat_item counters */ 1127fa25c503SKOSAKI Motohiro #ifdef CONFIG_NUMA 1128fa25c503SKOSAKI Motohiro "numa_hit", 1129fa25c503SKOSAKI Motohiro "numa_miss", 1130fa25c503SKOSAKI Motohiro "numa_foreign", 1131fa25c503SKOSAKI Motohiro "numa_interleave", 1132fa25c503SKOSAKI Motohiro "numa_local", 1133fa25c503SKOSAKI Motohiro "numa_other", 1134fa25c503SKOSAKI Motohiro #endif 113509316c09SKonstantin Khlebnikov 1136599d0c95SMel Gorman /* Node-based counters */ 1137599d0c95SMel Gorman "nr_inactive_anon", 1138599d0c95SMel Gorman "nr_active_anon", 1139599d0c95SMel Gorman "nr_inactive_file", 1140599d0c95SMel Gorman "nr_active_file", 1141599d0c95SMel Gorman "nr_unevictable", 1142385386cfSJohannes Weiner "nr_slab_reclaimable", 1143385386cfSJohannes Weiner "nr_slab_unreclaimable", 1144599d0c95SMel Gorman "nr_isolated_anon", 1145599d0c95SMel Gorman "nr_isolated_file", 114668d48e6aSJohannes Weiner "workingset_nodes", 11471e6b1085SMel Gorman "workingset_refault", 11481e6b1085SMel Gorman "workingset_activate", 11491899ad18SJohannes Weiner "workingset_restore", 11501e6b1085SMel Gorman "workingset_nodereclaim", 115150658e2eSMel Gorman "nr_anon_pages", 115250658e2eSMel Gorman "nr_mapped", 115311fb9989SMel Gorman "nr_file_pages", 115411fb9989SMel Gorman "nr_dirty", 115511fb9989SMel Gorman "nr_writeback", 115611fb9989SMel Gorman "nr_writeback_temp", 115711fb9989SMel Gorman "nr_shmem", 115811fb9989SMel Gorman "nr_shmem_hugepages", 115911fb9989SMel Gorman "nr_shmem_pmdmapped", 116011fb9989SMel Gorman "nr_anon_transparent_hugepages", 116111fb9989SMel Gorman "nr_unstable", 1162c4a25635SMel Gorman "nr_vmscan_write", 1163c4a25635SMel Gorman "nr_vmscan_immediate_reclaim", 1164c4a25635SMel Gorman "nr_dirtied", 1165c4a25635SMel Gorman "nr_written", 1166b29940c1SVlastimil Babka "nr_kernel_misc_reclaimable", 1167599d0c95SMel Gorman 116809316c09SKonstantin Khlebnikov /* enum writeback_stat_item counters */ 1169fa25c503SKOSAKI Motohiro "nr_dirty_threshold", 1170fa25c503SKOSAKI Motohiro "nr_dirty_background_threshold", 1171fa25c503SKOSAKI Motohiro 1172fa25c503SKOSAKI Motohiro #ifdef CONFIG_VM_EVENT_COUNTERS 117309316c09SKonstantin Khlebnikov /* enum vm_event_item counters */ 1174fa25c503SKOSAKI Motohiro "pgpgin", 1175fa25c503SKOSAKI Motohiro "pgpgout", 1176fa25c503SKOSAKI Motohiro "pswpin", 1177fa25c503SKOSAKI Motohiro "pswpout", 1178fa25c503SKOSAKI Motohiro 1179fa25c503SKOSAKI Motohiro TEXTS_FOR_ZONES("pgalloc") 11807cc30fcfSMel Gorman TEXTS_FOR_ZONES("allocstall") 11817cc30fcfSMel Gorman TEXTS_FOR_ZONES("pgskip") 1182fa25c503SKOSAKI Motohiro 1183fa25c503SKOSAKI Motohiro "pgfree", 1184fa25c503SKOSAKI Motohiro "pgactivate", 1185fa25c503SKOSAKI Motohiro "pgdeactivate", 1186f7ad2a6cSShaohua Li "pglazyfree", 1187fa25c503SKOSAKI Motohiro 1188fa25c503SKOSAKI Motohiro "pgfault", 1189fa25c503SKOSAKI Motohiro "pgmajfault", 1190854e9ed0SMinchan Kim "pglazyfreed", 1191fa25c503SKOSAKI Motohiro 1192599d0c95SMel Gorman "pgrefill", 1193599d0c95SMel Gorman "pgsteal_kswapd", 1194599d0c95SMel Gorman "pgsteal_direct", 1195599d0c95SMel Gorman "pgscan_kswapd", 1196599d0c95SMel Gorman "pgscan_direct", 119768243e76SMel Gorman 
"pgscan_direct_throttle", 1198fa25c503SKOSAKI Motohiro 1199fa25c503SKOSAKI Motohiro #ifdef CONFIG_NUMA 1200fa25c503SKOSAKI Motohiro "zone_reclaim_failed", 1201fa25c503SKOSAKI Motohiro #endif 1202fa25c503SKOSAKI Motohiro "pginodesteal", 1203fa25c503SKOSAKI Motohiro "slabs_scanned", 1204fa25c503SKOSAKI Motohiro "kswapd_inodesteal", 1205fa25c503SKOSAKI Motohiro "kswapd_low_wmark_hit_quickly", 1206fa25c503SKOSAKI Motohiro "kswapd_high_wmark_hit_quickly", 1207fa25c503SKOSAKI Motohiro "pageoutrun", 1208fa25c503SKOSAKI Motohiro 1209fa25c503SKOSAKI Motohiro "pgrotated", 1210fa25c503SKOSAKI Motohiro 12115509a5d2SDave Hansen "drop_pagecache", 12125509a5d2SDave Hansen "drop_slab", 12138e675f7aSKonstantin Khlebnikov "oom_kill", 12145509a5d2SDave Hansen 121503c5a6e1SMel Gorman #ifdef CONFIG_NUMA_BALANCING 121603c5a6e1SMel Gorman "numa_pte_updates", 121772403b4aSMel Gorman "numa_huge_pte_updates", 121803c5a6e1SMel Gorman "numa_hint_faults", 121903c5a6e1SMel Gorman "numa_hint_faults_local", 122003c5a6e1SMel Gorman "numa_pages_migrated", 122103c5a6e1SMel Gorman #endif 12225647bc29SMel Gorman #ifdef CONFIG_MIGRATION 12235647bc29SMel Gorman "pgmigrate_success", 12245647bc29SMel Gorman "pgmigrate_fail", 12255647bc29SMel Gorman #endif 1226fa25c503SKOSAKI Motohiro #ifdef CONFIG_COMPACTION 1227397487dbSMel Gorman "compact_migrate_scanned", 1228397487dbSMel Gorman "compact_free_scanned", 1229397487dbSMel Gorman "compact_isolated", 1230fa25c503SKOSAKI Motohiro "compact_stall", 1231fa25c503SKOSAKI Motohiro "compact_fail", 1232fa25c503SKOSAKI Motohiro "compact_success", 1233698b1b30SVlastimil Babka "compact_daemon_wake", 12347f354a54SDavid Rientjes "compact_daemon_migrate_scanned", 12357f354a54SDavid Rientjes "compact_daemon_free_scanned", 1236fa25c503SKOSAKI Motohiro #endif 1237fa25c503SKOSAKI Motohiro 1238fa25c503SKOSAKI Motohiro #ifdef CONFIG_HUGETLB_PAGE 1239fa25c503SKOSAKI Motohiro "htlb_buddy_alloc_success", 1240fa25c503SKOSAKI Motohiro "htlb_buddy_alloc_fail", 1241fa25c503SKOSAKI Motohiro #endif 1242fa25c503SKOSAKI Motohiro "unevictable_pgs_culled", 1243fa25c503SKOSAKI Motohiro "unevictable_pgs_scanned", 1244fa25c503SKOSAKI Motohiro "unevictable_pgs_rescued", 1245fa25c503SKOSAKI Motohiro "unevictable_pgs_mlocked", 1246fa25c503SKOSAKI Motohiro "unevictable_pgs_munlocked", 1247fa25c503SKOSAKI Motohiro "unevictable_pgs_cleared", 1248fa25c503SKOSAKI Motohiro "unevictable_pgs_stranded", 1249fa25c503SKOSAKI Motohiro 1250fa25c503SKOSAKI Motohiro #ifdef CONFIG_TRANSPARENT_HUGEPAGE 1251fa25c503SKOSAKI Motohiro "thp_fault_alloc", 1252fa25c503SKOSAKI Motohiro "thp_fault_fallback", 1253fa25c503SKOSAKI Motohiro "thp_collapse_alloc", 1254fa25c503SKOSAKI Motohiro "thp_collapse_alloc_failed", 125595ecedcdSKirill A. Shutemov "thp_file_alloc", 125695ecedcdSKirill A. Shutemov "thp_file_mapped", 1257122afea9SKirill A. Shutemov "thp_split_page", 1258122afea9SKirill A. Shutemov "thp_split_page_failed", 1259f9719a03SKirill A. Shutemov "thp_deferred_split_page", 1260122afea9SKirill A. Shutemov "thp_split_pmd", 1261ce9311cfSYisheng Xie #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD 1262ce9311cfSYisheng Xie "thp_split_pud", 1263ce9311cfSYisheng Xie #endif 1264d8a8e1f0SKirill A. Shutemov "thp_zero_page_alloc", 1265d8a8e1f0SKirill A. 
Shutemov 	"thp_zero_page_alloc_failed",
1266225311a4SHuang Ying 	"thp_swpout",
1267fe490cc0SHuang Ying 	"thp_swpout_fallback",
1268fa25c503SKOSAKI Motohiro #endif
126909316c09SKonstantin Khlebnikov #ifdef CONFIG_MEMORY_BALLOON
127009316c09SKonstantin Khlebnikov 	"balloon_inflate",
127109316c09SKonstantin Khlebnikov 	"balloon_deflate",
127209316c09SKonstantin Khlebnikov #ifdef CONFIG_BALLOON_COMPACTION
127309316c09SKonstantin Khlebnikov 	"balloon_migrate",
127409316c09SKonstantin Khlebnikov #endif
127509316c09SKonstantin Khlebnikov #endif /* CONFIG_MEMORY_BALLOON */
1276ec659934SMel Gorman #ifdef CONFIG_DEBUG_TLBFLUSH
12776df46865SDave Hansen #ifdef CONFIG_SMP
12789824cf97SDave Hansen 	"nr_tlb_remote_flush",
12799824cf97SDave Hansen 	"nr_tlb_remote_flush_received",
128058bc4c34SJann Horn #else
128158bc4c34SJann Horn 	"", /* nr_tlb_remote_flush */
128258bc4c34SJann Horn 	"", /* nr_tlb_remote_flush_received */
1283ec659934SMel Gorman #endif /* CONFIG_SMP */
12849824cf97SDave Hansen 	"nr_tlb_local_flush_all",
12859824cf97SDave Hansen 	"nr_tlb_local_flush_one",
1286ec659934SMel Gorman #endif /* CONFIG_DEBUG_TLBFLUSH */
1287fa25c503SKOSAKI Motohiro 
12884f115147SDavidlohr Bueso #ifdef CONFIG_DEBUG_VM_VMACACHE
12894f115147SDavidlohr Bueso 	"vmacache_find_calls",
12904f115147SDavidlohr Bueso 	"vmacache_find_hits",
12914f115147SDavidlohr Bueso #endif
1292cbc65df2SHuang Ying #ifdef CONFIG_SWAP
1293cbc65df2SHuang Ying 	"swap_ra",
1294cbc65df2SHuang Ying 	"swap_ra_hit",
1295cbc65df2SHuang Ying #endif
1296fa25c503SKOSAKI Motohiro #endif /* CONFIG_VM_EVENT_COUNTERS */
1297fa25c503SKOSAKI Motohiro };
12980d6617c7SDavid Rientjes #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
1299fa25c503SKOSAKI Motohiro 
13003c486871SAndrew Morton #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
13013c486871SAndrew Morton 	defined(CONFIG_PROC_FS)
13023c486871SAndrew Morton static void *frag_start(struct seq_file *m, loff_t *pos)
13033c486871SAndrew Morton {
13043c486871SAndrew Morton 	pg_data_t *pgdat;
13053c486871SAndrew Morton 	loff_t node = *pos;
13063c486871SAndrew Morton 
13073c486871SAndrew Morton 	for (pgdat = first_online_pgdat();
13083c486871SAndrew Morton 	     pgdat && node;
13093c486871SAndrew Morton 	     pgdat = next_online_pgdat(pgdat))
13103c486871SAndrew Morton 		--node;
13113c486871SAndrew Morton 
13123c486871SAndrew Morton 	return pgdat;
13133c486871SAndrew Morton }
13143c486871SAndrew Morton 
13153c486871SAndrew Morton static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
13163c486871SAndrew Morton {
13173c486871SAndrew Morton 	pg_data_t *pgdat = (pg_data_t *)arg;
13183c486871SAndrew Morton 
13193c486871SAndrew Morton 	(*pos)++;
13203c486871SAndrew Morton 	return next_online_pgdat(pgdat);
13213c486871SAndrew Morton }
13223c486871SAndrew Morton 
13233c486871SAndrew Morton static void frag_stop(struct seq_file *m, void *arg)
13243c486871SAndrew Morton {
13253c486871SAndrew Morton }
13263c486871SAndrew Morton 
1327b2bd8598SDavid Rientjes /*
1328b2bd8598SDavid Rientjes  * Walk zones in a node and print using a callback.
1329b2bd8598SDavid Rientjes  * If @assert_populated is true, only use callback for zones that are populated.
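 * If @nolock is true, the callback is invoked without taking zone->lock;
 * the mixed-pageblock walk below passes this, avoiding a long pageblock
 * scan under the lock.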
1330b2bd8598SDavid Rientjes */ 13313c486871SAndrew Morton static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat, 1332727c080fSVinayak Menon bool assert_populated, bool nolock, 13333c486871SAndrew Morton void (*print)(struct seq_file *m, pg_data_t *, struct zone *)) 13343c486871SAndrew Morton { 13353c486871SAndrew Morton struct zone *zone; 13363c486871SAndrew Morton struct zone *node_zones = pgdat->node_zones; 13373c486871SAndrew Morton unsigned long flags; 13383c486871SAndrew Morton 13393c486871SAndrew Morton for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { 1340b2bd8598SDavid Rientjes if (assert_populated && !populated_zone(zone)) 13413c486871SAndrew Morton continue; 13423c486871SAndrew Morton 1343727c080fSVinayak Menon if (!nolock) 13443c486871SAndrew Morton spin_lock_irqsave(&zone->lock, flags); 13453c486871SAndrew Morton print(m, pgdat, zone); 1346727c080fSVinayak Menon if (!nolock) 13473c486871SAndrew Morton spin_unlock_irqrestore(&zone->lock, flags); 13483c486871SAndrew Morton } 13493c486871SAndrew Morton } 13503c486871SAndrew Morton #endif 13513c486871SAndrew Morton 1352d7a5752cSMel Gorman #ifdef CONFIG_PROC_FS 1353467c996cSMel Gorman static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, 1354467c996cSMel Gorman struct zone *zone) 1355467c996cSMel Gorman { 1356467c996cSMel Gorman int order; 1357467c996cSMel Gorman 1358f6ac2354SChristoph Lameter seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); 1359f6ac2354SChristoph Lameter for (order = 0; order < MAX_ORDER; ++order) 1360f6ac2354SChristoph Lameter seq_printf(m, "%6lu ", zone->free_area[order].nr_free); 1361f6ac2354SChristoph Lameter seq_putc(m, '\n'); 1362f6ac2354SChristoph Lameter } 1363467c996cSMel Gorman 1364467c996cSMel Gorman /* 1365467c996cSMel Gorman * This walks the free areas for each zone. 
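 * One line is printed per (node, zone) with the count of free blocks at
 * each order; this backs /proc/buddyinfo.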
1366467c996cSMel Gorman  */
1367467c996cSMel Gorman static int frag_show(struct seq_file *m, void *arg)
1368467c996cSMel Gorman {
1369467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1370727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, frag_show_print);
1371467c996cSMel Gorman 	return 0;
1372467c996cSMel Gorman }
1373467c996cSMel Gorman 
1374467c996cSMel Gorman static void pagetypeinfo_showfree_print(struct seq_file *m,
1375467c996cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1376467c996cSMel Gorman {
1377467c996cSMel Gorman 	int order, mtype;
1378467c996cSMel Gorman 
1379467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
1380467c996cSMel Gorman 		seq_printf(m, "Node %4d, zone %8s, type %12s ",
1381467c996cSMel Gorman 					pgdat->node_id,
1382467c996cSMel Gorman 					zone->name,
1383467c996cSMel Gorman 					migratetype_names[mtype]);
1384467c996cSMel Gorman 		for (order = 0; order < MAX_ORDER; ++order) {
1385467c996cSMel Gorman 			unsigned long freecount = 0;
1386467c996cSMel Gorman 			struct free_area *area;
1387467c996cSMel Gorman 			struct list_head *curr;
1388467c996cSMel Gorman 
1389467c996cSMel Gorman 			area = &(zone->free_area[order]);
1390467c996cSMel Gorman 
1391467c996cSMel Gorman 			list_for_each(curr, &area->free_list[mtype])
1392467c996cSMel Gorman 				freecount++;
1393467c996cSMel Gorman 			seq_printf(m, "%6lu ", freecount);
1394467c996cSMel Gorman 		}
1395467c996cSMel Gorman 		seq_putc(m, '\n');
1396467c996cSMel Gorman 	}
1397467c996cSMel Gorman }
1398467c996cSMel Gorman 
1399467c996cSMel Gorman /* Print out the free pages at each order for each migratetype */
1400467c996cSMel Gorman static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
1401467c996cSMel Gorman {
1402467c996cSMel Gorman 	int order;
1403467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1404467c996cSMel Gorman 
1405467c996cSMel Gorman 	/* Print header */
1406467c996cSMel Gorman 	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
1407467c996cSMel Gorman 	for (order = 0; order < MAX_ORDER; ++order)
1408467c996cSMel Gorman 		seq_printf(m, "%6d ", order);
1409467c996cSMel Gorman 	seq_putc(m, '\n');
1410467c996cSMel Gorman 
1411727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
1412467c996cSMel Gorman 
1413467c996cSMel Gorman 	return 0;
1414467c996cSMel Gorman }
1415467c996cSMel Gorman 
1416467c996cSMel Gorman static void pagetypeinfo_showblockcount_print(struct seq_file *m,
1417467c996cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1418467c996cSMel Gorman {
1419467c996cSMel Gorman 	int mtype;
1420467c996cSMel Gorman 	unsigned long pfn;
1421467c996cSMel Gorman 	unsigned long start_pfn = zone->zone_start_pfn;
1422108bcc96SCody P Schafer 	unsigned long end_pfn = zone_end_pfn(zone);
1423467c996cSMel Gorman 	unsigned long count[MIGRATE_TYPES] = { 0, };
1424467c996cSMel Gorman 
1425467c996cSMel Gorman 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
1426467c996cSMel Gorman 		struct page *page;
1427467c996cSMel Gorman 
1428d336e94eSMichal Hocko 		page = pfn_to_online_page(pfn);
1429d336e94eSMichal Hocko 		if (!page)
1430467c996cSMel Gorman 			continue;
1431467c996cSMel Gorman 
1432eb33575cSMel Gorman 		/* Watch for unexpected holes punched in the memmap */
1433eb33575cSMel Gorman 		if (!memmap_valid_within(pfn, page, zone))
1434e80d6a24SMel Gorman 			continue;
1435eb33575cSMel Gorman 
1436a91c43c7SJoonsoo Kim 		if (page_zone(page) != zone)
1437a91c43c7SJoonsoo Kim 			continue;
1438a91c43c7SJoonsoo Kim 
1439467c996cSMel Gorman 		mtype = get_pageblock_migratetype(page);
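		/*
		 * The pageblock bits are read here without zone->lock, so a
		 * racing update can yield a transiently out-of-range
		 * migratetype; the bounds check below skips such blocks.
		 */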
1440467c996cSMel Gorman 1441e80d6a24SMel Gorman if (mtype < MIGRATE_TYPES) 1442467c996cSMel Gorman count[mtype]++; 1443467c996cSMel Gorman } 1444467c996cSMel Gorman 1445467c996cSMel Gorman /* Print counts */ 1446467c996cSMel Gorman seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); 1447467c996cSMel Gorman for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) 1448467c996cSMel Gorman seq_printf(m, "%12lu ", count[mtype]); 1449467c996cSMel Gorman seq_putc(m, '\n'); 1450467c996cSMel Gorman } 1451467c996cSMel Gorman 1452f113e641SSeongJae Park /* Print out the number of pageblocks for each migratetype */ 1453467c996cSMel Gorman static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg) 1454467c996cSMel Gorman { 1455467c996cSMel Gorman int mtype; 1456467c996cSMel Gorman pg_data_t *pgdat = (pg_data_t *)arg; 1457467c996cSMel Gorman 1458467c996cSMel Gorman seq_printf(m, "\n%-23s", "Number of blocks type "); 1459467c996cSMel Gorman for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) 1460467c996cSMel Gorman seq_printf(m, "%12s ", migratetype_names[mtype]); 1461467c996cSMel Gorman seq_putc(m, '\n'); 1462727c080fSVinayak Menon walk_zones_in_node(m, pgdat, true, false, 1463727c080fSVinayak Menon pagetypeinfo_showblockcount_print); 1464467c996cSMel Gorman 1465467c996cSMel Gorman return 0; 1466467c996cSMel Gorman } 1467467c996cSMel Gorman 146848c96a36SJoonsoo Kim /* 146948c96a36SJoonsoo Kim * Print out the number of pageblocks for each migratetype that contain pages 147048c96a36SJoonsoo Kim * of other types. This gives an indication of how well fallbacks are being 147148c96a36SJoonsoo Kim * contained by rmqueue_fallback(). It requires information from PAGE_OWNER 147248c96a36SJoonsoo Kim * to determine what is going on 147348c96a36SJoonsoo Kim */ 147448c96a36SJoonsoo Kim static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat) 147548c96a36SJoonsoo Kim { 147648c96a36SJoonsoo Kim #ifdef CONFIG_PAGE_OWNER 147748c96a36SJoonsoo Kim int mtype; 147848c96a36SJoonsoo Kim 14797dd80b8aSVlastimil Babka if (!static_branch_unlikely(&page_owner_inited)) 148048c96a36SJoonsoo Kim return; 148148c96a36SJoonsoo Kim 148248c96a36SJoonsoo Kim drain_all_pages(NULL); 148348c96a36SJoonsoo Kim 148448c96a36SJoonsoo Kim seq_printf(m, "\n%-23s", "Number of mixed blocks "); 148548c96a36SJoonsoo Kim for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) 148648c96a36SJoonsoo Kim seq_printf(m, "%12s ", migratetype_names[mtype]); 148748c96a36SJoonsoo Kim seq_putc(m, '\n'); 148848c96a36SJoonsoo Kim 1489727c080fSVinayak Menon walk_zones_in_node(m, pgdat, true, true, 1490727c080fSVinayak Menon pagetypeinfo_showmixedcount_print); 149148c96a36SJoonsoo Kim #endif /* CONFIG_PAGE_OWNER */ 149248c96a36SJoonsoo Kim } 149348c96a36SJoonsoo Kim 1494467c996cSMel Gorman /* 1495467c996cSMel Gorman * This prints out statistics in relation to grouping pages by mobility. 1496467c996cSMel Gorman * It is expensive to collect so do not constantly read the file. 
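 * A single read walks every free list and every pageblock in the node
 * and, for the mixed-block counts, first drains all per-cpu page lists.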
1497467c996cSMel Gorman */ 1498467c996cSMel Gorman static int pagetypeinfo_show(struct seq_file *m, void *arg) 1499467c996cSMel Gorman { 1500467c996cSMel Gorman pg_data_t *pgdat = (pg_data_t *)arg; 1501467c996cSMel Gorman 150241b25a37SKOSAKI Motohiro /* check memoryless node */ 1503a47b53c5SLai Jiangshan if (!node_state(pgdat->node_id, N_MEMORY)) 150441b25a37SKOSAKI Motohiro return 0; 150541b25a37SKOSAKI Motohiro 1506467c996cSMel Gorman seq_printf(m, "Page block order: %d\n", pageblock_order); 1507467c996cSMel Gorman seq_printf(m, "Pages per block: %lu\n", pageblock_nr_pages); 1508467c996cSMel Gorman seq_putc(m, '\n'); 1509467c996cSMel Gorman pagetypeinfo_showfree(m, pgdat); 1510467c996cSMel Gorman pagetypeinfo_showblockcount(m, pgdat); 151148c96a36SJoonsoo Kim pagetypeinfo_showmixedcount(m, pgdat); 1512467c996cSMel Gorman 1513f6ac2354SChristoph Lameter return 0; 1514f6ac2354SChristoph Lameter } 1515f6ac2354SChristoph Lameter 15168f32f7e5SAlexey Dobriyan static const struct seq_operations fragmentation_op = { 1517f6ac2354SChristoph Lameter .start = frag_start, 1518f6ac2354SChristoph Lameter .next = frag_next, 1519f6ac2354SChristoph Lameter .stop = frag_stop, 1520f6ac2354SChristoph Lameter .show = frag_show, 1521f6ac2354SChristoph Lameter }; 1522f6ac2354SChristoph Lameter 152374e2e8e8SAlexey Dobriyan static const struct seq_operations pagetypeinfo_op = { 1524467c996cSMel Gorman .start = frag_start, 1525467c996cSMel Gorman .next = frag_next, 1526467c996cSMel Gorman .stop = frag_stop, 1527467c996cSMel Gorman .show = pagetypeinfo_show, 1528467c996cSMel Gorman }; 1529467c996cSMel Gorman 1530e2ecc8a7SMel Gorman static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone) 1531e2ecc8a7SMel Gorman { 1532e2ecc8a7SMel Gorman int zid; 1533e2ecc8a7SMel Gorman 1534e2ecc8a7SMel Gorman for (zid = 0; zid < MAX_NR_ZONES; zid++) { 1535e2ecc8a7SMel Gorman struct zone *compare = &pgdat->node_zones[zid]; 1536e2ecc8a7SMel Gorman 1537e2ecc8a7SMel Gorman if (populated_zone(compare)) 1538e2ecc8a7SMel Gorman return zone == compare; 1539e2ecc8a7SMel Gorman } 1540e2ecc8a7SMel Gorman 1541e2ecc8a7SMel Gorman return false; 1542e2ecc8a7SMel Gorman } 1543e2ecc8a7SMel Gorman 1544467c996cSMel Gorman static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, 1545467c996cSMel Gorman struct zone *zone) 1546f6ac2354SChristoph Lameter { 1547f6ac2354SChristoph Lameter int i; 1548f6ac2354SChristoph Lameter seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name); 1549e2ecc8a7SMel Gorman if (is_zone_first_populated(pgdat, zone)) { 1550e2ecc8a7SMel Gorman seq_printf(m, "\n per-node stats"); 1551e2ecc8a7SMel Gorman for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { 1552e2ecc8a7SMel Gorman seq_printf(m, "\n %-12s %lu", 15533a321d2aSKemi Wang vmstat_text[i + NR_VM_ZONE_STAT_ITEMS + 15543a321d2aSKemi Wang NR_VM_NUMA_STAT_ITEMS], 1555e2ecc8a7SMel Gorman node_page_state(pgdat, i)); 1556e2ecc8a7SMel Gorman } 1557e2ecc8a7SMel Gorman } 1558f6ac2354SChristoph Lameter seq_printf(m, 1559f6ac2354SChristoph Lameter "\n pages free %lu" 1560f6ac2354SChristoph Lameter "\n min %lu" 1561f6ac2354SChristoph Lameter "\n low %lu" 1562f6ac2354SChristoph Lameter "\n high %lu" 1563f6ac2354SChristoph Lameter "\n spanned %lu" 15649feedc9dSJiang Liu "\n present %lu" 15659feedc9dSJiang Liu "\n managed %lu", 156688f5acf8SMel Gorman zone_page_state(zone, NR_FREE_PAGES), 156741858966SMel Gorman min_wmark_pages(zone), 156841858966SMel Gorman low_wmark_pages(zone), 156941858966SMel Gorman high_wmark_pages(zone), 1570f6ac2354SChristoph Lameter 
zone->spanned_pages, 15719feedc9dSJiang Liu zone->present_pages, 1572*9705bea5SArun KS zone_managed_pages(zone)); 15732244b95aSChristoph Lameter 1574f6ac2354SChristoph Lameter seq_printf(m, 15753484b2deSMel Gorman "\n protection: (%ld", 1576f6ac2354SChristoph Lameter zone->lowmem_reserve[0]); 1577f6ac2354SChristoph Lameter for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) 15783484b2deSMel Gorman seq_printf(m, ", %ld", zone->lowmem_reserve[i]); 15797dfb8bf3SDavid Rientjes seq_putc(m, ')'); 15807dfb8bf3SDavid Rientjes 15817dfb8bf3SDavid Rientjes /* If unpopulated, no other information is useful */ 15827dfb8bf3SDavid Rientjes if (!populated_zone(zone)) { 15837dfb8bf3SDavid Rientjes seq_putc(m, '\n'); 15847dfb8bf3SDavid Rientjes return; 15857dfb8bf3SDavid Rientjes } 15867dfb8bf3SDavid Rientjes 15877dfb8bf3SDavid Rientjes for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 15887dfb8bf3SDavid Rientjes seq_printf(m, "\n %-12s %lu", vmstat_text[i], 15897dfb8bf3SDavid Rientjes zone_page_state(zone, i)); 15907dfb8bf3SDavid Rientjes 15913a321d2aSKemi Wang #ifdef CONFIG_NUMA 15923a321d2aSKemi Wang for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) 15933a321d2aSKemi Wang seq_printf(m, "\n %-12s %lu", 15943a321d2aSKemi Wang vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], 159563803222SKemi Wang zone_numa_state_snapshot(zone, i)); 15963a321d2aSKemi Wang #endif 15973a321d2aSKemi Wang 15987dfb8bf3SDavid Rientjes seq_printf(m, "\n pagesets"); 1599f6ac2354SChristoph Lameter for_each_online_cpu(i) { 1600f6ac2354SChristoph Lameter struct per_cpu_pageset *pageset; 1601f6ac2354SChristoph Lameter 160299dcc3e5SChristoph Lameter pageset = per_cpu_ptr(zone->pageset, i); 1603f6ac2354SChristoph Lameter seq_printf(m, 16043dfa5721SChristoph Lameter "\n cpu: %i" 1605f6ac2354SChristoph Lameter "\n count: %i" 1606f6ac2354SChristoph Lameter "\n high: %i" 1607f6ac2354SChristoph Lameter "\n batch: %i", 16083dfa5721SChristoph Lameter i, 16093dfa5721SChristoph Lameter pageset->pcp.count, 16103dfa5721SChristoph Lameter pageset->pcp.high, 16113dfa5721SChristoph Lameter pageset->pcp.batch); 1612df9ecabaSChristoph Lameter #ifdef CONFIG_SMP 1613df9ecabaSChristoph Lameter seq_printf(m, "\n vm stats threshold: %d", 1614df9ecabaSChristoph Lameter pageset->stat_threshold); 1615df9ecabaSChristoph Lameter #endif 1616f6ac2354SChristoph Lameter } 1617f6ac2354SChristoph Lameter seq_printf(m, 1618599d0c95SMel Gorman "\n node_unreclaimable: %u" 16193a50d14dSAndrey Ryabinin "\n start_pfn: %lu", 1620c73322d0SJohannes Weiner pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES, 16213a50d14dSAndrey Ryabinin zone->zone_start_pfn); 1622f6ac2354SChristoph Lameter seq_putc(m, '\n'); 1623f6ac2354SChristoph Lameter } 1624467c996cSMel Gorman 1625467c996cSMel Gorman /* 1626b2bd8598SDavid Rientjes * Output information about zones in @pgdat. All zones are printed regardless 1627b2bd8598SDavid Rientjes * of whether they are populated or not: lowmem_reserve_ratio operates on the 1628b2bd8598SDavid Rientjes * set of all zones and userspace would not be aware of such zones if they are 1629b2bd8598SDavid Rientjes * suppressed here (zoneinfo displays the effect of lowmem_reserve_ratio). 
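 * Per-node stats are printed once per node, hung off the first populated
 * zone found in it (see is_zone_first_populated() above).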
1630467c996cSMel Gorman */ 1631467c996cSMel Gorman static int zoneinfo_show(struct seq_file *m, void *arg) 1632467c996cSMel Gorman { 1633467c996cSMel Gorman pg_data_t *pgdat = (pg_data_t *)arg; 1634727c080fSVinayak Menon walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print); 1635f6ac2354SChristoph Lameter return 0; 1636f6ac2354SChristoph Lameter } 1637f6ac2354SChristoph Lameter 16385c9fe628SAlexey Dobriyan static const struct seq_operations zoneinfo_op = { 1639f6ac2354SChristoph Lameter .start = frag_start, /* iterate over all zones. The same as in 1640f6ac2354SChristoph Lameter * fragmentation. */ 1641f6ac2354SChristoph Lameter .next = frag_next, 1642f6ac2354SChristoph Lameter .stop = frag_stop, 1643f6ac2354SChristoph Lameter .show = zoneinfo_show, 1644f6ac2354SChristoph Lameter }; 1645f6ac2354SChristoph Lameter 164679da826aSMichael Rubin enum writeback_stat_item { 164779da826aSMichael Rubin NR_DIRTY_THRESHOLD, 164879da826aSMichael Rubin NR_DIRTY_BG_THRESHOLD, 164979da826aSMichael Rubin NR_VM_WRITEBACK_STAT_ITEMS, 165079da826aSMichael Rubin }; 165179da826aSMichael Rubin 1652f6ac2354SChristoph Lameter static void *vmstat_start(struct seq_file *m, loff_t *pos) 1653f6ac2354SChristoph Lameter { 16542244b95aSChristoph Lameter unsigned long *v; 165579da826aSMichael Rubin int i, stat_items_size; 1656f6ac2354SChristoph Lameter 1657f6ac2354SChristoph Lameter if (*pos >= ARRAY_SIZE(vmstat_text)) 1658f6ac2354SChristoph Lameter return NULL; 165979da826aSMichael Rubin stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) + 16603a321d2aSKemi Wang NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) + 166175ef7184SMel Gorman NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) + 166279da826aSMichael Rubin NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long); 1663f6ac2354SChristoph Lameter 1664f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS 166579da826aSMichael Rubin stat_items_size += sizeof(struct vm_event_state); 1666f8891e5eSChristoph Lameter #endif 166779da826aSMichael Rubin 1668f0ecf25aSJann Horn BUILD_BUG_ON(stat_items_size != 1669f0ecf25aSJann Horn ARRAY_SIZE(vmstat_text) * sizeof(unsigned long)); 167079da826aSMichael Rubin v = kmalloc(stat_items_size, GFP_KERNEL); 16712244b95aSChristoph Lameter m->private = v; 16722244b95aSChristoph Lameter if (!v) 1673f6ac2354SChristoph Lameter return ERR_PTR(-ENOMEM); 16742244b95aSChristoph Lameter for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 1675c41f012aSMichal Hocko v[i] = global_zone_page_state(i); 167679da826aSMichael Rubin v += NR_VM_ZONE_STAT_ITEMS; 167779da826aSMichael Rubin 16783a321d2aSKemi Wang #ifdef CONFIG_NUMA 16793a321d2aSKemi Wang for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) 16803a321d2aSKemi Wang v[i] = global_numa_state(i); 16813a321d2aSKemi Wang v += NR_VM_NUMA_STAT_ITEMS; 16823a321d2aSKemi Wang #endif 16833a321d2aSKemi Wang 168475ef7184SMel Gorman for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) 168575ef7184SMel Gorman v[i] = global_node_page_state(i); 168675ef7184SMel Gorman v += NR_VM_NODE_STAT_ITEMS; 168775ef7184SMel Gorman 168879da826aSMichael Rubin global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD, 168979da826aSMichael Rubin v + NR_DIRTY_THRESHOLD); 169079da826aSMichael Rubin v += NR_VM_WRITEBACK_STAT_ITEMS; 169179da826aSMichael Rubin 1692f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS 169379da826aSMichael Rubin all_vm_events(v); 169479da826aSMichael Rubin v[PGPGIN] /= 2; /* sectors -> kbytes */ 169579da826aSMichael Rubin v[PGPGOUT] /= 2; 1696f8891e5eSChristoph Lameter #endif 1697ff8b16d7SWu Fengguang return (unsigned 
long *)m->private + *pos; 1698f6ac2354SChristoph Lameter } 1699f6ac2354SChristoph Lameter 1700f6ac2354SChristoph Lameter static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos) 1701f6ac2354SChristoph Lameter { 1702f6ac2354SChristoph Lameter (*pos)++; 1703f6ac2354SChristoph Lameter if (*pos >= ARRAY_SIZE(vmstat_text)) 1704f6ac2354SChristoph Lameter return NULL; 1705f6ac2354SChristoph Lameter return (unsigned long *)m->private + *pos; 1706f6ac2354SChristoph Lameter } 1707f6ac2354SChristoph Lameter 1708f6ac2354SChristoph Lameter static int vmstat_show(struct seq_file *m, void *arg) 1709f6ac2354SChristoph Lameter { 1710f6ac2354SChristoph Lameter unsigned long *l = arg; 1711f6ac2354SChristoph Lameter unsigned long off = l - (unsigned long *)m->private; 171268ba0326SAlexey Dobriyan 171368ba0326SAlexey Dobriyan seq_puts(m, vmstat_text[off]); 171475ba1d07SJoe Perches seq_put_decimal_ull(m, " ", *l); 171568ba0326SAlexey Dobriyan seq_putc(m, '\n'); 1716f6ac2354SChristoph Lameter return 0; 1717f6ac2354SChristoph Lameter } 1718f6ac2354SChristoph Lameter 1719f6ac2354SChristoph Lameter static void vmstat_stop(struct seq_file *m, void *arg) 1720f6ac2354SChristoph Lameter { 1721f6ac2354SChristoph Lameter kfree(m->private); 1722f6ac2354SChristoph Lameter m->private = NULL; 1723f6ac2354SChristoph Lameter } 1724f6ac2354SChristoph Lameter 1725b6aa44abSAlexey Dobriyan static const struct seq_operations vmstat_op = { 1726f6ac2354SChristoph Lameter .start = vmstat_start, 1727f6ac2354SChristoph Lameter .next = vmstat_next, 1728f6ac2354SChristoph Lameter .stop = vmstat_stop, 1729f6ac2354SChristoph Lameter .show = vmstat_show, 1730f6ac2354SChristoph Lameter }; 1731f6ac2354SChristoph Lameter #endif /* CONFIG_PROC_FS */ 1732f6ac2354SChristoph Lameter 1733df9ecabaSChristoph Lameter #ifdef CONFIG_SMP 1734d1187ed2SChristoph Lameter static DEFINE_PER_CPU(struct delayed_work, vmstat_work); 173577461ab3SChristoph Lameter int sysctl_stat_interval __read_mostly = HZ; 1736d1187ed2SChristoph Lameter 173752b6f46bSHugh Dickins #ifdef CONFIG_PROC_FS 173852b6f46bSHugh Dickins static void refresh_vm_stats(struct work_struct *work) 173952b6f46bSHugh Dickins { 174052b6f46bSHugh Dickins refresh_cpu_vm_stats(true); 174152b6f46bSHugh Dickins } 174252b6f46bSHugh Dickins 174352b6f46bSHugh Dickins int vmstat_refresh(struct ctl_table *table, int write, 174452b6f46bSHugh Dickins void __user *buffer, size_t *lenp, loff_t *ppos) 174552b6f46bSHugh Dickins { 174652b6f46bSHugh Dickins long val; 174752b6f46bSHugh Dickins int err; 174852b6f46bSHugh Dickins int i; 174952b6f46bSHugh Dickins 175052b6f46bSHugh Dickins /* 175152b6f46bSHugh Dickins * The regular update, every sysctl_stat_interval, may come later 175252b6f46bSHugh Dickins * than expected: leaving a significant amount in per_cpu buckets. 175352b6f46bSHugh Dickins * This is particularly misleading when checking a quantity of HUGE 175452b6f46bSHugh Dickins * pages, immediately after running a test. /proc/sys/vm/stat_refresh, 175552b6f46bSHugh Dickins * which can equally be echo'ed to or cat'ted from (by root), 175652b6f46bSHugh Dickins * can be used to update the stats just before reading them. 175752b6f46bSHugh Dickins * 1758c41f012aSMichal Hocko * Oh, and since global_zone_page_state() etc. are so careful to hide 175952b6f46bSHugh Dickins * transiently negative values, report an error here if any of 176052b6f46bSHugh Dickins * the stats is negative, so we know to go looking for imbalance. 
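 * A refresh can be triggered with e.g. "echo 1 >/proc/sys/vm/stat_refresh";
 * the write then fails with EINVAL if any counter was found to be negative.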
176152b6f46bSHugh Dickins 	 */
176252b6f46bSHugh Dickins 	err = schedule_on_each_cpu(refresh_vm_stats);
176352b6f46bSHugh Dickins 	if (err)
176452b6f46bSHugh Dickins 		return err;
176552b6f46bSHugh Dickins 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
176675ef7184SMel Gorman 		val = atomic_long_read(&vm_zone_stat[i]);
176752b6f46bSHugh Dickins 		if (val < 0) {
176852b6f46bSHugh Dickins 			pr_warn("%s: %s %ld\n",
176952b6f46bSHugh Dickins 				__func__, vmstat_text[i], val);
177052b6f46bSHugh Dickins 			err = -EINVAL;
177152b6f46bSHugh Dickins 		}
177252b6f46bSHugh Dickins 	}
17733a321d2aSKemi Wang #ifdef CONFIG_NUMA
17743a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
17753a321d2aSKemi Wang 		val = atomic_long_read(&vm_numa_stat[i]);
17763a321d2aSKemi Wang 		if (val < 0) {
17773a321d2aSKemi Wang 			pr_warn("%s: %s %ld\n",
17783a321d2aSKemi Wang 				__func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val);
17793a321d2aSKemi Wang 			err = -EINVAL;
17803a321d2aSKemi Wang 		}
17813a321d2aSKemi Wang 	}
17823a321d2aSKemi Wang #endif
178352b6f46bSHugh Dickins 	if (err)
178452b6f46bSHugh Dickins 		return err;
178552b6f46bSHugh Dickins 	if (write)
178652b6f46bSHugh Dickins 		*ppos += *lenp;
178752b6f46bSHugh Dickins 	else
178852b6f46bSHugh Dickins 		*lenp = 0;
178952b6f46bSHugh Dickins 	return 0;
179052b6f46bSHugh Dickins }
179152b6f46bSHugh Dickins #endif /* CONFIG_PROC_FS */
179252b6f46bSHugh Dickins 
1793d1187ed2SChristoph Lameter static void vmstat_update(struct work_struct *w)
1794d1187ed2SChristoph Lameter {
17950eb77e98SChristoph Lameter 	if (refresh_cpu_vm_stats(true)) {
17967cc36bbdSChristoph Lameter 		/*
17977cc36bbdSChristoph Lameter 		 * Counters were updated so we expect more updates
17987cc36bbdSChristoph Lameter 		 * to occur in the future. Keep on running the
17997cc36bbdSChristoph Lameter 		 * update worker thread.
18007cc36bbdSChristoph Lameter 		 */
1801ce612879SMichal Hocko 		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
1802176bed1dSLinus Torvalds 			this_cpu_ptr(&vmstat_work),
180398f4ebb2SAnton Blanchard 			round_jiffies_relative(sysctl_stat_interval));
1804f01f17d3SMichal Hocko 	}
1805d1187ed2SChristoph Lameter }
1806d1187ed2SChristoph Lameter 
18077cc36bbdSChristoph Lameter /*
18137cc36bbdSChristoph Lameter  * Check if the diffs for a certain cpu indicate that
18147cc36bbdSChristoph Lameter  * an update is needed.
18157cc36bbdSChristoph Lameter  */
18167cc36bbdSChristoph Lameter static bool need_update(int cpu)
1817d1187ed2SChristoph Lameter {
18187cc36bbdSChristoph Lameter 	struct zone *zone;
1819d1187ed2SChristoph Lameter 
18207cc36bbdSChristoph Lameter 	for_each_populated_zone(zone) {
18217cc36bbdSChristoph Lameter 		struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
18227cc36bbdSChristoph Lameter 
18237cc36bbdSChristoph Lameter 		BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
18243a321d2aSKemi Wang #ifdef CONFIG_NUMA
18251d90ca89SKemi Wang 		BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 2);
18263a321d2aSKemi Wang #endif
182763803222SKemi Wang 
18287cc36bbdSChristoph Lameter 		/*
18297cc36bbdSChristoph Lameter 		 * The fast way of checking if there are any vmstat diffs.
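		 * memchr_inv() returns the first byte differing from zero, so
		 * a NULL result means the whole per-cpu diff array is clean.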
18307cc36bbdSChristoph Lameter */ 183113c9aaf7SJanne Huttunen if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS * 183213c9aaf7SJanne Huttunen sizeof(p->vm_stat_diff[0]))) 18337cc36bbdSChristoph Lameter return true; 18343a321d2aSKemi Wang #ifdef CONFIG_NUMA 183513c9aaf7SJanne Huttunen if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS * 183613c9aaf7SJanne Huttunen sizeof(p->vm_numa_stat_diff[0]))) 18373a321d2aSKemi Wang return true; 18383a321d2aSKemi Wang #endif 18397cc36bbdSChristoph Lameter } 18407cc36bbdSChristoph Lameter return false; 18417cc36bbdSChristoph Lameter } 18427cc36bbdSChristoph Lameter 18437b8da4c7SChristoph Lameter /* 18447b8da4c7SChristoph Lameter * Switch off vmstat processing and then fold all the remaining differentials 18457b8da4c7SChristoph Lameter * until the diffs stay at zero. The function is used by NOHZ and can only be 18467b8da4c7SChristoph Lameter * invoked when tick processing is not active. 18477b8da4c7SChristoph Lameter */ 1848f01f17d3SMichal Hocko void quiet_vmstat(void) 1849f01f17d3SMichal Hocko { 1850f01f17d3SMichal Hocko if (system_state != SYSTEM_RUNNING) 1851f01f17d3SMichal Hocko return; 1852f01f17d3SMichal Hocko 18537b8da4c7SChristoph Lameter if (!delayed_work_pending(this_cpu_ptr(&vmstat_work))) 1854f01f17d3SMichal Hocko return; 1855f01f17d3SMichal Hocko 1856f01f17d3SMichal Hocko if (!need_update(smp_processor_id())) 1857f01f17d3SMichal Hocko return; 1858f01f17d3SMichal Hocko 1859f01f17d3SMichal Hocko /* 1860f01f17d3SMichal Hocko * Just refresh counters and do not care about the pending delayed 1861f01f17d3SMichal Hocko * vmstat_update. It doesn't fire that often to matter and canceling 1862f01f17d3SMichal Hocko * it would be too expensive from this path. 1863f01f17d3SMichal Hocko * vmstat_shepherd will take care about that for us. 1864f01f17d3SMichal Hocko */ 1865f01f17d3SMichal Hocko refresh_cpu_vm_stats(false); 1866f01f17d3SMichal Hocko } 1867f01f17d3SMichal Hocko 18687cc36bbdSChristoph Lameter /* 18697cc36bbdSChristoph Lameter * Shepherd worker thread that checks the 18707cc36bbdSChristoph Lameter * differentials of processors that have their worker 18717cc36bbdSChristoph Lameter * threads for vm statistics updates disabled because of 18727cc36bbdSChristoph Lameter * inactivity. 
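 * The shepherd re-queues itself every sysctl_stat_interval and re-arms a
 * given cpu's vmstat work only when need_update() reports pending diffs.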
18737cc36bbdSChristoph Lameter */ 18747cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w); 18757cc36bbdSChristoph Lameter 18760eb77e98SChristoph Lameter static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd); 18777cc36bbdSChristoph Lameter 18787cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w) 18797cc36bbdSChristoph Lameter { 18807cc36bbdSChristoph Lameter int cpu; 18817cc36bbdSChristoph Lameter 18827cc36bbdSChristoph Lameter get_online_cpus(); 18837cc36bbdSChristoph Lameter /* Check processors whose vmstat worker threads have been disabled */ 18847b8da4c7SChristoph Lameter for_each_online_cpu(cpu) { 1885f01f17d3SMichal Hocko struct delayed_work *dw = &per_cpu(vmstat_work, cpu); 18867cc36bbdSChristoph Lameter 18877b8da4c7SChristoph Lameter if (!delayed_work_pending(dw) && need_update(cpu)) 1888ce612879SMichal Hocko queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0); 1889f01f17d3SMichal Hocko } 18907cc36bbdSChristoph Lameter put_online_cpus(); 18917cc36bbdSChristoph Lameter 18927cc36bbdSChristoph Lameter schedule_delayed_work(&shepherd, 18937cc36bbdSChristoph Lameter round_jiffies_relative(sysctl_stat_interval)); 18947cc36bbdSChristoph Lameter } 18957cc36bbdSChristoph Lameter 18967cc36bbdSChristoph Lameter static void __init start_shepherd_timer(void) 18977cc36bbdSChristoph Lameter { 18987cc36bbdSChristoph Lameter int cpu; 18997cc36bbdSChristoph Lameter 19007cc36bbdSChristoph Lameter for_each_possible_cpu(cpu) 1901ccde8bd4SMichal Hocko INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu), 19027cc36bbdSChristoph Lameter vmstat_update); 19037cc36bbdSChristoph Lameter 19047cc36bbdSChristoph Lameter schedule_delayed_work(&shepherd, 19057cc36bbdSChristoph Lameter round_jiffies_relative(sysctl_stat_interval)); 1906d1187ed2SChristoph Lameter } 1907d1187ed2SChristoph Lameter 190803e86dbaSTim Chen static void __init init_cpu_node_state(void) 190903e86dbaSTim Chen { 19104c501327SSebastian Andrzej Siewior int node; 191103e86dbaSTim Chen 19124c501327SSebastian Andrzej Siewior for_each_online_node(node) { 19134c501327SSebastian Andrzej Siewior if (cpumask_weight(cpumask_of_node(node)) > 0) 19144c501327SSebastian Andrzej Siewior node_set_state(node, N_CPU); 19154c501327SSebastian Andrzej Siewior } 191603e86dbaSTim Chen } 191703e86dbaSTim Chen 19185438da97SSebastian Andrzej Siewior static int vmstat_cpu_online(unsigned int cpu) 1919807a1bd2SToshi Kani { 19205ee28a44SKAMEZAWA Hiroyuki refresh_zone_stat_thresholds(); 1921ad596925SChristoph Lameter node_set_state(cpu_to_node(cpu), N_CPU); 19225438da97SSebastian Andrzej Siewior return 0; 1923df9ecabaSChristoph Lameter } 1924df9ecabaSChristoph Lameter 19255438da97SSebastian Andrzej Siewior static int vmstat_cpu_down_prep(unsigned int cpu) 19265438da97SSebastian Andrzej Siewior { 19275438da97SSebastian Andrzej Siewior cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu)); 19285438da97SSebastian Andrzej Siewior return 0; 19295438da97SSebastian Andrzej Siewior } 19305438da97SSebastian Andrzej Siewior 19315438da97SSebastian Andrzej Siewior static int vmstat_cpu_dead(unsigned int cpu) 19325438da97SSebastian Andrzej Siewior { 19335438da97SSebastian Andrzej Siewior const struct cpumask *node_cpus; 19345438da97SSebastian Andrzej Siewior int node; 19355438da97SSebastian Andrzej Siewior 19365438da97SSebastian Andrzej Siewior node = cpu_to_node(cpu); 19375438da97SSebastian Andrzej Siewior 19385438da97SSebastian Andrzej Siewior refresh_zone_stat_thresholds(); 19395438da97SSebastian Andrzej Siewior node_cpus = 
cpumask_of_node(node); 19405438da97SSebastian Andrzej Siewior if (cpumask_weight(node_cpus) > 0) 19415438da97SSebastian Andrzej Siewior return 0; 19425438da97SSebastian Andrzej Siewior 19435438da97SSebastian Andrzej Siewior node_clear_state(node, N_CPU); 19445438da97SSebastian Andrzej Siewior return 0; 19455438da97SSebastian Andrzej Siewior } 19465438da97SSebastian Andrzej Siewior 19478f32f7e5SAlexey Dobriyan #endif 1948df9ecabaSChristoph Lameter 1949ce612879SMichal Hocko struct workqueue_struct *mm_percpu_wq; 1950ce612879SMichal Hocko 1951597b7305SMichal Hocko void __init init_mm_internals(void) 1952df9ecabaSChristoph Lameter { 1953ce612879SMichal Hocko int ret __maybe_unused; 19545438da97SSebastian Andrzej Siewior 195580d136e1SMichal Hocko mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0); 1956ce612879SMichal Hocko 1957ce612879SMichal Hocko #ifdef CONFIG_SMP 19585438da97SSebastian Andrzej Siewior ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead", 19595438da97SSebastian Andrzej Siewior NULL, vmstat_cpu_dead); 19605438da97SSebastian Andrzej Siewior if (ret < 0) 19615438da97SSebastian Andrzej Siewior pr_err("vmstat: failed to register 'dead' hotplug state\n"); 19625438da97SSebastian Andrzej Siewior 19635438da97SSebastian Andrzej Siewior ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online", 19645438da97SSebastian Andrzej Siewior vmstat_cpu_online, 19655438da97SSebastian Andrzej Siewior vmstat_cpu_down_prep); 19665438da97SSebastian Andrzej Siewior if (ret < 0) 19675438da97SSebastian Andrzej Siewior pr_err("vmstat: failed to register 'online' hotplug state\n"); 19685438da97SSebastian Andrzej Siewior 19695438da97SSebastian Andrzej Siewior get_online_cpus(); 197003e86dbaSTim Chen init_cpu_node_state(); 19715438da97SSebastian Andrzej Siewior put_online_cpus(); 1972d1187ed2SChristoph Lameter 19737cc36bbdSChristoph Lameter start_shepherd_timer(); 19748f32f7e5SAlexey Dobriyan #endif 19758f32f7e5SAlexey Dobriyan #ifdef CONFIG_PROC_FS 1976fddda2b7SChristoph Hellwig proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op); 1977fddda2b7SChristoph Hellwig proc_create_seq("pagetypeinfo", 0444, NULL, &pagetypeinfo_op); 1978fddda2b7SChristoph Hellwig proc_create_seq("vmstat", 0444, NULL, &vmstat_op); 1979fddda2b7SChristoph Hellwig proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op); 19808f32f7e5SAlexey Dobriyan #endif 1981df9ecabaSChristoph Lameter } 1982d7a5752cSMel Gorman 1983d7a5752cSMel Gorman #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION) 1984d7a5752cSMel Gorman 1985d7a5752cSMel Gorman /* 1986d7a5752cSMel Gorman * Return an index indicating how much of the available free memory is 1987d7a5752cSMel Gorman * unusable for an allocation of the requested size. 1988d7a5752cSMel Gorman */ 1989d7a5752cSMel Gorman static int unusable_free_index(unsigned int order, 1990d7a5752cSMel Gorman struct contig_page_info *info) 1991d7a5752cSMel Gorman { 1992d7a5752cSMel Gorman /* No free memory is interpreted as all free memory is unusable */ 1993d7a5752cSMel Gorman if (info->free_pages == 0) 1994d7a5752cSMel Gorman return 1000; 1995d7a5752cSMel Gorman 1996d7a5752cSMel Gorman /* 1997d7a5752cSMel Gorman * Index should be a value between 0 and 1. Return a value to 3 1998d7a5752cSMel Gorman * decimal places. 
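 * (Worked example: with 1000 free pages at order 4 and 10 suitably large
 * free blocks, 10 << 4 = 160 pages are usable, so the index is
 * (1000 - 160) * 1000 / 1000 = 840, shown as 0.840.)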
1999d7a5752cSMel Gorman * 2000d7a5752cSMel Gorman * 0 => no fragmentation 2001d7a5752cSMel Gorman * 1 => high fragmentation 2002d7a5752cSMel Gorman */ 2003d7a5752cSMel Gorman return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages); 2004d7a5752cSMel Gorman 2005d7a5752cSMel Gorman } 2006d7a5752cSMel Gorman 2007d7a5752cSMel Gorman static void unusable_show_print(struct seq_file *m, 2008d7a5752cSMel Gorman pg_data_t *pgdat, struct zone *zone) 2009d7a5752cSMel Gorman { 2010d7a5752cSMel Gorman unsigned int order; 2011d7a5752cSMel Gorman int index; 2012d7a5752cSMel Gorman struct contig_page_info info; 2013d7a5752cSMel Gorman 2014d7a5752cSMel Gorman seq_printf(m, "Node %d, zone %8s ", 2015d7a5752cSMel Gorman pgdat->node_id, 2016d7a5752cSMel Gorman zone->name); 2017d7a5752cSMel Gorman for (order = 0; order < MAX_ORDER; ++order) { 2018d7a5752cSMel Gorman fill_contig_page_info(zone, order, &info); 2019d7a5752cSMel Gorman index = unusable_free_index(order, &info); 2020d7a5752cSMel Gorman seq_printf(m, "%d.%03d ", index / 1000, index % 1000); 2021d7a5752cSMel Gorman } 2022d7a5752cSMel Gorman 2023d7a5752cSMel Gorman seq_putc(m, '\n'); 2024d7a5752cSMel Gorman } 2025d7a5752cSMel Gorman 2026d7a5752cSMel Gorman /* 2027d7a5752cSMel Gorman * Display unusable free space index 2028d7a5752cSMel Gorman * 2029d7a5752cSMel Gorman * The unusable free space index measures how much of the available free 2030d7a5752cSMel Gorman * memory cannot be used to satisfy an allocation of a given size and is a 2031d7a5752cSMel Gorman * value between 0 and 1. The higher the value, the more of free memory is 2032d7a5752cSMel Gorman * unusable and by implication, the worse the external fragmentation is. This 2033d7a5752cSMel Gorman * can be expressed as a percentage by multiplying by 100. 
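 * Exposed as "unusable_index" in the extfrag debugfs directory.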
2034d7a5752cSMel Gorman */ 2035d7a5752cSMel Gorman static int unusable_show(struct seq_file *m, void *arg) 2036d7a5752cSMel Gorman { 2037d7a5752cSMel Gorman pg_data_t *pgdat = (pg_data_t *)arg; 2038d7a5752cSMel Gorman 2039d7a5752cSMel Gorman /* check memoryless node */ 2040a47b53c5SLai Jiangshan if (!node_state(pgdat->node_id, N_MEMORY)) 2041d7a5752cSMel Gorman return 0; 2042d7a5752cSMel Gorman 2043727c080fSVinayak Menon walk_zones_in_node(m, pgdat, true, false, unusable_show_print); 2044d7a5752cSMel Gorman 2045d7a5752cSMel Gorman return 0; 2046d7a5752cSMel Gorman } 2047d7a5752cSMel Gorman 2048d7a5752cSMel Gorman static const struct seq_operations unusable_op = { 2049d7a5752cSMel Gorman .start = frag_start, 2050d7a5752cSMel Gorman .next = frag_next, 2051d7a5752cSMel Gorman .stop = frag_stop, 2052d7a5752cSMel Gorman .show = unusable_show, 2053d7a5752cSMel Gorman }; 2054d7a5752cSMel Gorman 2055d7a5752cSMel Gorman static int unusable_open(struct inode *inode, struct file *file) 2056d7a5752cSMel Gorman { 2057d7a5752cSMel Gorman return seq_open(file, &unusable_op); 2058d7a5752cSMel Gorman } 2059d7a5752cSMel Gorman 2060d7a5752cSMel Gorman static const struct file_operations unusable_file_ops = { 2061d7a5752cSMel Gorman .open = unusable_open, 2062d7a5752cSMel Gorman .read = seq_read, 2063d7a5752cSMel Gorman .llseek = seq_lseek, 2064d7a5752cSMel Gorman .release = seq_release, 2065d7a5752cSMel Gorman }; 2066d7a5752cSMel Gorman 2067f1a5ab12SMel Gorman static void extfrag_show_print(struct seq_file *m, 2068f1a5ab12SMel Gorman pg_data_t *pgdat, struct zone *zone) 2069f1a5ab12SMel Gorman { 2070f1a5ab12SMel Gorman unsigned int order; 2071f1a5ab12SMel Gorman int index; 2072f1a5ab12SMel Gorman 2073f1a5ab12SMel Gorman /* Alloc on stack as interrupts are disabled for zone walk */ 2074f1a5ab12SMel Gorman struct contig_page_info info; 2075f1a5ab12SMel Gorman 2076f1a5ab12SMel Gorman seq_printf(m, "Node %d, zone %8s ", 2077f1a5ab12SMel Gorman pgdat->node_id, 2078f1a5ab12SMel Gorman zone->name); 2079f1a5ab12SMel Gorman for (order = 0; order < MAX_ORDER; ++order) { 2080f1a5ab12SMel Gorman fill_contig_page_info(zone, order, &info); 208156de7263SMel Gorman index = __fragmentation_index(order, &info); 2082f1a5ab12SMel Gorman seq_printf(m, "%d.%03d ", index / 1000, index % 1000); 2083f1a5ab12SMel Gorman } 2084f1a5ab12SMel Gorman 2085f1a5ab12SMel Gorman seq_putc(m, '\n'); 2086f1a5ab12SMel Gorman } 2087f1a5ab12SMel Gorman 2088f1a5ab12SMel Gorman /* 2089f1a5ab12SMel Gorman * Display fragmentation index for orders that allocations would fail for 2090f1a5ab12SMel Gorman */ 2091f1a5ab12SMel Gorman static int extfrag_show(struct seq_file *m, void *arg) 2092f1a5ab12SMel Gorman { 2093f1a5ab12SMel Gorman pg_data_t *pgdat = (pg_data_t *)arg; 2094f1a5ab12SMel Gorman 2095727c080fSVinayak Menon walk_zones_in_node(m, pgdat, true, false, extfrag_show_print); 2096f1a5ab12SMel Gorman 2097f1a5ab12SMel Gorman return 0; 2098f1a5ab12SMel Gorman } 2099f1a5ab12SMel Gorman 2100f1a5ab12SMel Gorman static const struct seq_operations extfrag_op = { 2101f1a5ab12SMel Gorman .start = frag_start, 2102f1a5ab12SMel Gorman .next = frag_next, 2103f1a5ab12SMel Gorman .stop = frag_stop, 2104f1a5ab12SMel Gorman .show = extfrag_show, 2105f1a5ab12SMel Gorman }; 2106f1a5ab12SMel Gorman 2107f1a5ab12SMel Gorman static int extfrag_open(struct inode *inode, struct file *file) 2108f1a5ab12SMel Gorman { 2109f1a5ab12SMel Gorman return seq_open(file, &extfrag_op); 2110f1a5ab12SMel Gorman } 2111f1a5ab12SMel Gorman 2112f1a5ab12SMel Gorman static const struct 
file_operations extfrag_file_ops = { 2113f1a5ab12SMel Gorman .open = extfrag_open, 2114f1a5ab12SMel Gorman .read = seq_read, 2115f1a5ab12SMel Gorman .llseek = seq_lseek, 2116f1a5ab12SMel Gorman .release = seq_release, 2117f1a5ab12SMel Gorman }; 2118f1a5ab12SMel Gorman 2119d7a5752cSMel Gorman static int __init extfrag_debug_init(void) 2120d7a5752cSMel Gorman { 2121bde8bd8aSSasikantha babu struct dentry *extfrag_debug_root; 2122bde8bd8aSSasikantha babu 2123d7a5752cSMel Gorman extfrag_debug_root = debugfs_create_dir("extfrag", NULL); 2124d7a5752cSMel Gorman if (!extfrag_debug_root) 2125d7a5752cSMel Gorman return -ENOMEM; 2126d7a5752cSMel Gorman 2127d7a5752cSMel Gorman if (!debugfs_create_file("unusable_index", 0444, 2128d7a5752cSMel Gorman extfrag_debug_root, NULL, &unusable_file_ops)) 2129bde8bd8aSSasikantha babu goto fail; 2130d7a5752cSMel Gorman 2131f1a5ab12SMel Gorman if (!debugfs_create_file("extfrag_index", 0444, 2132f1a5ab12SMel Gorman extfrag_debug_root, NULL, &extfrag_file_ops)) 2133bde8bd8aSSasikantha babu goto fail; 2134f1a5ab12SMel Gorman 2135d7a5752cSMel Gorman return 0; 2136bde8bd8aSSasikantha babu fail: 2137bde8bd8aSSasikantha babu debugfs_remove_recursive(extfrag_debug_root); 2138bde8bd8aSSasikantha babu return -ENOMEM; 2139d7a5752cSMel Gorman } 2140d7a5752cSMel Gorman 2141d7a5752cSMel Gorman module_init(extfrag_debug_init); 2142d7a5752cSMel Gorman #endif 2143
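/*
 * A minimal userspace sketch (not kernel code, hence guarded out) showing
 * how the two debugfs files created above might be consumed. It assumes
 * debugfs is mounted at the conventional /sys/kernel/debug and that the
 * reader has the privileges needed to access it; each line of output
 * covers one (node, zone) pair with one value per allocation order.
 */
#if 0
#include <stdio.h>

int main(void)
{
	static const char *files[] = {
		"/sys/kernel/debug/extfrag/unusable_index",
		"/sys/kernel/debug/extfrag/extfrag_index",
	};
	char line[512];
	int i;

	for (i = 0; i < 2; i++) {
		FILE *f = fopen(files[i], "r");

		if (!f) {
			perror(files[i]);	/* needs root and mounted debugfs */
			continue;
		}
		printf("== %s ==\n", files[i]);
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);	/* e.g. "Node 0, zone   Normal 0.000 ..." */
		fclose(f);
	}
	return 0;
}
#endif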