xref: /linux/mm/vmstat.c (revision 9705bea5f833f4fc21d5bef5fce7348427f76ea4)
1f6ac2354SChristoph Lameter /*
2f6ac2354SChristoph Lameter  *  linux/mm/vmstat.c
3f6ac2354SChristoph Lameter  *
4f6ac2354SChristoph Lameter  *  Manages VM statistics
5f6ac2354SChristoph Lameter  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
62244b95aSChristoph Lameter  *
72244b95aSChristoph Lameter  *  zoned VM statistics
82244b95aSChristoph Lameter  *  Copyright (C) 2006 Silicon Graphics, Inc.,
92244b95aSChristoph Lameter  *		Christoph Lameter <christoph@lameter.com>
107cc36bbdSChristoph Lameter  *  Copyright (C) 2008-2014 Christoph Lameter
11f6ac2354SChristoph Lameter  */
128f32f7e5SAlexey Dobriyan #include <linux/fs.h>
13f6ac2354SChristoph Lameter #include <linux/mm.h>
144e950f6fSAlexey Dobriyan #include <linux/err.h>
152244b95aSChristoph Lameter #include <linux/module.h>
165a0e3ad6STejun Heo #include <linux/slab.h>
17df9ecabaSChristoph Lameter #include <linux/cpu.h>
187cc36bbdSChristoph Lameter #include <linux/cpumask.h>
19c748e134SAdrian Bunk #include <linux/vmstat.h>
203c486871SAndrew Morton #include <linux/proc_fs.h>
213c486871SAndrew Morton #include <linux/seq_file.h>
223c486871SAndrew Morton #include <linux/debugfs.h>
23e8edc6e0SAlexey Dobriyan #include <linux/sched.h>
24f1a5ab12SMel Gorman #include <linux/math64.h>
2579da826aSMichael Rubin #include <linux/writeback.h>
2636deb0beSNamhyung Kim #include <linux/compaction.h>
276e543d57SLisa Du #include <linux/mm_inline.h>
2848c96a36SJoonsoo Kim #include <linux/page_ext.h>
2948c96a36SJoonsoo Kim #include <linux/page_owner.h>
306e543d57SLisa Du 
316e543d57SLisa Du #include "internal.h"
32f6ac2354SChristoph Lameter 
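/*
 * Per-cpu NUMA event counters are u16, so fold them into the zone
 * counters just before they would overflow (see __inc_numa_state()).
 */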
331d90ca89SKemi Wang #define NUMA_STATS_THRESHOLD (U16_MAX - 2)
341d90ca89SKemi Wang 
354518085eSKemi Wang #ifdef CONFIG_NUMA
364518085eSKemi Wang int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
374518085eSKemi Wang 
384518085eSKemi Wang /* zero numa counters within a zone */
394518085eSKemi Wang static void zero_zone_numa_counters(struct zone *zone)
404518085eSKemi Wang {
414518085eSKemi Wang 	int item, cpu;
424518085eSKemi Wang 
434518085eSKemi Wang 	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) {
444518085eSKemi Wang 		atomic_long_set(&zone->vm_numa_stat[item], 0);
454518085eSKemi Wang 		for_each_online_cpu(cpu)
464518085eSKemi Wang 			per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item]
474518085eSKemi Wang 						= 0;
484518085eSKemi Wang 	}
494518085eSKemi Wang }
504518085eSKemi Wang 
514518085eSKemi Wang /* zero numa counters of all the populated zones */
524518085eSKemi Wang static void zero_zones_numa_counters(void)
534518085eSKemi Wang {
544518085eSKemi Wang 	struct zone *zone;
554518085eSKemi Wang 
564518085eSKemi Wang 	for_each_populated_zone(zone)
574518085eSKemi Wang 		zero_zone_numa_counters(zone);
584518085eSKemi Wang }
594518085eSKemi Wang 
604518085eSKemi Wang /* zero global numa counters */
614518085eSKemi Wang static void zero_global_numa_counters(void)
624518085eSKemi Wang {
634518085eSKemi Wang 	int item;
644518085eSKemi Wang 
654518085eSKemi Wang 	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++)
664518085eSKemi Wang 		atomic_long_set(&vm_numa_stat[item], 0);
674518085eSKemi Wang }
684518085eSKemi Wang 
694518085eSKemi Wang static void invalid_numa_statistics(void)
704518085eSKemi Wang {
714518085eSKemi Wang 	zero_zones_numa_counters();
724518085eSKemi Wang 	zero_global_numa_counters();
734518085eSKemi Wang }
744518085eSKemi Wang 
754518085eSKemi Wang static DEFINE_MUTEX(vm_numa_stat_lock);
764518085eSKemi Wang 
774518085eSKemi Wang int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
784518085eSKemi Wang 		void __user *buffer, size_t *length, loff_t *ppos)
794518085eSKemi Wang {
804518085eSKemi Wang 	int ret, oldval;
814518085eSKemi Wang 
824518085eSKemi Wang 	mutex_lock(&vm_numa_stat_lock);
834518085eSKemi Wang 	if (write)
844518085eSKemi Wang 		oldval = sysctl_vm_numa_stat;
854518085eSKemi Wang 	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
864518085eSKemi Wang 	if (ret || !write)
874518085eSKemi Wang 		goto out;
884518085eSKemi Wang 
894518085eSKemi Wang 	if (oldval == sysctl_vm_numa_stat)
904518085eSKemi Wang 		goto out;
914518085eSKemi Wang 	else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) {
924518085eSKemi Wang 		static_branch_enable(&vm_numa_stat_key);
934518085eSKemi Wang 		pr_info("enable numa statistics\n");
944518085eSKemi Wang 	} else {
954518085eSKemi Wang 		static_branch_disable(&vm_numa_stat_key);
964518085eSKemi Wang 		invalid_numa_statistics();
974518085eSKemi Wang 		pr_info("disable numa statistics, and clear numa counters\n");
984518085eSKemi Wang 	}
994518085eSKemi Wang 
1004518085eSKemi Wang out:
1014518085eSKemi Wang 	mutex_unlock(&vm_numa_stat_lock);
1024518085eSKemi Wang 	return ret;
1034518085eSKemi Wang }
1044518085eSKemi Wang #endif
1054518085eSKemi Wang 
106f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
107f8891e5eSChristoph Lameter DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
108f8891e5eSChristoph Lameter EXPORT_PER_CPU_SYMBOL(vm_event_states);
109f8891e5eSChristoph Lameter 
11031f961a8SMinchan Kim static void sum_vm_events(unsigned long *ret)
111f8891e5eSChristoph Lameter {
1129eccf2a8SChristoph Lameter 	int cpu;
113f8891e5eSChristoph Lameter 	int i;
114f8891e5eSChristoph Lameter 
115f8891e5eSChristoph Lameter 	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
116f8891e5eSChristoph Lameter 
11731f961a8SMinchan Kim 	for_each_online_cpu(cpu) {
118f8891e5eSChristoph Lameter 		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
119f8891e5eSChristoph Lameter 
120f8891e5eSChristoph Lameter 		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
121f8891e5eSChristoph Lameter 			ret[i] += this->event[i];
122f8891e5eSChristoph Lameter 	}
123f8891e5eSChristoph Lameter }
124f8891e5eSChristoph Lameter 
125f8891e5eSChristoph Lameter /*
126f8891e5eSChristoph Lameter  * Accumulate the vm event counters across all CPUs.
127f8891e5eSChristoph Lameter  * The result is unavoidably approximate - it can change
128f8891e5eSChristoph Lameter  * during and after execution of this function.
129f8891e5eSChristoph Lameter  */
130f8891e5eSChristoph Lameter void all_vm_events(unsigned long *ret)
131f8891e5eSChristoph Lameter {
132b5be1132SKOSAKI Motohiro 	get_online_cpus();
13331f961a8SMinchan Kim 	sum_vm_events(ret);
134b5be1132SKOSAKI Motohiro 	put_online_cpus();
135f8891e5eSChristoph Lameter }
13632dd66fcSHeiko Carstens EXPORT_SYMBOL_GPL(all_vm_events);
137f8891e5eSChristoph Lameter 
138f8891e5eSChristoph Lameter /*
139f8891e5eSChristoph Lameter  * Fold the foreign cpu events into our own.
140f8891e5eSChristoph Lameter  *
141f8891e5eSChristoph Lameter  * This adds to the events on one processor
142f8891e5eSChristoph Lameter  * but keeps the global counts constant.
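 * The foreign cpu's counters are added to the current cpu via
 * count_vm_events() and then cleared.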
143f8891e5eSChristoph Lameter  */
144f8891e5eSChristoph Lameter void vm_events_fold_cpu(int cpu)
145f8891e5eSChristoph Lameter {
146f8891e5eSChristoph Lameter 	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
147f8891e5eSChristoph Lameter 	int i;
148f8891e5eSChristoph Lameter 
149f8891e5eSChristoph Lameter 	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
150f8891e5eSChristoph Lameter 		count_vm_events(i, fold_state->event[i]);
151f8891e5eSChristoph Lameter 		fold_state->event[i] = 0;
152f8891e5eSChristoph Lameter 	}
153f8891e5eSChristoph Lameter }
154f8891e5eSChristoph Lameter 
155f8891e5eSChristoph Lameter #endif /* CONFIG_VM_EVENT_COUNTERS */
156f8891e5eSChristoph Lameter 
1572244b95aSChristoph Lameter /*
1582244b95aSChristoph Lameter  * Manage combined zone based / global counters
1592244b95aSChristoph Lameter  *
1602244b95aSChristoph Lameter  * vm_stat contains the global counters
1612244b95aSChristoph Lameter  */
16275ef7184SMel Gorman atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
1633a321d2aSKemi Wang atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
16475ef7184SMel Gorman atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
16575ef7184SMel Gorman EXPORT_SYMBOL(vm_zone_stat);
1663a321d2aSKemi Wang EXPORT_SYMBOL(vm_numa_stat);
16775ef7184SMel Gorman EXPORT_SYMBOL(vm_node_stat);
1682244b95aSChristoph Lameter 
1692244b95aSChristoph Lameter #ifdef CONFIG_SMP
1702244b95aSChristoph Lameter 
171b44129b3SMel Gorman int calculate_pressure_threshold(struct zone *zone)
17288f5acf8SMel Gorman {
17388f5acf8SMel Gorman 	int threshold;
17488f5acf8SMel Gorman 	int watermark_distance;
17588f5acf8SMel Gorman 
17688f5acf8SMel Gorman 	/*
17788f5acf8SMel Gorman 	 * As vmstats are not up to date, there is drift between the estimated
17888f5acf8SMel Gorman 	 * and real values. For high thresholds and a high number of CPUs, it
17988f5acf8SMel Gorman 	 * is possible for the min watermark to be breached while the estimated
18088f5acf8SMel Gorman 	 * value looks fine. The pressure threshold is a reduced value such
18188f5acf8SMel Gorman 	 * that even the maximum amount of drift will not accidentally breach
18288f5acf8SMel Gorman 	 * the min watermark
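	 * (e.g. with a gap of 1024 pages between the low and min watermarks
	 * and 64 online CPUs, the pressure threshold drops to 1024 / 64 = 16).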
18388f5acf8SMel Gorman 	 */
18488f5acf8SMel Gorman 	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
18588f5acf8SMel Gorman 	threshold = max(1, (int)(watermark_distance / num_online_cpus()));
18688f5acf8SMel Gorman 
18788f5acf8SMel Gorman 	/*
18888f5acf8SMel Gorman 	 * Maximum threshold is 125
18988f5acf8SMel Gorman 	 */
19088f5acf8SMel Gorman 	threshold = min(125, threshold);
19188f5acf8SMel Gorman 
19288f5acf8SMel Gorman 	return threshold;
19388f5acf8SMel Gorman }
19488f5acf8SMel Gorman 
195b44129b3SMel Gorman int calculate_normal_threshold(struct zone *zone)
196df9ecabaSChristoph Lameter {
197df9ecabaSChristoph Lameter 	int threshold;
198df9ecabaSChristoph Lameter 	int mem;	/* memory in 128 MB units */
1992244b95aSChristoph Lameter 
2002244b95aSChristoph Lameter 	/*
201df9ecabaSChristoph Lameter 	 * The threshold scales with the number of processors and the amount
202df9ecabaSChristoph Lameter 	 * of memory per zone. More memory means that we can defer updates for
203df9ecabaSChristoph Lameter 	 * longer, while more processors could lead to more contention.
204df9ecabaSChristoph Lameter 	 * fls() is used to have a cheap way of logarithmic scaling.
2052244b95aSChristoph Lameter 	 *
206df9ecabaSChristoph Lameter 	 * Some sample thresholds:
207df9ecabaSChristoph Lameter 	 *
208df9ecabaSChristoph Lameter 	 * Threshold	Processors	(fls)	Zonesize	fls(mem+1)
209df9ecabaSChristoph Lameter 	 * ------------------------------------------------------------------
210df9ecabaSChristoph Lameter 	 * 8		1		1	0.9-1 GB	4
211df9ecabaSChristoph Lameter 	 * 16		2		2	0.9-1 GB	4
212df9ecabaSChristoph Lameter 	 * 20 		2		2	1-2 GB		5
213df9ecabaSChristoph Lameter 	 * 24		2		2	2-4 GB		6
214df9ecabaSChristoph Lameter 	 * 28		2		2	4-8 GB		7
215df9ecabaSChristoph Lameter 	 * 32		2		2	8-16 GB		8
216df9ecabaSChristoph Lameter 	 * 4		2		2	<128M		1
217df9ecabaSChristoph Lameter 	 * 30		4		3	2-4 GB		5
218df9ecabaSChristoph Lameter 	 * 48		4		3	8-16 GB		8
219df9ecabaSChristoph Lameter 	 * 32		8		4	1-2 GB		4
220df9ecabaSChristoph Lameter 	 * 32		8		4	0.9-1GB		4
221df9ecabaSChristoph Lameter 	 * 10		16		5	<128M		1
222df9ecabaSChristoph Lameter 	 * 40		16		5	900M		4
223df9ecabaSChristoph Lameter 	 * 70		64		7	2-4 GB		5
224df9ecabaSChristoph Lameter 	 * 84		64		7	4-8 GB		6
225df9ecabaSChristoph Lameter 	 * 108		512		9	4-8 GB		6
226df9ecabaSChristoph Lameter 	 * 125		1024		10	8-16 GB		8
227df9ecabaSChristoph Lameter 	 * 125		1024		10	16-32 GB	9
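	 *
	 * For example, 4 online CPUs (fls(4) = 3) and a 2-4 GB zone
	 * (fls(mem + 1) = 5) give a threshold of 2 * 3 * 5 = 30, matching
	 * the table above.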
2282244b95aSChristoph Lameter 	 */
229df9ecabaSChristoph Lameter 
230*9705bea5SArun KS 	mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);
231df9ecabaSChristoph Lameter 
232df9ecabaSChristoph Lameter 	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
233df9ecabaSChristoph Lameter 
234df9ecabaSChristoph Lameter 	/*
235df9ecabaSChristoph Lameter 	 * Maximum threshold is 125
236df9ecabaSChristoph Lameter 	 */
237df9ecabaSChristoph Lameter 	threshold = min(125, threshold);
238df9ecabaSChristoph Lameter 
239df9ecabaSChristoph Lameter 	return threshold;
240df9ecabaSChristoph Lameter }
241df9ecabaSChristoph Lameter 
242df9ecabaSChristoph Lameter /*
243df9ecabaSChristoph Lameter  * Refresh the thresholds for each zone.
244df9ecabaSChristoph Lameter  */
245a6cccdc3SKOSAKI Motohiro void refresh_zone_stat_thresholds(void)
2462244b95aSChristoph Lameter {
24775ef7184SMel Gorman 	struct pglist_data *pgdat;
248df9ecabaSChristoph Lameter 	struct zone *zone;
249df9ecabaSChristoph Lameter 	int cpu;
250df9ecabaSChristoph Lameter 	int threshold;
251df9ecabaSChristoph Lameter 
25275ef7184SMel Gorman 	/* Zero current pgdat thresholds */
25375ef7184SMel Gorman 	for_each_online_pgdat(pgdat) {
25475ef7184SMel Gorman 		for_each_online_cpu(cpu) {
25575ef7184SMel Gorman 			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
25675ef7184SMel Gorman 		}
25775ef7184SMel Gorman 	}
25875ef7184SMel Gorman 
259ee99c71cSKOSAKI Motohiro 	for_each_populated_zone(zone) {
26075ef7184SMel Gorman 		struct pglist_data *pgdat = zone->zone_pgdat;
261aa454840SChristoph Lameter 		unsigned long max_drift, tolerate_drift;
262aa454840SChristoph Lameter 
263b44129b3SMel Gorman 		threshold = calculate_normal_threshold(zone);
264df9ecabaSChristoph Lameter 
26575ef7184SMel Gorman 		for_each_online_cpu(cpu) {
26675ef7184SMel Gorman 			int pgdat_threshold;
26775ef7184SMel Gorman 
26899dcc3e5SChristoph Lameter 			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
26999dcc3e5SChristoph Lameter 							= threshold;
2701d90ca89SKemi Wang 
27175ef7184SMel Gorman 			/* Base nodestat threshold on the largest populated zone. */
27275ef7184SMel Gorman 			pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
27375ef7184SMel Gorman 			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
27475ef7184SMel Gorman 				= max(threshold, pgdat_threshold);
27575ef7184SMel Gorman 		}
27675ef7184SMel Gorman 
277aa454840SChristoph Lameter 		/*
278aa454840SChristoph Lameter 		 * Only set percpu_drift_mark if there is a danger that
279aa454840SChristoph Lameter 		 * NR_FREE_PAGES reports the low watermark is ok when in fact
280aa454840SChristoph Lameter 		 * the min watermark could be breached by an allocation
281aa454840SChristoph Lameter 		 */
282aa454840SChristoph Lameter 		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
283aa454840SChristoph Lameter 		max_drift = num_online_cpus() * threshold;
284aa454840SChristoph Lameter 		if (max_drift > tolerate_drift)
285aa454840SChristoph Lameter 			zone->percpu_drift_mark = high_wmark_pages(zone) +
286aa454840SChristoph Lameter 					max_drift;
287df9ecabaSChristoph Lameter 	}
2882244b95aSChristoph Lameter }
2892244b95aSChristoph Lameter 
290b44129b3SMel Gorman void set_pgdat_percpu_threshold(pg_data_t *pgdat,
291b44129b3SMel Gorman 				int (*calculate_pressure)(struct zone *))
29288f5acf8SMel Gorman {
29388f5acf8SMel Gorman 	struct zone *zone;
29488f5acf8SMel Gorman 	int cpu;
29588f5acf8SMel Gorman 	int threshold;
29688f5acf8SMel Gorman 	int i;
29788f5acf8SMel Gorman 
29888f5acf8SMel Gorman 	for (i = 0; i < pgdat->nr_zones; i++) {
29988f5acf8SMel Gorman 		zone = &pgdat->node_zones[i];
30088f5acf8SMel Gorman 		if (!zone->percpu_drift_mark)
30188f5acf8SMel Gorman 			continue;
30288f5acf8SMel Gorman 
303b44129b3SMel Gorman 		threshold = (*calculate_pressure)(zone);
3041d90ca89SKemi Wang 		for_each_online_cpu(cpu)
30588f5acf8SMel Gorman 			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
30688f5acf8SMel Gorman 							= threshold;
30788f5acf8SMel Gorman 	}
30888f5acf8SMel Gorman }
30988f5acf8SMel Gorman 
3102244b95aSChristoph Lameter /*
311bea04b07SJianyu Zhan  * For use when we know that interrupts are disabled,
312bea04b07SJianyu Zhan  * or when we know that preemption is disabled and that
313bea04b07SJianyu Zhan  * particular counter cannot be updated from interrupt context.
3142244b95aSChristoph Lameter  */
3152244b95aSChristoph Lameter void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
3166cdb18adSHeiko Carstens 			   long delta)
3172244b95aSChristoph Lameter {
31812938a92SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
31912938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
3202244b95aSChristoph Lameter 	long x;
32112938a92SChristoph Lameter 	long t;
3222244b95aSChristoph Lameter 
32312938a92SChristoph Lameter 	x = delta + __this_cpu_read(*p);
3242244b95aSChristoph Lameter 
32512938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
32612938a92SChristoph Lameter 
32712938a92SChristoph Lameter 	if (unlikely(x > t || x < -t)) {
3282244b95aSChristoph Lameter 		zone_page_state_add(x, zone, item);
3292244b95aSChristoph Lameter 		x = 0;
3302244b95aSChristoph Lameter 	}
33112938a92SChristoph Lameter 	__this_cpu_write(*p, x);
3322244b95aSChristoph Lameter }
3332244b95aSChristoph Lameter EXPORT_SYMBOL(__mod_zone_page_state);
3342244b95aSChristoph Lameter 
33575ef7184SMel Gorman void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
33675ef7184SMel Gorman 				long delta)
33775ef7184SMel Gorman {
33875ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
33975ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
34075ef7184SMel Gorman 	long x;
34175ef7184SMel Gorman 	long t;
34275ef7184SMel Gorman 
34375ef7184SMel Gorman 	x = delta + __this_cpu_read(*p);
34475ef7184SMel Gorman 
34575ef7184SMel Gorman 	t = __this_cpu_read(pcp->stat_threshold);
34675ef7184SMel Gorman 
34775ef7184SMel Gorman 	if (unlikely(x > t || x < -t)) {
34875ef7184SMel Gorman 		node_page_state_add(x, pgdat, item);
34975ef7184SMel Gorman 		x = 0;
35075ef7184SMel Gorman 	}
35175ef7184SMel Gorman 	__this_cpu_write(*p, x);
35275ef7184SMel Gorman }
35375ef7184SMel Gorman EXPORT_SYMBOL(__mod_node_page_state);
35475ef7184SMel Gorman 
3552244b95aSChristoph Lameter /*
3562244b95aSChristoph Lameter  * Optimized increment and decrement functions.
3572244b95aSChristoph Lameter  *
3582244b95aSChristoph Lameter  * These are only for a single page and therefore can take a struct page *
3592244b95aSChristoph Lameter  * argument instead of struct zone *. This allows the inclusion of the code
3602244b95aSChristoph Lameter  * generated for page_zone(page) into the optimized functions.
3612244b95aSChristoph Lameter  *
3622244b95aSChristoph Lameter  * No overflow check is necessary and therefore the differential can be
3632244b95aSChristoph Lameter  * incremented or decremented in place, which may allow the compilers to
3642244b95aSChristoph Lameter  * generate better code.
3652244b95aSChristoph Lameter  * The increment or decrement is known and therefore one boundary check can
3662244b95aSChristoph Lameter  * be omitted.
3672244b95aSChristoph Lameter  *
368df9ecabaSChristoph Lameter  * NOTE: These functions are very performance sensitive. Change only
369df9ecabaSChristoph Lameter  * with care.
370df9ecabaSChristoph Lameter  *
3712244b95aSChristoph Lameter  * Some processors have inc/dec instructions that are atomic vs an interrupt.
3722244b95aSChristoph Lameter  * However, the code must first determine the differential location in a zone
3732244b95aSChristoph Lameter  * based on the processor number and then inc/dec the counter. There is no
3742244b95aSChristoph Lameter  * guarantee without disabling preemption that the processor will not change
3752244b95aSChristoph Lameter  * in between and therefore the atomicity vs. interrupt cannot be exploited
3762244b95aSChristoph Lameter  * in a useful way here.
3772244b95aSChristoph Lameter  */
378c8785385SChristoph Lameter void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
3792244b95aSChristoph Lameter {
38012938a92SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
38112938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
38212938a92SChristoph Lameter 	s8 v, t;
3832244b95aSChristoph Lameter 
384908ee0f1SChristoph Lameter 	v = __this_cpu_inc_return(*p);
38512938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
38612938a92SChristoph Lameter 	if (unlikely(v > t)) {
38712938a92SChristoph Lameter 		s8 overstep = t >> 1;
3882244b95aSChristoph Lameter 
38912938a92SChristoph Lameter 		zone_page_state_add(v + overstep, zone, item);
39012938a92SChristoph Lameter 		__this_cpu_write(*p, -overstep);
3912244b95aSChristoph Lameter 	}
3922244b95aSChristoph Lameter }
393ca889e6cSChristoph Lameter 
39475ef7184SMel Gorman void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
39575ef7184SMel Gorman {
39675ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
39775ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
39875ef7184SMel Gorman 	s8 v, t;
39975ef7184SMel Gorman 
40075ef7184SMel Gorman 	v = __this_cpu_inc_return(*p);
40175ef7184SMel Gorman 	t = __this_cpu_read(pcp->stat_threshold);
40275ef7184SMel Gorman 	if (unlikely(v > t)) {
40375ef7184SMel Gorman 		s8 overstep = t >> 1;
40475ef7184SMel Gorman 
40575ef7184SMel Gorman 		node_page_state_add(v + overstep, pgdat, item);
40675ef7184SMel Gorman 		__this_cpu_write(*p, -overstep);
40775ef7184SMel Gorman 	}
40875ef7184SMel Gorman }
40975ef7184SMel Gorman 
410ca889e6cSChristoph Lameter void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
411ca889e6cSChristoph Lameter {
412ca889e6cSChristoph Lameter 	__inc_zone_state(page_zone(page), item);
413ca889e6cSChristoph Lameter }
4142244b95aSChristoph Lameter EXPORT_SYMBOL(__inc_zone_page_state);
4152244b95aSChristoph Lameter 
41675ef7184SMel Gorman void __inc_node_page_state(struct page *page, enum node_stat_item item)
41775ef7184SMel Gorman {
41875ef7184SMel Gorman 	__inc_node_state(page_pgdat(page), item);
41975ef7184SMel Gorman }
42075ef7184SMel Gorman EXPORT_SYMBOL(__inc_node_page_state);
42175ef7184SMel Gorman 
422c8785385SChristoph Lameter void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
4232244b95aSChristoph Lameter {
42412938a92SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
42512938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
42612938a92SChristoph Lameter 	s8 v, t;
4272244b95aSChristoph Lameter 
428908ee0f1SChristoph Lameter 	v = __this_cpu_dec_return(*p);
42912938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
43012938a92SChristoph Lameter 	if (unlikely(v < -t)) {
43112938a92SChristoph Lameter 		s8 overstep = t >> 1;
4322244b95aSChristoph Lameter 
43312938a92SChristoph Lameter 		zone_page_state_add(v - overstep, zone, item);
43412938a92SChristoph Lameter 		__this_cpu_write(*p, overstep);
4352244b95aSChristoph Lameter 	}
4362244b95aSChristoph Lameter }
437c8785385SChristoph Lameter 
43875ef7184SMel Gorman void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
43975ef7184SMel Gorman {
44075ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
44175ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
44275ef7184SMel Gorman 	s8 v, t;
44375ef7184SMel Gorman 
44475ef7184SMel Gorman 	v = __this_cpu_dec_return(*p);
44575ef7184SMel Gorman 	t = __this_cpu_read(pcp->stat_threshold);
44675ef7184SMel Gorman 	if (unlikely(v < -t)) {
44775ef7184SMel Gorman 		s8 overstep = t >> 1;
44875ef7184SMel Gorman 
44975ef7184SMel Gorman 		node_page_state_add(v - overstep, pgdat, item);
45075ef7184SMel Gorman 		__this_cpu_write(*p, overstep);
45175ef7184SMel Gorman 	}
45275ef7184SMel Gorman }
45375ef7184SMel Gorman 
454c8785385SChristoph Lameter void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
455c8785385SChristoph Lameter {
456c8785385SChristoph Lameter 	__dec_zone_state(page_zone(page), item);
457c8785385SChristoph Lameter }
4582244b95aSChristoph Lameter EXPORT_SYMBOL(__dec_zone_page_state);
4592244b95aSChristoph Lameter 
46075ef7184SMel Gorman void __dec_node_page_state(struct page *page, enum node_stat_item item)
46175ef7184SMel Gorman {
46275ef7184SMel Gorman 	__dec_node_state(page_pgdat(page), item);
46375ef7184SMel Gorman }
46475ef7184SMel Gorman EXPORT_SYMBOL(__dec_node_page_state);
46575ef7184SMel Gorman 
4664156153cSHeiko Carstens #ifdef CONFIG_HAVE_CMPXCHG_LOCAL
4677c839120SChristoph Lameter /*
4687c839120SChristoph Lameter  * If we have cmpxchg_local support then we do not need to incur the overhead
4697c839120SChristoph Lameter  * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
4707c839120SChristoph Lameter  *
4717c839120SChristoph Lameter  * mod_zone_state() and mod_node_state() modify the zone and node counter
4727c839120SChristoph Lameter  * state through atomic per cpu operations.
4737c839120SChristoph Lameter  *
4747c839120SChristoph Lameter  * Overstep mode specifies how overstep should be handled:
4757c839120SChristoph Lameter  *     0       No overstepping
4767c839120SChristoph Lameter  *     1       Overstepping half of threshold
4777c839120SChristoph Lameter  *     -1      Overstepping minus half of threshold
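 *
 * With mode 1, for example, once the per-cpu delta exceeds the threshold
 * t, the whole accumulated delta plus t/2 is folded into the global
 * counter and the per-cpu counter is reset to -t/2, so the next few
 * updates do not immediately cross the threshold again.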
4787c839120SChristoph Lameter  */
47975ef7184SMel Gorman static inline void mod_zone_state(struct zone *zone,
48075ef7184SMel Gorman        enum zone_stat_item item, long delta, int overstep_mode)
4817c839120SChristoph Lameter {
4827c839120SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
4837c839120SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
4847c839120SChristoph Lameter 	long o, n, t, z;
4857c839120SChristoph Lameter 
4867c839120SChristoph Lameter 	do {
4877c839120SChristoph Lameter 		z = 0;  /* overflow to zone counters */
4887c839120SChristoph Lameter 
4897c839120SChristoph Lameter 		/*
4907c839120SChristoph Lameter 		 * The fetching of the stat_threshold is racy. We may apply
4917c839120SChristoph Lameter 		 * a counter threshold to the wrong cpu if we get
492d3bc2367SChristoph Lameter 		 * rescheduled while executing here. However, the next
493d3bc2367SChristoph Lameter 		 * counter update will apply the threshold again and
494d3bc2367SChristoph Lameter 		 * therefore bring the counter under the threshold again.
495d3bc2367SChristoph Lameter 		 *
496d3bc2367SChristoph Lameter 		 * Most of the time the thresholds are the same anyway
497d3bc2367SChristoph Lameter 		 * for all cpus in a zone.
4987c839120SChristoph Lameter 		 */
4997c839120SChristoph Lameter 		t = this_cpu_read(pcp->stat_threshold);
5007c839120SChristoph Lameter 
5017c839120SChristoph Lameter 		o = this_cpu_read(*p);
5027c839120SChristoph Lameter 		n = delta + o;
5037c839120SChristoph Lameter 
5047c839120SChristoph Lameter 		if (n > t || n < -t) {
5057c839120SChristoph Lameter 			int os = overstep_mode * (t >> 1);
5067c839120SChristoph Lameter 
5077c839120SChristoph Lameter 			/* Overflow must be added to zone counters */
5087c839120SChristoph Lameter 			z = n + os;
5097c839120SChristoph Lameter 			n = -os;
5107c839120SChristoph Lameter 		}
5117c839120SChristoph Lameter 	} while (this_cpu_cmpxchg(*p, o, n) != o);
5127c839120SChristoph Lameter 
5137c839120SChristoph Lameter 	if (z)
5147c839120SChristoph Lameter 		zone_page_state_add(z, zone, item);
5157c839120SChristoph Lameter }
5167c839120SChristoph Lameter 
5177c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
5186cdb18adSHeiko Carstens 			 long delta)
5197c839120SChristoph Lameter {
52075ef7184SMel Gorman 	mod_zone_state(zone, item, delta, 0);
5217c839120SChristoph Lameter }
5227c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state);
5237c839120SChristoph Lameter 
5247c839120SChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item)
5257c839120SChristoph Lameter {
52675ef7184SMel Gorman 	mod_zone_state(page_zone(page), item, 1, 1);
5277c839120SChristoph Lameter }
5287c839120SChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state);
5297c839120SChristoph Lameter 
5307c839120SChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item)
5317c839120SChristoph Lameter {
53275ef7184SMel Gorman 	mod_zone_state(page_zone(page), item, -1, -1);
5337c839120SChristoph Lameter }
5347c839120SChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state);
53575ef7184SMel Gorman 
53675ef7184SMel Gorman static inline void mod_node_state(struct pglist_data *pgdat,
53775ef7184SMel Gorman        enum node_stat_item item, int delta, int overstep_mode)
53875ef7184SMel Gorman {
53975ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
54075ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
54175ef7184SMel Gorman 	long o, n, t, z;
54275ef7184SMel Gorman 
54375ef7184SMel Gorman 	do {
54475ef7184SMel Gorman 		z = 0;  /* overflow to node counters */
54575ef7184SMel Gorman 
54675ef7184SMel Gorman 		/*
54775ef7184SMel Gorman 		 * The fetching of the stat_threshold is racy. We may apply
54875ef7184SMel Gorman 		 * a counter threshold to the wrong cpu if we get
54975ef7184SMel Gorman 		 * rescheduled while executing here. However, the next
55075ef7184SMel Gorman 		 * counter update will apply the threshold again and
55175ef7184SMel Gorman 		 * therefore bring the counter under the threshold again.
55275ef7184SMel Gorman 		 *
55375ef7184SMel Gorman 		 * Most of the time the thresholds are the same anyway
55475ef7184SMel Gorman 		 * for all cpus in a node.
55575ef7184SMel Gorman 		 */
55675ef7184SMel Gorman 		t = this_cpu_read(pcp->stat_threshold);
55775ef7184SMel Gorman 
55875ef7184SMel Gorman 		o = this_cpu_read(*p);
55975ef7184SMel Gorman 		n = delta + o;
56075ef7184SMel Gorman 
56175ef7184SMel Gorman 		if (n > t || n < -t) {
56275ef7184SMel Gorman 			int os = overstep_mode * (t >> 1);
56375ef7184SMel Gorman 
56475ef7184SMel Gorman 			/* Overflow must be added to node counters */
56575ef7184SMel Gorman 			z = n + os;
56675ef7184SMel Gorman 			n = -os;
56775ef7184SMel Gorman 		}
56875ef7184SMel Gorman 	} while (this_cpu_cmpxchg(*p, o, n) != o);
56975ef7184SMel Gorman 
57075ef7184SMel Gorman 	if (z)
57175ef7184SMel Gorman 		node_page_state_add(z, pgdat, item);
57275ef7184SMel Gorman }
57375ef7184SMel Gorman 
57475ef7184SMel Gorman void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
57575ef7184SMel Gorman 					long delta)
57675ef7184SMel Gorman {
57775ef7184SMel Gorman 	mod_node_state(pgdat, item, delta, 0);
57875ef7184SMel Gorman }
57975ef7184SMel Gorman EXPORT_SYMBOL(mod_node_page_state);
58075ef7184SMel Gorman 
58175ef7184SMel Gorman void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
58275ef7184SMel Gorman {
58375ef7184SMel Gorman 	mod_node_state(pgdat, item, 1, 1);
58475ef7184SMel Gorman }
58575ef7184SMel Gorman 
58675ef7184SMel Gorman void inc_node_page_state(struct page *page, enum node_stat_item item)
58775ef7184SMel Gorman {
58875ef7184SMel Gorman 	mod_node_state(page_pgdat(page), item, 1, 1);
58975ef7184SMel Gorman }
59075ef7184SMel Gorman EXPORT_SYMBOL(inc_node_page_state);
59175ef7184SMel Gorman 
59275ef7184SMel Gorman void dec_node_page_state(struct page *page, enum node_stat_item item)
59375ef7184SMel Gorman {
59475ef7184SMel Gorman 	mod_node_state(page_pgdat(page), item, -1, -1);
59575ef7184SMel Gorman }
59675ef7184SMel Gorman EXPORT_SYMBOL(dec_node_page_state);
5977c839120SChristoph Lameter #else
5987c839120SChristoph Lameter /*
5997c839120SChristoph Lameter  * Use interrupt disable to serialize counter updates
6007c839120SChristoph Lameter  */
6017c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
6026cdb18adSHeiko Carstens 			 long delta)
6037c839120SChristoph Lameter {
6047c839120SChristoph Lameter 	unsigned long flags;
6057c839120SChristoph Lameter 
6067c839120SChristoph Lameter 	local_irq_save(flags);
6077c839120SChristoph Lameter 	__mod_zone_page_state(zone, item, delta);
6087c839120SChristoph Lameter 	local_irq_restore(flags);
6097c839120SChristoph Lameter }
6107c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state);
6117c839120SChristoph Lameter 
6122244b95aSChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item)
6132244b95aSChristoph Lameter {
6142244b95aSChristoph Lameter 	unsigned long flags;
6152244b95aSChristoph Lameter 	struct zone *zone;
6162244b95aSChristoph Lameter 
6172244b95aSChristoph Lameter 	zone = page_zone(page);
6182244b95aSChristoph Lameter 	local_irq_save(flags);
619ca889e6cSChristoph Lameter 	__inc_zone_state(zone, item);
6202244b95aSChristoph Lameter 	local_irq_restore(flags);
6212244b95aSChristoph Lameter }
6222244b95aSChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state);
6232244b95aSChristoph Lameter 
6242244b95aSChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item)
6252244b95aSChristoph Lameter {
6262244b95aSChristoph Lameter 	unsigned long flags;
6272244b95aSChristoph Lameter 
6282244b95aSChristoph Lameter 	local_irq_save(flags);
629a302eb4eSChristoph Lameter 	__dec_zone_page_state(page, item);
6302244b95aSChristoph Lameter 	local_irq_restore(flags);
6312244b95aSChristoph Lameter }
6322244b95aSChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state);
6332244b95aSChristoph Lameter 
63475ef7184SMel Gorman void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
63575ef7184SMel Gorman {
63675ef7184SMel Gorman 	unsigned long flags;
63775ef7184SMel Gorman 
63875ef7184SMel Gorman 	local_irq_save(flags);
63975ef7184SMel Gorman 	__inc_node_state(pgdat, item);
64075ef7184SMel Gorman 	local_irq_restore(flags);
64175ef7184SMel Gorman }
64275ef7184SMel Gorman EXPORT_SYMBOL(inc_node_state);
64375ef7184SMel Gorman 
64475ef7184SMel Gorman void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
64575ef7184SMel Gorman 					long delta)
64675ef7184SMel Gorman {
64775ef7184SMel Gorman 	unsigned long flags;
64875ef7184SMel Gorman 
64975ef7184SMel Gorman 	local_irq_save(flags);
65075ef7184SMel Gorman 	__mod_node_page_state(pgdat, item, delta);
65175ef7184SMel Gorman 	local_irq_restore(flags);
65275ef7184SMel Gorman }
65375ef7184SMel Gorman EXPORT_SYMBOL(mod_node_page_state);
65475ef7184SMel Gorman 
65575ef7184SMel Gorman void inc_node_page_state(struct page *page, enum node_stat_item item)
65675ef7184SMel Gorman {
65775ef7184SMel Gorman 	unsigned long flags;
65875ef7184SMel Gorman 	struct pglist_data *pgdat;
65975ef7184SMel Gorman 
66075ef7184SMel Gorman 	pgdat = page_pgdat(page);
66175ef7184SMel Gorman 	local_irq_save(flags);
66275ef7184SMel Gorman 	__inc_node_state(pgdat, item);
66375ef7184SMel Gorman 	local_irq_restore(flags);
66475ef7184SMel Gorman }
66575ef7184SMel Gorman EXPORT_SYMBOL(inc_node_page_state);
66675ef7184SMel Gorman 
66775ef7184SMel Gorman void dec_node_page_state(struct page *page, enum node_stat_item item)
66875ef7184SMel Gorman {
66975ef7184SMel Gorman 	unsigned long flags;
67075ef7184SMel Gorman 
67175ef7184SMel Gorman 	local_irq_save(flags);
67275ef7184SMel Gorman 	__dec_node_page_state(page, item);
67375ef7184SMel Gorman 	local_irq_restore(flags);
67475ef7184SMel Gorman }
67575ef7184SMel Gorman EXPORT_SYMBOL(dec_node_page_state);
67675ef7184SMel Gorman #endif
6777cc36bbdSChristoph Lameter 
6787cc36bbdSChristoph Lameter /*
6797cc36bbdSChristoph Lameter  * Fold a differential into the global counters.
6807cc36bbdSChristoph Lameter  * Returns the number of counters updated.
6817cc36bbdSChristoph Lameter  */
6823a321d2aSKemi Wang #ifdef CONFIG_NUMA
6833a321d2aSKemi Wang static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
6843a321d2aSKemi Wang {
6853a321d2aSKemi Wang 	int i;
6863a321d2aSKemi Wang 	int changes = 0;
6873a321d2aSKemi Wang 
6883a321d2aSKemi Wang 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
6893a321d2aSKemi Wang 		if (zone_diff[i]) {
6903a321d2aSKemi Wang 			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
6913a321d2aSKemi Wang 			changes++;
6923a321d2aSKemi Wang 	}
6933a321d2aSKemi Wang 
6943a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
6953a321d2aSKemi Wang 		if (numa_diff[i]) {
6963a321d2aSKemi Wang 			atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
6973a321d2aSKemi Wang 			changes++;
6983a321d2aSKemi Wang 	}
6993a321d2aSKemi Wang 
7003a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
7013a321d2aSKemi Wang 		if (node_diff[i]) {
7023a321d2aSKemi Wang 			atomic_long_add(node_diff[i], &vm_node_stat[i]);
7033a321d2aSKemi Wang 			changes++;
7043a321d2aSKemi Wang 	}
7053a321d2aSKemi Wang 	return changes;
7063a321d2aSKemi Wang }
7073a321d2aSKemi Wang #else
70875ef7184SMel Gorman static int fold_diff(int *zone_diff, int *node_diff)
7094edb0748SChristoph Lameter {
7104edb0748SChristoph Lameter 	int i;
7117cc36bbdSChristoph Lameter 	int changes = 0;
7124edb0748SChristoph Lameter 
7134edb0748SChristoph Lameter 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
71475ef7184SMel Gorman 		if (zone_diff[i]) {
71575ef7184SMel Gorman 			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
71675ef7184SMel Gorman 			changes++;
71775ef7184SMel Gorman 	}
71875ef7184SMel Gorman 
71975ef7184SMel Gorman 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
72075ef7184SMel Gorman 		if (node_diff[i]) {
72175ef7184SMel Gorman 			atomic_long_add(node_diff[i], &vm_node_stat[i]);
7227cc36bbdSChristoph Lameter 			changes++;
7237cc36bbdSChristoph Lameter 	}
7247cc36bbdSChristoph Lameter 	return changes;
7254edb0748SChristoph Lameter }
7263a321d2aSKemi Wang #endif /* CONFIG_NUMA */
7274edb0748SChristoph Lameter 
7282244b95aSChristoph Lameter /*
7292bb921e5SChristoph Lameter  * Update the zone counters for the current cpu.
730a7f75e25SChristoph Lameter  *
7314037d452SChristoph Lameter  * Note that refresh_cpu_vm_stats strives to only access
7324037d452SChristoph Lameter  * node local memory. The per cpu pagesets on remote zones are placed
7334037d452SChristoph Lameter  * in the memory local to the processor using that pageset. So the
7344037d452SChristoph Lameter  * loop over all zones will access a series of cachelines local to
7354037d452SChristoph Lameter  * the processor.
7364037d452SChristoph Lameter  *
7374037d452SChristoph Lameter  * The call to zone_page_state_add updates the cachelines with the
7384037d452SChristoph Lameter  * statistics in the remote zone struct as well as the global cachelines
7394037d452SChristoph Lameter  * with the global counters. These could cause remote node cache line
7404037d452SChristoph Lameter  * bouncing and will have to be only done when necessary.
7417cc36bbdSChristoph Lameter  *
7427cc36bbdSChristoph Lameter  * The function returns the number of global counters updated.
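 *
 * When do_pagesets is true, remote per-cpu pagelists are also aged and,
 * once they have been idle for a few intervals, drained back to the
 * zone free lists (NUMA only).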
7432244b95aSChristoph Lameter  */
7440eb77e98SChristoph Lameter static int refresh_cpu_vm_stats(bool do_pagesets)
7452244b95aSChristoph Lameter {
74675ef7184SMel Gorman 	struct pglist_data *pgdat;
7472244b95aSChristoph Lameter 	struct zone *zone;
7482244b95aSChristoph Lameter 	int i;
74975ef7184SMel Gorman 	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
7503a321d2aSKemi Wang #ifdef CONFIG_NUMA
7513a321d2aSKemi Wang 	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
7523a321d2aSKemi Wang #endif
75375ef7184SMel Gorman 	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
7547cc36bbdSChristoph Lameter 	int changes = 0;
7552244b95aSChristoph Lameter 
756ee99c71cSKOSAKI Motohiro 	for_each_populated_zone(zone) {
757fbc2edb0SChristoph Lameter 		struct per_cpu_pageset __percpu *p = zone->pageset;
7582244b95aSChristoph Lameter 
759fbc2edb0SChristoph Lameter 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
760a7f75e25SChristoph Lameter 			int v;
761a7f75e25SChristoph Lameter 
762fbc2edb0SChristoph Lameter 			v = this_cpu_xchg(p->vm_stat_diff[i], 0);
763fbc2edb0SChristoph Lameter 			if (v) {
764fbc2edb0SChristoph Lameter 
765a7f75e25SChristoph Lameter 				atomic_long_add(v, &zone->vm_stat[i]);
76675ef7184SMel Gorman 				global_zone_diff[i] += v;
7674037d452SChristoph Lameter #ifdef CONFIG_NUMA
7684037d452SChristoph Lameter 				/* 3 seconds idle till flush */
769fbc2edb0SChristoph Lameter 				__this_cpu_write(p->expire, 3);
7704037d452SChristoph Lameter #endif
7712244b95aSChristoph Lameter 			}
772fbc2edb0SChristoph Lameter 		}
7734037d452SChristoph Lameter #ifdef CONFIG_NUMA
7743a321d2aSKemi Wang 		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
7753a321d2aSKemi Wang 			int v;
7763a321d2aSKemi Wang 
7773a321d2aSKemi Wang 			v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0);
7783a321d2aSKemi Wang 			if (v) {
7793a321d2aSKemi Wang 
7803a321d2aSKemi Wang 				atomic_long_add(v, &zone->vm_numa_stat[i]);
7813a321d2aSKemi Wang 				global_numa_diff[i] += v;
7823a321d2aSKemi Wang 				__this_cpu_write(p->expire, 3);
7833a321d2aSKemi Wang 			}
7843a321d2aSKemi Wang 		}
7853a321d2aSKemi Wang 
7860eb77e98SChristoph Lameter 		if (do_pagesets) {
7870eb77e98SChristoph Lameter 			cond_resched();
7884037d452SChristoph Lameter 			/*
7894037d452SChristoph Lameter 			 * Deal with draining the remote pageset of this
7904037d452SChristoph Lameter 			 * processor
7914037d452SChristoph Lameter 			 *
7924037d452SChristoph Lameter 			 * Check if there are pages remaining in this pageset;
7934037d452SChristoph Lameter 			 * if not, there is nothing to expire.
7944037d452SChristoph Lameter 			 */
795fbc2edb0SChristoph Lameter 			if (!__this_cpu_read(p->expire) ||
796fbc2edb0SChristoph Lameter 			       !__this_cpu_read(p->pcp.count))
7974037d452SChristoph Lameter 				continue;
7984037d452SChristoph Lameter 
7994037d452SChristoph Lameter 			/*
8004037d452SChristoph Lameter 			 * We never drain zones local to this processor.
8014037d452SChristoph Lameter 			 */
8024037d452SChristoph Lameter 			if (zone_to_nid(zone) == numa_node_id()) {
803fbc2edb0SChristoph Lameter 				__this_cpu_write(p->expire, 0);
8044037d452SChristoph Lameter 				continue;
8054037d452SChristoph Lameter 			}
8064037d452SChristoph Lameter 
807fbc2edb0SChristoph Lameter 			if (__this_cpu_dec_return(p->expire))
8084037d452SChristoph Lameter 				continue;
8094037d452SChristoph Lameter 
8107cc36bbdSChristoph Lameter 			if (__this_cpu_read(p->pcp.count)) {
8117c8e0181SChristoph Lameter 				drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
8127cc36bbdSChristoph Lameter 				changes++;
8137cc36bbdSChristoph Lameter 			}
8140eb77e98SChristoph Lameter 		}
8154037d452SChristoph Lameter #endif
8162244b95aSChristoph Lameter 	}
81775ef7184SMel Gorman 
81875ef7184SMel Gorman 	for_each_online_pgdat(pgdat) {
81975ef7184SMel Gorman 		struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;
82075ef7184SMel Gorman 
82175ef7184SMel Gorman 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
82275ef7184SMel Gorman 			int v;
82375ef7184SMel Gorman 
82475ef7184SMel Gorman 			v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
82575ef7184SMel Gorman 			if (v) {
82675ef7184SMel Gorman 				atomic_long_add(v, &pgdat->vm_stat[i]);
82775ef7184SMel Gorman 				global_node_diff[i] += v;
82875ef7184SMel Gorman 			}
82975ef7184SMel Gorman 		}
83075ef7184SMel Gorman 	}
83175ef7184SMel Gorman 
8323a321d2aSKemi Wang #ifdef CONFIG_NUMA
8333a321d2aSKemi Wang 	changes += fold_diff(global_zone_diff, global_numa_diff,
8343a321d2aSKemi Wang 			     global_node_diff);
8353a321d2aSKemi Wang #else
83675ef7184SMel Gorman 	changes += fold_diff(global_zone_diff, global_node_diff);
8373a321d2aSKemi Wang #endif
8387cc36bbdSChristoph Lameter 	return changes;
8392244b95aSChristoph Lameter }
8402244b95aSChristoph Lameter 
84140f4b1eaSCody P Schafer /*
8422bb921e5SChristoph Lameter  * Fold the data for an offline cpu into the global array.
8432bb921e5SChristoph Lameter  * There cannot be any access by the offline cpu and therefore
8442bb921e5SChristoph Lameter  * synchronization is simplified.
8452bb921e5SChristoph Lameter  */
8462bb921e5SChristoph Lameter void cpu_vm_stats_fold(int cpu)
8472bb921e5SChristoph Lameter {
84875ef7184SMel Gorman 	struct pglist_data *pgdat;
8492bb921e5SChristoph Lameter 	struct zone *zone;
8502bb921e5SChristoph Lameter 	int i;
85175ef7184SMel Gorman 	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
8523a321d2aSKemi Wang #ifdef CONFIG_NUMA
8533a321d2aSKemi Wang 	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
8543a321d2aSKemi Wang #endif
85575ef7184SMel Gorman 	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
8562bb921e5SChristoph Lameter 
8572bb921e5SChristoph Lameter 	for_each_populated_zone(zone) {
8582bb921e5SChristoph Lameter 		struct per_cpu_pageset *p;
8592bb921e5SChristoph Lameter 
8602bb921e5SChristoph Lameter 		p = per_cpu_ptr(zone->pageset, cpu);
8612bb921e5SChristoph Lameter 
8622bb921e5SChristoph Lameter 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
8632bb921e5SChristoph Lameter 			if (p->vm_stat_diff[i]) {
8642bb921e5SChristoph Lameter 				int v;
8652bb921e5SChristoph Lameter 
8662bb921e5SChristoph Lameter 				v = p->vm_stat_diff[i];
8672bb921e5SChristoph Lameter 				p->vm_stat_diff[i] = 0;
8682bb921e5SChristoph Lameter 				atomic_long_add(v, &zone->vm_stat[i]);
86975ef7184SMel Gorman 				global_zone_diff[i] += v;
8702bb921e5SChristoph Lameter 			}
8713a321d2aSKemi Wang 
8723a321d2aSKemi Wang #ifdef CONFIG_NUMA
8733a321d2aSKemi Wang 		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
8743a321d2aSKemi Wang 			if (p->vm_numa_stat_diff[i]) {
8753a321d2aSKemi Wang 				int v;
8763a321d2aSKemi Wang 
8773a321d2aSKemi Wang 				v = p->vm_numa_stat_diff[i];
8783a321d2aSKemi Wang 				p->vm_numa_stat_diff[i] = 0;
8793a321d2aSKemi Wang 				atomic_long_add(v, &zone->vm_numa_stat[i]);
8803a321d2aSKemi Wang 				global_numa_diff[i] += v;
8813a321d2aSKemi Wang 			}
8823a321d2aSKemi Wang #endif
8832bb921e5SChristoph Lameter 	}
8842bb921e5SChristoph Lameter 
88575ef7184SMel Gorman 	for_each_online_pgdat(pgdat) {
88675ef7184SMel Gorman 		struct per_cpu_nodestat *p;
88775ef7184SMel Gorman 
88875ef7184SMel Gorman 		p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
88975ef7184SMel Gorman 
89075ef7184SMel Gorman 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
89175ef7184SMel Gorman 			if (p->vm_node_stat_diff[i]) {
89275ef7184SMel Gorman 				int v;
89375ef7184SMel Gorman 
89475ef7184SMel Gorman 				v = p->vm_node_stat_diff[i];
89575ef7184SMel Gorman 				p->vm_node_stat_diff[i] = 0;
89675ef7184SMel Gorman 				atomic_long_add(v, &pgdat->vm_stat[i]);
89775ef7184SMel Gorman 				global_node_diff[i] += v;
89875ef7184SMel Gorman 			}
89975ef7184SMel Gorman 	}
90075ef7184SMel Gorman 
9013a321d2aSKemi Wang #ifdef CONFIG_NUMA
9023a321d2aSKemi Wang 	fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
9033a321d2aSKemi Wang #else
90475ef7184SMel Gorman 	fold_diff(global_zone_diff, global_node_diff);
9053a321d2aSKemi Wang #endif
9062bb921e5SChristoph Lameter }
9072bb921e5SChristoph Lameter 
9082bb921e5SChristoph Lameter /*
90940f4b1eaSCody P Schafer  * This is only called if !populated_zone(zone), which implies no other users of
91040f4b1eaSCody P Schafer  * pset->vm_stat_diff[] exist.
91140f4b1eaSCody P Schafer  */
9125a883813SMinchan Kim void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
9135a883813SMinchan Kim {
9145a883813SMinchan Kim 	int i;
9155a883813SMinchan Kim 
9165a883813SMinchan Kim 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
9175a883813SMinchan Kim 		if (pset->vm_stat_diff[i]) {
9185a883813SMinchan Kim 			int v = pset->vm_stat_diff[i];
9195a883813SMinchan Kim 			pset->vm_stat_diff[i] = 0;
9205a883813SMinchan Kim 			atomic_long_add(v, &zone->vm_stat[i]);
92175ef7184SMel Gorman 			atomic_long_add(v, &vm_zone_stat[i]);
9225a883813SMinchan Kim 		}
9233a321d2aSKemi Wang 
9243a321d2aSKemi Wang #ifdef CONFIG_NUMA
9253a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
9263a321d2aSKemi Wang 		if (pset->vm_numa_stat_diff[i]) {
9273a321d2aSKemi Wang 			int v = pset->vm_numa_stat_diff[i];
9283a321d2aSKemi Wang 
9293a321d2aSKemi Wang 			pset->vm_numa_stat_diff[i] = 0;
9303a321d2aSKemi Wang 			atomic_long_add(v, &zone->vm_numa_stat[i]);
9313a321d2aSKemi Wang 			atomic_long_add(v, &vm_numa_stat[i]);
9323a321d2aSKemi Wang 		}
9333a321d2aSKemi Wang #endif
9345a883813SMinchan Kim }
9352244b95aSChristoph Lameter #endif
9362244b95aSChristoph Lameter 
937ca889e6cSChristoph Lameter #ifdef CONFIG_NUMA
9383a321d2aSKemi Wang void __inc_numa_state(struct zone *zone,
9393a321d2aSKemi Wang 				 enum numa_stat_item item)
9403a321d2aSKemi Wang {
9413a321d2aSKemi Wang 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
9421d90ca89SKemi Wang 	u16 __percpu *p = pcp->vm_numa_stat_diff + item;
9431d90ca89SKemi Wang 	u16 v;
9443a321d2aSKemi Wang 
9453a321d2aSKemi Wang 	v = __this_cpu_inc_return(*p);
9463a321d2aSKemi Wang 
9471d90ca89SKemi Wang 	if (unlikely(v > NUMA_STATS_THRESHOLD)) {
9481d90ca89SKemi Wang 		zone_numa_state_add(v, zone, item);
9491d90ca89SKemi Wang 		__this_cpu_write(*p, 0);
9503a321d2aSKemi Wang 	}
9513a321d2aSKemi Wang }
9523a321d2aSKemi Wang 
953ca889e6cSChristoph Lameter /*
95475ef7184SMel Gorman  * Determine the per node value of a stat item. This function
95575ef7184SMel Gorman  * is called frequently in a NUMA machine, so try to be as
95675ef7184SMel Gorman  * frugal as possible.
957c2d42c16SAndrew Morton  */
95875ef7184SMel Gorman unsigned long sum_zone_node_page_state(int node,
95975ef7184SMel Gorman 				 enum zone_stat_item item)
960c2d42c16SAndrew Morton {
961c2d42c16SAndrew Morton 	struct zone *zones = NODE_DATA(node)->node_zones;
962e87d59f7SJoonsoo Kim 	int i;
963e87d59f7SJoonsoo Kim 	unsigned long count = 0;
964c2d42c16SAndrew Morton 
965e87d59f7SJoonsoo Kim 	for (i = 0; i < MAX_NR_ZONES; i++)
966e87d59f7SJoonsoo Kim 		count += zone_page_state(zones + i, item);
967e87d59f7SJoonsoo Kim 
968e87d59f7SJoonsoo Kim 	return count;
969c2d42c16SAndrew Morton }
970c2d42c16SAndrew Morton 
97163803222SKemi Wang /*
97263803222SKemi Wang  * Determine the per node value of a numa stat item. To avoid deviation,
97363803222SKemi Wang  * the per cpu stat number in vm_numa_stat_diff[] is also included.
97463803222SKemi Wang  */
9753a321d2aSKemi Wang unsigned long sum_zone_numa_state(int node,
9763a321d2aSKemi Wang 				 enum numa_stat_item item)
9773a321d2aSKemi Wang {
9783a321d2aSKemi Wang 	struct zone *zones = NODE_DATA(node)->node_zones;
9793a321d2aSKemi Wang 	int i;
9803a321d2aSKemi Wang 	unsigned long count = 0;
9813a321d2aSKemi Wang 
9823a321d2aSKemi Wang 	for (i = 0; i < MAX_NR_ZONES; i++)
98363803222SKemi Wang 		count += zone_numa_state_snapshot(zones + i, item);
9843a321d2aSKemi Wang 
9853a321d2aSKemi Wang 	return count;
9863a321d2aSKemi Wang }
9873a321d2aSKemi Wang 
98875ef7184SMel Gorman /*
98975ef7184SMel Gorman  * Determine the per node value of a stat item.
99075ef7184SMel Gorman  */
99175ef7184SMel Gorman unsigned long node_page_state(struct pglist_data *pgdat,
99275ef7184SMel Gorman 				enum node_stat_item item)
99375ef7184SMel Gorman {
99475ef7184SMel Gorman 	long x = atomic_long_read(&pgdat->vm_stat[item]);
99575ef7184SMel Gorman #ifdef CONFIG_SMP
99675ef7184SMel Gorman 	if (x < 0)
99775ef7184SMel Gorman 		x = 0;
99875ef7184SMel Gorman #endif
99975ef7184SMel Gorman 	return x;
100075ef7184SMel Gorman }
1001ca889e6cSChristoph Lameter #endif
1002ca889e6cSChristoph Lameter 
1003d7a5752cSMel Gorman #ifdef CONFIG_COMPACTION
100436deb0beSNamhyung Kim 
1005d7a5752cSMel Gorman struct contig_page_info {
1006d7a5752cSMel Gorman 	unsigned long free_pages;
1007d7a5752cSMel Gorman 	unsigned long free_blocks_total;
1008d7a5752cSMel Gorman 	unsigned long free_blocks_suitable;
1009d7a5752cSMel Gorman };
1010d7a5752cSMel Gorman 
1011d7a5752cSMel Gorman /*
1012d7a5752cSMel Gorman  * Calculate the number of free pages in a zone, how many contiguous
1013d7a5752cSMel Gorman  * pages are free and how many are large enough to satisfy an allocation of
1014d7a5752cSMel Gorman  * the target size. Note that this function makes no attempt to estimate
1015d7a5752cSMel Gorman  * how many suitable free blocks there *might* be if MOVABLE pages were
1016d7a5752cSMel Gorman  * migrated. Calculating that is possible, but expensive and can be
1017d7a5752cSMel Gorman  * figured out from userspace
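 *
 * free_blocks_suitable is counted in units of the requested order, so a
 * free block of a higher order contributes multiple suitable blocks.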
1018d7a5752cSMel Gorman  */
1019d7a5752cSMel Gorman static void fill_contig_page_info(struct zone *zone,
1020d7a5752cSMel Gorman 				unsigned int suitable_order,
1021d7a5752cSMel Gorman 				struct contig_page_info *info)
1022d7a5752cSMel Gorman {
1023d7a5752cSMel Gorman 	unsigned int order;
1024d7a5752cSMel Gorman 
1025d7a5752cSMel Gorman 	info->free_pages = 0;
1026d7a5752cSMel Gorman 	info->free_blocks_total = 0;
1027d7a5752cSMel Gorman 	info->free_blocks_suitable = 0;
1028d7a5752cSMel Gorman 
1029d7a5752cSMel Gorman 	for (order = 0; order < MAX_ORDER; order++) {
1030d7a5752cSMel Gorman 		unsigned long blocks;
1031d7a5752cSMel Gorman 
1032d7a5752cSMel Gorman 		/* Count number of free blocks */
1033d7a5752cSMel Gorman 		blocks = zone->free_area[order].nr_free;
1034d7a5752cSMel Gorman 		info->free_blocks_total += blocks;
1035d7a5752cSMel Gorman 
1036d7a5752cSMel Gorman 		/* Count free base pages */
1037d7a5752cSMel Gorman 		info->free_pages += blocks << order;
1038d7a5752cSMel Gorman 
1039d7a5752cSMel Gorman 		/* Count the suitable free blocks */
1040d7a5752cSMel Gorman 		if (order >= suitable_order)
1041d7a5752cSMel Gorman 			info->free_blocks_suitable += blocks <<
1042d7a5752cSMel Gorman 						(order - suitable_order);
1043d7a5752cSMel Gorman 	}
1044d7a5752cSMel Gorman }
1045f1a5ab12SMel Gorman 
1046f1a5ab12SMel Gorman /*
1047f1a5ab12SMel Gorman  * A fragmentation index only makes sense if an allocation of a requested
1048f1a5ab12SMel Gorman  * size would fail. If that is true, the fragmentation index indicates
1049f1a5ab12SMel Gorman  * whether external fragmentation or a lack of memory was the problem.
1050f1a5ab12SMel Gorman  * The value can be used to determine if page reclaim or compaction
1051f1a5ab12SMel Gorman  * should be used
1052f1a5ab12SMel Gorman  */
105356de7263SMel Gorman static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
1054f1a5ab12SMel Gorman {
1055f1a5ab12SMel Gorman 	unsigned long requested = 1UL << order;
1056f1a5ab12SMel Gorman 
105788d6ac40SWen Yang 	if (WARN_ON_ONCE(order >= MAX_ORDER))
105888d6ac40SWen Yang 		return 0;
105988d6ac40SWen Yang 
1060f1a5ab12SMel Gorman 	if (!info->free_blocks_total)
1061f1a5ab12SMel Gorman 		return 0;
1062f1a5ab12SMel Gorman 
1063f1a5ab12SMel Gorman 	/* Fragmentation index only makes sense when a request would fail */
1064f1a5ab12SMel Gorman 	if (info->free_blocks_suitable)
1065f1a5ab12SMel Gorman 		return -1000;
1066f1a5ab12SMel Gorman 
1067f1a5ab12SMel Gorman 	/*
1068f1a5ab12SMel Gorman 	 * Index is between 0 and 1 so return within 3 decimal places
1069f1a5ab12SMel Gorman 	 *
1070f1a5ab12SMel Gorman 	 * 0 => allocation would fail due to lack of memory
1071f1a5ab12SMel Gorman 	 * 1 => allocation would fail due to fragmentation
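	 *
	 * For example, 1000 free order-0 pages and a request of order 4
	 * (requested = 16) give 1000 - (1000 + 1000 * 1000 / 16) / 1000 = 937,
	 * i.e. fragmentation rather than lack of memory is the problem.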
1072f1a5ab12SMel Gorman 	 */
1073f1a5ab12SMel Gorman 	return 1000 - div_u64((1000 + div_u64(info->free_pages * 1000ULL, requested)), info->free_blocks_total);
1074f1a5ab12SMel Gorman }
107556de7263SMel Gorman 
107656de7263SMel Gorman /* Same as __fragmentation_index() but allocates contig_page_info on the stack */
107756de7263SMel Gorman int fragmentation_index(struct zone *zone, unsigned int order)
107856de7263SMel Gorman {
107956de7263SMel Gorman 	struct contig_page_info info;
108056de7263SMel Gorman 
108156de7263SMel Gorman 	fill_contig_page_info(zone, order, &info);
108256de7263SMel Gorman 	return __fragmentation_index(order, &info);
108356de7263SMel Gorman }
1084d7a5752cSMel Gorman #endif
1085d7a5752cSMel Gorman 
10860d6617c7SDavid Rientjes #if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
1087fa25c503SKOSAKI Motohiro #ifdef CONFIG_ZONE_DMA
1088fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA(xx) xx "_dma",
1089fa25c503SKOSAKI Motohiro #else
1090fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA(xx)
1091fa25c503SKOSAKI Motohiro #endif
1092fa25c503SKOSAKI Motohiro 
1093fa25c503SKOSAKI Motohiro #ifdef CONFIG_ZONE_DMA32
1094fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA32(xx) xx "_dma32",
1095fa25c503SKOSAKI Motohiro #else
1096fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA32(xx)
1097fa25c503SKOSAKI Motohiro #endif
1098fa25c503SKOSAKI Motohiro 
1099fa25c503SKOSAKI Motohiro #ifdef CONFIG_HIGHMEM
1100fa25c503SKOSAKI Motohiro #define TEXT_FOR_HIGHMEM(xx) xx "_high",
1101fa25c503SKOSAKI Motohiro #else
1102fa25c503SKOSAKI Motohiro #define TEXT_FOR_HIGHMEM(xx)
1103fa25c503SKOSAKI Motohiro #endif
1104fa25c503SKOSAKI Motohiro 
1105fa25c503SKOSAKI Motohiro #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
1106fa25c503SKOSAKI Motohiro 					TEXT_FOR_HIGHMEM(xx) xx "_movable",
1107fa25c503SKOSAKI Motohiro 
1108fa25c503SKOSAKI Motohiro const char * const vmstat_text[] = {
110909316c09SKonstantin Khlebnikov 	/* enum zone_stat_item counters */
1110fa25c503SKOSAKI Motohiro 	"nr_free_pages",
111171c799f4SMinchan Kim 	"nr_zone_inactive_anon",
111271c799f4SMinchan Kim 	"nr_zone_active_anon",
111371c799f4SMinchan Kim 	"nr_zone_inactive_file",
111471c799f4SMinchan Kim 	"nr_zone_active_file",
111571c799f4SMinchan Kim 	"nr_zone_unevictable",
11165a1c84b4SMel Gorman 	"nr_zone_write_pending",
1117fa25c503SKOSAKI Motohiro 	"nr_mlock",
1118fa25c503SKOSAKI Motohiro 	"nr_page_table_pages",
1119fa25c503SKOSAKI Motohiro 	"nr_kernel_stack",
1120fa25c503SKOSAKI Motohiro 	"nr_bounce",
112191537feeSMinchan Kim #if IS_ENABLED(CONFIG_ZSMALLOC)
112291537feeSMinchan Kim 	"nr_zspages",
112391537feeSMinchan Kim #endif
11243a321d2aSKemi Wang 	"nr_free_cma",
11253a321d2aSKemi Wang 
11263a321d2aSKemi Wang 	/* enum numa_stat_item counters */
1127fa25c503SKOSAKI Motohiro #ifdef CONFIG_NUMA
1128fa25c503SKOSAKI Motohiro 	"numa_hit",
1129fa25c503SKOSAKI Motohiro 	"numa_miss",
1130fa25c503SKOSAKI Motohiro 	"numa_foreign",
1131fa25c503SKOSAKI Motohiro 	"numa_interleave",
1132fa25c503SKOSAKI Motohiro 	"numa_local",
1133fa25c503SKOSAKI Motohiro 	"numa_other",
1134fa25c503SKOSAKI Motohiro #endif
113509316c09SKonstantin Khlebnikov 
1136599d0c95SMel Gorman 	/* enum node_stat_item counters */
1137599d0c95SMel Gorman 	"nr_inactive_anon",
1138599d0c95SMel Gorman 	"nr_active_anon",
1139599d0c95SMel Gorman 	"nr_inactive_file",
1140599d0c95SMel Gorman 	"nr_active_file",
1141599d0c95SMel Gorman 	"nr_unevictable",
1142385386cfSJohannes Weiner 	"nr_slab_reclaimable",
1143385386cfSJohannes Weiner 	"nr_slab_unreclaimable",
1144599d0c95SMel Gorman 	"nr_isolated_anon",
1145599d0c95SMel Gorman 	"nr_isolated_file",
114668d48e6aSJohannes Weiner 	"workingset_nodes",
11471e6b1085SMel Gorman 	"workingset_refault",
11481e6b1085SMel Gorman 	"workingset_activate",
11491899ad18SJohannes Weiner 	"workingset_restore",
11501e6b1085SMel Gorman 	"workingset_nodereclaim",
115150658e2eSMel Gorman 	"nr_anon_pages",
115250658e2eSMel Gorman 	"nr_mapped",
115311fb9989SMel Gorman 	"nr_file_pages",
115411fb9989SMel Gorman 	"nr_dirty",
115511fb9989SMel Gorman 	"nr_writeback",
115611fb9989SMel Gorman 	"nr_writeback_temp",
115711fb9989SMel Gorman 	"nr_shmem",
115811fb9989SMel Gorman 	"nr_shmem_hugepages",
115911fb9989SMel Gorman 	"nr_shmem_pmdmapped",
116011fb9989SMel Gorman 	"nr_anon_transparent_hugepages",
116111fb9989SMel Gorman 	"nr_unstable",
1162c4a25635SMel Gorman 	"nr_vmscan_write",
1163c4a25635SMel Gorman 	"nr_vmscan_immediate_reclaim",
1164c4a25635SMel Gorman 	"nr_dirtied",
1165c4a25635SMel Gorman 	"nr_written",
1166b29940c1SVlastimil Babka 	"nr_kernel_misc_reclaimable",
1167599d0c95SMel Gorman 
116809316c09SKonstantin Khlebnikov 	/* enum writeback_stat_item counters */
1169fa25c503SKOSAKI Motohiro 	"nr_dirty_threshold",
1170fa25c503SKOSAKI Motohiro 	"nr_dirty_background_threshold",
1171fa25c503SKOSAKI Motohiro 
1172fa25c503SKOSAKI Motohiro #ifdef CONFIG_VM_EVENT_COUNTERS
117309316c09SKonstantin Khlebnikov 	/* enum vm_event_item counters */
1174fa25c503SKOSAKI Motohiro 	"pgpgin",
1175fa25c503SKOSAKI Motohiro 	"pgpgout",
1176fa25c503SKOSAKI Motohiro 	"pswpin",
1177fa25c503SKOSAKI Motohiro 	"pswpout",
1178fa25c503SKOSAKI Motohiro 
1179fa25c503SKOSAKI Motohiro 	TEXTS_FOR_ZONES("pgalloc")
11807cc30fcfSMel Gorman 	TEXTS_FOR_ZONES("allocstall")
11817cc30fcfSMel Gorman 	TEXTS_FOR_ZONES("pgskip")
1182fa25c503SKOSAKI Motohiro 
1183fa25c503SKOSAKI Motohiro 	"pgfree",
1184fa25c503SKOSAKI Motohiro 	"pgactivate",
1185fa25c503SKOSAKI Motohiro 	"pgdeactivate",
1186f7ad2a6cSShaohua Li 	"pglazyfree",
1187fa25c503SKOSAKI Motohiro 
1188fa25c503SKOSAKI Motohiro 	"pgfault",
1189fa25c503SKOSAKI Motohiro 	"pgmajfault",
1190854e9ed0SMinchan Kim 	"pglazyfreed",
1191fa25c503SKOSAKI Motohiro 
1192599d0c95SMel Gorman 	"pgrefill",
1193599d0c95SMel Gorman 	"pgsteal_kswapd",
1194599d0c95SMel Gorman 	"pgsteal_direct",
1195599d0c95SMel Gorman 	"pgscan_kswapd",
1196599d0c95SMel Gorman 	"pgscan_direct",
119768243e76SMel Gorman 	"pgscan_direct_throttle",
1198fa25c503SKOSAKI Motohiro 
1199fa25c503SKOSAKI Motohiro #ifdef CONFIG_NUMA
1200fa25c503SKOSAKI Motohiro 	"zone_reclaim_failed",
1201fa25c503SKOSAKI Motohiro #endif
1202fa25c503SKOSAKI Motohiro 	"pginodesteal",
1203fa25c503SKOSAKI Motohiro 	"slabs_scanned",
1204fa25c503SKOSAKI Motohiro 	"kswapd_inodesteal",
1205fa25c503SKOSAKI Motohiro 	"kswapd_low_wmark_hit_quickly",
1206fa25c503SKOSAKI Motohiro 	"kswapd_high_wmark_hit_quickly",
1207fa25c503SKOSAKI Motohiro 	"pageoutrun",
1208fa25c503SKOSAKI Motohiro 
1209fa25c503SKOSAKI Motohiro 	"pgrotated",
1210fa25c503SKOSAKI Motohiro 
12115509a5d2SDave Hansen 	"drop_pagecache",
12125509a5d2SDave Hansen 	"drop_slab",
12138e675f7aSKonstantin Khlebnikov 	"oom_kill",
12145509a5d2SDave Hansen 
121503c5a6e1SMel Gorman #ifdef CONFIG_NUMA_BALANCING
121603c5a6e1SMel Gorman 	"numa_pte_updates",
121772403b4aSMel Gorman 	"numa_huge_pte_updates",
121803c5a6e1SMel Gorman 	"numa_hint_faults",
121903c5a6e1SMel Gorman 	"numa_hint_faults_local",
122003c5a6e1SMel Gorman 	"numa_pages_migrated",
122103c5a6e1SMel Gorman #endif
12225647bc29SMel Gorman #ifdef CONFIG_MIGRATION
12235647bc29SMel Gorman 	"pgmigrate_success",
12245647bc29SMel Gorman 	"pgmigrate_fail",
12255647bc29SMel Gorman #endif
1226fa25c503SKOSAKI Motohiro #ifdef CONFIG_COMPACTION
1227397487dbSMel Gorman 	"compact_migrate_scanned",
1228397487dbSMel Gorman 	"compact_free_scanned",
1229397487dbSMel Gorman 	"compact_isolated",
1230fa25c503SKOSAKI Motohiro 	"compact_stall",
1231fa25c503SKOSAKI Motohiro 	"compact_fail",
1232fa25c503SKOSAKI Motohiro 	"compact_success",
1233698b1b30SVlastimil Babka 	"compact_daemon_wake",
12347f354a54SDavid Rientjes 	"compact_daemon_migrate_scanned",
12357f354a54SDavid Rientjes 	"compact_daemon_free_scanned",
1236fa25c503SKOSAKI Motohiro #endif
1237fa25c503SKOSAKI Motohiro 
1238fa25c503SKOSAKI Motohiro #ifdef CONFIG_HUGETLB_PAGE
1239fa25c503SKOSAKI Motohiro 	"htlb_buddy_alloc_success",
1240fa25c503SKOSAKI Motohiro 	"htlb_buddy_alloc_fail",
1241fa25c503SKOSAKI Motohiro #endif
1242fa25c503SKOSAKI Motohiro 	"unevictable_pgs_culled",
1243fa25c503SKOSAKI Motohiro 	"unevictable_pgs_scanned",
1244fa25c503SKOSAKI Motohiro 	"unevictable_pgs_rescued",
1245fa25c503SKOSAKI Motohiro 	"unevictable_pgs_mlocked",
1246fa25c503SKOSAKI Motohiro 	"unevictable_pgs_munlocked",
1247fa25c503SKOSAKI Motohiro 	"unevictable_pgs_cleared",
1248fa25c503SKOSAKI Motohiro 	"unevictable_pgs_stranded",
1249fa25c503SKOSAKI Motohiro 
1250fa25c503SKOSAKI Motohiro #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1251fa25c503SKOSAKI Motohiro 	"thp_fault_alloc",
1252fa25c503SKOSAKI Motohiro 	"thp_fault_fallback",
1253fa25c503SKOSAKI Motohiro 	"thp_collapse_alloc",
1254fa25c503SKOSAKI Motohiro 	"thp_collapse_alloc_failed",
125595ecedcdSKirill A. Shutemov 	"thp_file_alloc",
125695ecedcdSKirill A. Shutemov 	"thp_file_mapped",
1257122afea9SKirill A. Shutemov 	"thp_split_page",
1258122afea9SKirill A. Shutemov 	"thp_split_page_failed",
1259f9719a03SKirill A. Shutemov 	"thp_deferred_split_page",
1260122afea9SKirill A. Shutemov 	"thp_split_pmd",
1261ce9311cfSYisheng Xie #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
1262ce9311cfSYisheng Xie 	"thp_split_pud",
1263ce9311cfSYisheng Xie #endif
1264d8a8e1f0SKirill A. Shutemov 	"thp_zero_page_alloc",
1265d8a8e1f0SKirill A. Shutemov 	"thp_zero_page_alloc_failed",
1266225311a4SHuang Ying 	"thp_swpout",
1267fe490cc0SHuang Ying 	"thp_swpout_fallback",
1268fa25c503SKOSAKI Motohiro #endif
126909316c09SKonstantin Khlebnikov #ifdef CONFIG_MEMORY_BALLOON
127009316c09SKonstantin Khlebnikov 	"balloon_inflate",
127109316c09SKonstantin Khlebnikov 	"balloon_deflate",
127209316c09SKonstantin Khlebnikov #ifdef CONFIG_BALLOON_COMPACTION
127309316c09SKonstantin Khlebnikov 	"balloon_migrate",
127409316c09SKonstantin Khlebnikov #endif
127509316c09SKonstantin Khlebnikov #endif /* CONFIG_MEMORY_BALLOON */
1276ec659934SMel Gorman #ifdef CONFIG_DEBUG_TLBFLUSH
12776df46865SDave Hansen #ifdef CONFIG_SMP
12789824cf97SDave Hansen 	"nr_tlb_remote_flush",
12799824cf97SDave Hansen 	"nr_tlb_remote_flush_received",
128058bc4c34SJann Horn #else
128158bc4c34SJann Horn 	"", /* nr_tlb_remote_flush */
128258bc4c34SJann Horn 	"", /* nr_tlb_remote_flush_received */
1283ec659934SMel Gorman #endif /* CONFIG_SMP */
12849824cf97SDave Hansen 	"nr_tlb_local_flush_all",
12859824cf97SDave Hansen 	"nr_tlb_local_flush_one",
1286ec659934SMel Gorman #endif /* CONFIG_DEBUG_TLBFLUSH */
1287fa25c503SKOSAKI Motohiro 
12884f115147SDavidlohr Bueso #ifdef CONFIG_DEBUG_VM_VMACACHE
12894f115147SDavidlohr Bueso 	"vmacache_find_calls",
12904f115147SDavidlohr Bueso 	"vmacache_find_hits",
12914f115147SDavidlohr Bueso #endif
1292cbc65df2SHuang Ying #ifdef CONFIG_SWAP
1293cbc65df2SHuang Ying 	"swap_ra",
1294cbc65df2SHuang Ying 	"swap_ra_hit",
1295cbc65df2SHuang Ying #endif
1296fa25c503SKOSAKI Motohiro #endif /* CONFIG_VM_EVENT_COUNTERS */
1297fa25c503SKOSAKI Motohiro };
12980d6617c7SDavid Rientjes #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
1299fa25c503SKOSAKI Motohiro 
13003c486871SAndrew Morton #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
13013c486871SAndrew Morton      defined(CONFIG_PROC_FS)
13023c486871SAndrew Morton static void *frag_start(struct seq_file *m, loff_t *pos)
13033c486871SAndrew Morton {
13043c486871SAndrew Morton 	pg_data_t *pgdat;
13053c486871SAndrew Morton 	loff_t node = *pos;
13063c486871SAndrew Morton 
13073c486871SAndrew Morton 	for (pgdat = first_online_pgdat();
13083c486871SAndrew Morton 	     pgdat && node;
13093c486871SAndrew Morton 	     pgdat = next_online_pgdat(pgdat))
13103c486871SAndrew Morton 		--node;
13113c486871SAndrew Morton 
13123c486871SAndrew Morton 	return pgdat;
13133c486871SAndrew Morton }
13143c486871SAndrew Morton 
13153c486871SAndrew Morton static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
13163c486871SAndrew Morton {
13173c486871SAndrew Morton 	pg_data_t *pgdat = (pg_data_t *)arg;
13183c486871SAndrew Morton 
13193c486871SAndrew Morton 	(*pos)++;
13203c486871SAndrew Morton 	return next_online_pgdat(pgdat);
13213c486871SAndrew Morton }
13223c486871SAndrew Morton 
13233c486871SAndrew Morton static void frag_stop(struct seq_file *m, void *arg)
13243c486871SAndrew Morton {
13253c486871SAndrew Morton }
13263c486871SAndrew Morton 
1327b2bd8598SDavid Rientjes /*
1328b2bd8598SDavid Rientjes  * Walk zones in a node and print using a callback.
1329b2bd8598SDavid Rientjes  * If @assert_populated is true, only use callback for zones that are populated.
1330b2bd8598SDavid Rientjes  */
13313c486871SAndrew Morton static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
1332727c080fSVinayak Menon 		bool assert_populated, bool nolock,
13333c486871SAndrew Morton 		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
13343c486871SAndrew Morton {
13353c486871SAndrew Morton 	struct zone *zone;
13363c486871SAndrew Morton 	struct zone *node_zones = pgdat->node_zones;
13373c486871SAndrew Morton 	unsigned long flags;
13383c486871SAndrew Morton 
13393c486871SAndrew Morton 	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
1340b2bd8598SDavid Rientjes 		if (assert_populated && !populated_zone(zone))
13413c486871SAndrew Morton 			continue;
13423c486871SAndrew Morton 
1343727c080fSVinayak Menon 		if (!nolock)
13443c486871SAndrew Morton 			spin_lock_irqsave(&zone->lock, flags);
13453c486871SAndrew Morton 		print(m, pgdat, zone);
1346727c080fSVinayak Menon 		if (!nolock)
13473c486871SAndrew Morton 			spin_unlock_irqrestore(&zone->lock, flags);
13483c486871SAndrew Morton 	}
13493c486871SAndrew Morton }
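/*
 * Illustrative usage sketch (not part of this file; example_print is a
 * hypothetical name): a print callback for walk_zones_in_node() takes the
 * seq_file, the node and the zone, e.g.:
 *
 *	static void example_print(struct seq_file *m, pg_data_t *pgdat,
 *				  struct zone *zone)
 *	{
 *		seq_printf(m, "Node %d, zone %8s\n", pgdat->node_id, zone->name);
 *	}
 *
 *	walk_zones_in_node(m, pgdat, true, false, example_print);
 *
 * frag_show_print() below is the in-tree instance of this pattern.
 */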
13503c486871SAndrew Morton #endif
13513c486871SAndrew Morton 
1352d7a5752cSMel Gorman #ifdef CONFIG_PROC_FS
1353467c996cSMel Gorman static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
1354467c996cSMel Gorman 						struct zone *zone)
1355467c996cSMel Gorman {
1356467c996cSMel Gorman 	int order;
1357467c996cSMel Gorman 
1358f6ac2354SChristoph Lameter 	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1359f6ac2354SChristoph Lameter 	for (order = 0; order < MAX_ORDER; ++order)
1360f6ac2354SChristoph Lameter 		seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
1361f6ac2354SChristoph Lameter 	seq_putc(m, '\n');
1362f6ac2354SChristoph Lameter }
1363467c996cSMel Gorman 
1364467c996cSMel Gorman /*
1365467c996cSMel Gorman  * This walks the free areas for each zone.
1366467c996cSMel Gorman  */
1367467c996cSMel Gorman static int frag_show(struct seq_file *m, void *arg)
1368467c996cSMel Gorman {
1369467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1370727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, frag_show_print);
1371467c996cSMel Gorman 	return 0;
1372467c996cSMel Gorman }
1373467c996cSMel Gorman 
1374467c996cSMel Gorman static void pagetypeinfo_showfree_print(struct seq_file *m,
1375467c996cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1376467c996cSMel Gorman {
1377467c996cSMel Gorman 	int order, mtype;
1378467c996cSMel Gorman 
1379467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
1380467c996cSMel Gorman 		seq_printf(m, "Node %4d, zone %8s, type %12s ",
1381467c996cSMel Gorman 					pgdat->node_id,
1382467c996cSMel Gorman 					zone->name,
1383467c996cSMel Gorman 					migratetype_names[mtype]);
1384467c996cSMel Gorman 		for (order = 0; order < MAX_ORDER; ++order) {
1385467c996cSMel Gorman 			unsigned long freecount = 0;
1386467c996cSMel Gorman 			struct free_area *area;
1387467c996cSMel Gorman 			struct list_head *curr;
1388467c996cSMel Gorman 
1389467c996cSMel Gorman 			area = &(zone->free_area[order]);
1390467c996cSMel Gorman 
1391467c996cSMel Gorman 			list_for_each(curr, &area->free_list[mtype])
1392467c996cSMel Gorman 				freecount++;
1393467c996cSMel Gorman 			seq_printf(m, "%6lu ", freecount);
1394467c996cSMel Gorman 		}
1395467c996cSMel Gorman 		seq_putc(m, '\n');
1396467c996cSMel Gorman 	}
1397467c996cSMel Gorman }
1398467c996cSMel Gorman 
1399467c996cSMel Gorman /* Print out the free pages at each order for each migratetype */
1400467c996cSMel Gorman static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
1401467c996cSMel Gorman {
1402467c996cSMel Gorman 	int order;
1403467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1404467c996cSMel Gorman 
1405467c996cSMel Gorman 	/* Print header */
1406467c996cSMel Gorman 	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
1407467c996cSMel Gorman 	for (order = 0; order < MAX_ORDER; ++order)
1408467c996cSMel Gorman 		seq_printf(m, "%6d ", order);
1409467c996cSMel Gorman 	seq_putc(m, '\n');
1410467c996cSMel Gorman 
1411727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
1412467c996cSMel Gorman 
1413467c996cSMel Gorman 	return 0;
1414467c996cSMel Gorman }
1415467c996cSMel Gorman 
1416467c996cSMel Gorman static void pagetypeinfo_showblockcount_print(struct seq_file *m,
1417467c996cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1418467c996cSMel Gorman {
1419467c996cSMel Gorman 	int mtype;
1420467c996cSMel Gorman 	unsigned long pfn;
1421467c996cSMel Gorman 	unsigned long start_pfn = zone->zone_start_pfn;
1422108bcc96SCody P Schafer 	unsigned long end_pfn = zone_end_pfn(zone);
1423467c996cSMel Gorman 	unsigned long count[MIGRATE_TYPES] = { 0, };
1424467c996cSMel Gorman 
1425467c996cSMel Gorman 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
1426467c996cSMel Gorman 		struct page *page;
1427467c996cSMel Gorman 
1428d336e94eSMichal Hocko 		page = pfn_to_online_page(pfn);
1429d336e94eSMichal Hocko 		if (!page)
1430467c996cSMel Gorman 			continue;
1431467c996cSMel Gorman 
1432eb33575cSMel Gorman 		/* Watch for unexpected holes punched in the memmap */
1433eb33575cSMel Gorman 		if (!memmap_valid_within(pfn, page, zone))
1434e80d6a24SMel Gorman 			continue;
1435eb33575cSMel Gorman 
1436a91c43c7SJoonsoo Kim 		if (page_zone(page) != zone)
1437a91c43c7SJoonsoo Kim 			continue;
1438a91c43c7SJoonsoo Kim 
1439467c996cSMel Gorman 		mtype = get_pageblock_migratetype(page);
1440467c996cSMel Gorman 
1441e80d6a24SMel Gorman 		if (mtype < MIGRATE_TYPES)
1442467c996cSMel Gorman 			count[mtype]++;
1443467c996cSMel Gorman 	}
1444467c996cSMel Gorman 
1445467c996cSMel Gorman 	/* Print counts */
1446467c996cSMel Gorman 	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1447467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1448467c996cSMel Gorman 		seq_printf(m, "%12lu ", count[mtype]);
1449467c996cSMel Gorman 	seq_putc(m, '\n');
1450467c996cSMel Gorman }
1451467c996cSMel Gorman 
1452f113e641SSeongJae Park /* Print out the number of pageblocks for each migratetype */
1453467c996cSMel Gorman static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
1454467c996cSMel Gorman {
1455467c996cSMel Gorman 	int mtype;
1456467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1457467c996cSMel Gorman 
1458467c996cSMel Gorman 	seq_printf(m, "\n%-23s", "Number of blocks type ");
1459467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1460467c996cSMel Gorman 		seq_printf(m, "%12s ", migratetype_names[mtype]);
1461467c996cSMel Gorman 	seq_putc(m, '\n');
1462727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false,
1463727c080fSVinayak Menon 		pagetypeinfo_showblockcount_print);
1464467c996cSMel Gorman 
1465467c996cSMel Gorman 	return 0;
1466467c996cSMel Gorman }
1467467c996cSMel Gorman 
146848c96a36SJoonsoo Kim /*
146948c96a36SJoonsoo Kim  * Print out the number of pageblocks for each migratetype that contain pages
147048c96a36SJoonsoo Kim  * of other types. This gives an indication of how well fallbacks are being
147148c96a36SJoonsoo Kim  * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
147248c96a36SJoonsoo Kim  * to determine what is going on
147348c96a36SJoonsoo Kim  */
147448c96a36SJoonsoo Kim static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
147548c96a36SJoonsoo Kim {
147648c96a36SJoonsoo Kim #ifdef CONFIG_PAGE_OWNER
147748c96a36SJoonsoo Kim 	int mtype;
147848c96a36SJoonsoo Kim 
14797dd80b8aSVlastimil Babka 	if (!static_branch_unlikely(&page_owner_inited))
148048c96a36SJoonsoo Kim 		return;
148148c96a36SJoonsoo Kim 
148248c96a36SJoonsoo Kim 	drain_all_pages(NULL);
148348c96a36SJoonsoo Kim 
148448c96a36SJoonsoo Kim 	seq_printf(m, "\n%-23s", "Number of mixed blocks ");
148548c96a36SJoonsoo Kim 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
148648c96a36SJoonsoo Kim 		seq_printf(m, "%12s ", migratetype_names[mtype]);
148748c96a36SJoonsoo Kim 	seq_putc(m, '\n');
148848c96a36SJoonsoo Kim 
1489727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, true,
1490727c080fSVinayak Menon 		pagetypeinfo_showmixedcount_print);
149148c96a36SJoonsoo Kim #endif /* CONFIG_PAGE_OWNER */
149248c96a36SJoonsoo Kim }
149348c96a36SJoonsoo Kim 
1494467c996cSMel Gorman /*
1495467c996cSMel Gorman  * This prints out statistics in relation to grouping pages by mobility.
1496467c996cSMel Gorman  * It is expensive to collect, so do not read this file frequently.
1497467c996cSMel Gorman  */
1498467c996cSMel Gorman static int pagetypeinfo_show(struct seq_file *m, void *arg)
1499467c996cSMel Gorman {
1500467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1501467c996cSMel Gorman 
150241b25a37SKOSAKI Motohiro 	/* check memoryless node */
1503a47b53c5SLai Jiangshan 	if (!node_state(pgdat->node_id, N_MEMORY))
150441b25a37SKOSAKI Motohiro 		return 0;
150541b25a37SKOSAKI Motohiro 
1506467c996cSMel Gorman 	seq_printf(m, "Page block order: %d\n", pageblock_order);
1507467c996cSMel Gorman 	seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
1508467c996cSMel Gorman 	seq_putc(m, '\n');
1509467c996cSMel Gorman 	pagetypeinfo_showfree(m, pgdat);
1510467c996cSMel Gorman 	pagetypeinfo_showblockcount(m, pgdat);
151148c96a36SJoonsoo Kim 	pagetypeinfo_showmixedcount(m, pgdat);
1512467c996cSMel Gorman 
1513f6ac2354SChristoph Lameter 	return 0;
1514f6ac2354SChristoph Lameter }
1515f6ac2354SChristoph Lameter 
15168f32f7e5SAlexey Dobriyan static const struct seq_operations fragmentation_op = {
1517f6ac2354SChristoph Lameter 	.start	= frag_start,
1518f6ac2354SChristoph Lameter 	.next	= frag_next,
1519f6ac2354SChristoph Lameter 	.stop	= frag_stop,
1520f6ac2354SChristoph Lameter 	.show	= frag_show,
1521f6ac2354SChristoph Lameter };
1522f6ac2354SChristoph Lameter 
152374e2e8e8SAlexey Dobriyan static const struct seq_operations pagetypeinfo_op = {
1524467c996cSMel Gorman 	.start	= frag_start,
1525467c996cSMel Gorman 	.next	= frag_next,
1526467c996cSMel Gorman 	.stop	= frag_stop,
1527467c996cSMel Gorman 	.show	= pagetypeinfo_show,
1528467c996cSMel Gorman };
1529467c996cSMel Gorman 
1530e2ecc8a7SMel Gorman static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
1531e2ecc8a7SMel Gorman {
1532e2ecc8a7SMel Gorman 	int zid;
1533e2ecc8a7SMel Gorman 
1534e2ecc8a7SMel Gorman 	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
1535e2ecc8a7SMel Gorman 		struct zone *compare = &pgdat->node_zones[zid];
1536e2ecc8a7SMel Gorman 
1537e2ecc8a7SMel Gorman 		if (populated_zone(compare))
1538e2ecc8a7SMel Gorman 			return zone == compare;
1539e2ecc8a7SMel Gorman 	}
1540e2ecc8a7SMel Gorman 
1541e2ecc8a7SMel Gorman 	return false;
1542e2ecc8a7SMel Gorman }
1543e2ecc8a7SMel Gorman 
1544467c996cSMel Gorman static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1545467c996cSMel Gorman 							struct zone *zone)
1546f6ac2354SChristoph Lameter {
1547f6ac2354SChristoph Lameter 	int i;
1548f6ac2354SChristoph Lameter 	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
1549e2ecc8a7SMel Gorman 	if (is_zone_first_populated(pgdat, zone)) {
1550e2ecc8a7SMel Gorman 		seq_printf(m, "\n  per-node stats");
1551e2ecc8a7SMel Gorman 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
1552e2ecc8a7SMel Gorman 			seq_printf(m, "\n      %-12s %lu",
15533a321d2aSKemi Wang 				vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
15543a321d2aSKemi Wang 				NR_VM_NUMA_STAT_ITEMS],
1555e2ecc8a7SMel Gorman 				node_page_state(pgdat, i));
1556e2ecc8a7SMel Gorman 		}
1557e2ecc8a7SMel Gorman 	}
1558f6ac2354SChristoph Lameter 	seq_printf(m,
1559f6ac2354SChristoph Lameter 		   "\n  pages free     %lu"
1560f6ac2354SChristoph Lameter 		   "\n        min      %lu"
1561f6ac2354SChristoph Lameter 		   "\n        low      %lu"
1562f6ac2354SChristoph Lameter 		   "\n        high     %lu"
1563f6ac2354SChristoph Lameter 		   "\n        spanned  %lu"
15649feedc9dSJiang Liu 		   "\n        present  %lu"
15659feedc9dSJiang Liu 		   "\n        managed  %lu",
156688f5acf8SMel Gorman 		   zone_page_state(zone, NR_FREE_PAGES),
156741858966SMel Gorman 		   min_wmark_pages(zone),
156841858966SMel Gorman 		   low_wmark_pages(zone),
156941858966SMel Gorman 		   high_wmark_pages(zone),
1570f6ac2354SChristoph Lameter 		   zone->spanned_pages,
15719feedc9dSJiang Liu 		   zone->present_pages,
1572*9705bea5SArun KS 		   zone_managed_pages(zone));
15732244b95aSChristoph Lameter 
1574f6ac2354SChristoph Lameter 	seq_printf(m,
15753484b2deSMel Gorman 		   "\n        protection: (%ld",
1576f6ac2354SChristoph Lameter 		   zone->lowmem_reserve[0]);
1577f6ac2354SChristoph Lameter 	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
15783484b2deSMel Gorman 		seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
15797dfb8bf3SDavid Rientjes 	seq_putc(m, ')');
15807dfb8bf3SDavid Rientjes 
15817dfb8bf3SDavid Rientjes 	/* If unpopulated, no other information is useful */
15827dfb8bf3SDavid Rientjes 	if (!populated_zone(zone)) {
15837dfb8bf3SDavid Rientjes 		seq_putc(m, '\n');
15847dfb8bf3SDavid Rientjes 		return;
15857dfb8bf3SDavid Rientjes 	}
15867dfb8bf3SDavid Rientjes 
15877dfb8bf3SDavid Rientjes 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
15887dfb8bf3SDavid Rientjes 		seq_printf(m, "\n      %-12s %lu", vmstat_text[i],
15897dfb8bf3SDavid Rientjes 				zone_page_state(zone, i));
15907dfb8bf3SDavid Rientjes 
15913a321d2aSKemi Wang #ifdef CONFIG_NUMA
15923a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
15933a321d2aSKemi Wang 		seq_printf(m, "\n      %-12s %lu",
15943a321d2aSKemi Wang 				vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
159563803222SKemi Wang 				zone_numa_state_snapshot(zone, i));
15963a321d2aSKemi Wang #endif
15973a321d2aSKemi Wang 
15987dfb8bf3SDavid Rientjes 	seq_printf(m, "\n  pagesets");
1599f6ac2354SChristoph Lameter 	for_each_online_cpu(i) {
1600f6ac2354SChristoph Lameter 		struct per_cpu_pageset *pageset;
1601f6ac2354SChristoph Lameter 
160299dcc3e5SChristoph Lameter 		pageset = per_cpu_ptr(zone->pageset, i);
1603f6ac2354SChristoph Lameter 		seq_printf(m,
16043dfa5721SChristoph Lameter 			   "\n    cpu: %i"
1605f6ac2354SChristoph Lameter 			   "\n              count: %i"
1606f6ac2354SChristoph Lameter 			   "\n              high:  %i"
1607f6ac2354SChristoph Lameter 			   "\n              batch: %i",
16083dfa5721SChristoph Lameter 			   i,
16093dfa5721SChristoph Lameter 			   pageset->pcp.count,
16103dfa5721SChristoph Lameter 			   pageset->pcp.high,
16113dfa5721SChristoph Lameter 			   pageset->pcp.batch);
1612df9ecabaSChristoph Lameter #ifdef CONFIG_SMP
1613df9ecabaSChristoph Lameter 		seq_printf(m, "\n  vm stats threshold: %d",
1614df9ecabaSChristoph Lameter 				pageset->stat_threshold);
1615df9ecabaSChristoph Lameter #endif
1616f6ac2354SChristoph Lameter 	}
1617f6ac2354SChristoph Lameter 	seq_printf(m,
1618599d0c95SMel Gorman 		   "\n  node_unreclaimable:  %u"
16193a50d14dSAndrey Ryabinin 		   "\n  start_pfn:           %lu",
1620c73322d0SJohannes Weiner 		   pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
16213a50d14dSAndrey Ryabinin 		   zone->zone_start_pfn);
1622f6ac2354SChristoph Lameter 	seq_putc(m, '\n');
1623f6ac2354SChristoph Lameter }
1624467c996cSMel Gorman 
1625467c996cSMel Gorman /*
1626b2bd8598SDavid Rientjes  * Output information about zones in @pgdat.  All zones are printed regardless
1627b2bd8598SDavid Rientjes  * of whether they are populated or not: lowmem_reserve_ratio operates on the
1628b2bd8598SDavid Rientjes  * set of all zones and userspace would not be aware of such zones if they are
1629b2bd8598SDavid Rientjes  * suppressed here (zoneinfo displays the effect of lowmem_reserve_ratio).
1630467c996cSMel Gorman  */
1631467c996cSMel Gorman static int zoneinfo_show(struct seq_file *m, void *arg)
1632467c996cSMel Gorman {
1633467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1634727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
1635f6ac2354SChristoph Lameter 	return 0;
1636f6ac2354SChristoph Lameter }
1637f6ac2354SChristoph Lameter 
16385c9fe628SAlexey Dobriyan static const struct seq_operations zoneinfo_op = {
1639f6ac2354SChristoph Lameter 	.start	= frag_start, /* iterate over all zones. The same as in
1640f6ac2354SChristoph Lameter 			       * fragmentation. */
1641f6ac2354SChristoph Lameter 	.next	= frag_next,
1642f6ac2354SChristoph Lameter 	.stop	= frag_stop,
1643f6ac2354SChristoph Lameter 	.show	= zoneinfo_show,
1644f6ac2354SChristoph Lameter };
1645f6ac2354SChristoph Lameter 
164679da826aSMichael Rubin enum writeback_stat_item {
164779da826aSMichael Rubin 	NR_DIRTY_THRESHOLD,
164879da826aSMichael Rubin 	NR_DIRTY_BG_THRESHOLD,
164979da826aSMichael Rubin 	NR_VM_WRITEBACK_STAT_ITEMS,
165079da826aSMichael Rubin };
165179da826aSMichael Rubin 
1652f6ac2354SChristoph Lameter static void *vmstat_start(struct seq_file *m, loff_t *pos)
1653f6ac2354SChristoph Lameter {
16542244b95aSChristoph Lameter 	unsigned long *v;
165579da826aSMichael Rubin 	int i, stat_items_size;
1656f6ac2354SChristoph Lameter 
1657f6ac2354SChristoph Lameter 	if (*pos >= ARRAY_SIZE(vmstat_text))
1658f6ac2354SChristoph Lameter 		return NULL;
165979da826aSMichael Rubin 	stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
16603a321d2aSKemi Wang 			  NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) +
166175ef7184SMel Gorman 			  NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) +
166279da826aSMichael Rubin 			  NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
1663f6ac2354SChristoph Lameter 
1664f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
166579da826aSMichael Rubin 	stat_items_size += sizeof(struct vm_event_state);
1666f8891e5eSChristoph Lameter #endif
166779da826aSMichael Rubin 
1668f0ecf25aSJann Horn 	BUILD_BUG_ON(stat_items_size !=
1669f0ecf25aSJann Horn 		     ARRAY_SIZE(vmstat_text) * sizeof(unsigned long));
167079da826aSMichael Rubin 	v = kmalloc(stat_items_size, GFP_KERNEL);
16712244b95aSChristoph Lameter 	m->private = v;
16722244b95aSChristoph Lameter 	if (!v)
1673f6ac2354SChristoph Lameter 		return ERR_PTR(-ENOMEM);
16742244b95aSChristoph Lameter 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1675c41f012aSMichal Hocko 		v[i] = global_zone_page_state(i);
167679da826aSMichael Rubin 	v += NR_VM_ZONE_STAT_ITEMS;
167779da826aSMichael Rubin 
16783a321d2aSKemi Wang #ifdef CONFIG_NUMA
16793a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
16803a321d2aSKemi Wang 		v[i] = global_numa_state(i);
16813a321d2aSKemi Wang 	v += NR_VM_NUMA_STAT_ITEMS;
16823a321d2aSKemi Wang #endif
16833a321d2aSKemi Wang 
168475ef7184SMel Gorman 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
168575ef7184SMel Gorman 		v[i] = global_node_page_state(i);
168675ef7184SMel Gorman 	v += NR_VM_NODE_STAT_ITEMS;
168775ef7184SMel Gorman 
168879da826aSMichael Rubin 	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
168979da826aSMichael Rubin 			    v + NR_DIRTY_THRESHOLD);
169079da826aSMichael Rubin 	v += NR_VM_WRITEBACK_STAT_ITEMS;
169179da826aSMichael Rubin 
1692f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
169379da826aSMichael Rubin 	all_vm_events(v);
169479da826aSMichael Rubin 	v[PGPGIN] /= 2;		/* sectors -> kbytes */
169579da826aSMichael Rubin 	v[PGPGOUT] /= 2;
1696f8891e5eSChristoph Lameter #endif
1697ff8b16d7SWu Fengguang 	return (unsigned long *)m->private + *pos;
1698f6ac2354SChristoph Lameter }
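/*
 * Layout of the buffer built above, one unsigned long per vmstat_text[]
 * entry (descriptive summary added for clarity):
 *
 *	[0, NR_VM_ZONE_STAT_ITEMS)		zone counters
 *	[.., + NR_VM_NUMA_STAT_ITEMS)		NUMA counters (CONFIG_NUMA only)
 *	[.., + NR_VM_NODE_STAT_ITEMS)		node counters
 *	[.., + NR_VM_WRITEBACK_STAT_ITEMS)	dirty thresholds
 *	[.., + NR_VM_EVENT_ITEMS)		event counters (CONFIG_VM_EVENT_COUNTERS only)
 */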
1699f6ac2354SChristoph Lameter 
1700f6ac2354SChristoph Lameter static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
1701f6ac2354SChristoph Lameter {
1702f6ac2354SChristoph Lameter 	(*pos)++;
1703f6ac2354SChristoph Lameter 	if (*pos >= ARRAY_SIZE(vmstat_text))
1704f6ac2354SChristoph Lameter 		return NULL;
1705f6ac2354SChristoph Lameter 	return (unsigned long *)m->private + *pos;
1706f6ac2354SChristoph Lameter }
1707f6ac2354SChristoph Lameter 
1708f6ac2354SChristoph Lameter static int vmstat_show(struct seq_file *m, void *arg)
1709f6ac2354SChristoph Lameter {
1710f6ac2354SChristoph Lameter 	unsigned long *l = arg;
1711f6ac2354SChristoph Lameter 	unsigned long off = l - (unsigned long *)m->private;
171268ba0326SAlexey Dobriyan 
171368ba0326SAlexey Dobriyan 	seq_puts(m, vmstat_text[off]);
171475ba1d07SJoe Perches 	seq_put_decimal_ull(m, " ", *l);
171568ba0326SAlexey Dobriyan 	seq_putc(m, '\n');
1716f6ac2354SChristoph Lameter 	return 0;
1717f6ac2354SChristoph Lameter }
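/*
 * Each /proc/vmstat line produced above has the form "<name> <value>",
 * e.g. (with a purely illustrative value):
 *
 *	nr_free_pages 81254
 */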
1718f6ac2354SChristoph Lameter 
1719f6ac2354SChristoph Lameter static void vmstat_stop(struct seq_file *m, void *arg)
1720f6ac2354SChristoph Lameter {
1721f6ac2354SChristoph Lameter 	kfree(m->private);
1722f6ac2354SChristoph Lameter 	m->private = NULL;
1723f6ac2354SChristoph Lameter }
1724f6ac2354SChristoph Lameter 
1725b6aa44abSAlexey Dobriyan static const struct seq_operations vmstat_op = {
1726f6ac2354SChristoph Lameter 	.start	= vmstat_start,
1727f6ac2354SChristoph Lameter 	.next	= vmstat_next,
1728f6ac2354SChristoph Lameter 	.stop	= vmstat_stop,
1729f6ac2354SChristoph Lameter 	.show	= vmstat_show,
1730f6ac2354SChristoph Lameter };
1731f6ac2354SChristoph Lameter #endif /* CONFIG_PROC_FS */
1732f6ac2354SChristoph Lameter 
1733df9ecabaSChristoph Lameter #ifdef CONFIG_SMP
1734d1187ed2SChristoph Lameter static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
173577461ab3SChristoph Lameter int sysctl_stat_interval __read_mostly = HZ;
1736d1187ed2SChristoph Lameter 
173752b6f46bSHugh Dickins #ifdef CONFIG_PROC_FS
173852b6f46bSHugh Dickins static void refresh_vm_stats(struct work_struct *work)
173952b6f46bSHugh Dickins {
174052b6f46bSHugh Dickins 	refresh_cpu_vm_stats(true);
174152b6f46bSHugh Dickins }
174252b6f46bSHugh Dickins 
174352b6f46bSHugh Dickins int vmstat_refresh(struct ctl_table *table, int write,
174452b6f46bSHugh Dickins 		   void __user *buffer, size_t *lenp, loff_t *ppos)
174552b6f46bSHugh Dickins {
174652b6f46bSHugh Dickins 	long val;
174752b6f46bSHugh Dickins 	int err;
174852b6f46bSHugh Dickins 	int i;
174952b6f46bSHugh Dickins 
175052b6f46bSHugh Dickins 	/*
175152b6f46bSHugh Dickins 	 * The regular update, every sysctl_stat_interval, may come later
175252b6f46bSHugh Dickins 	 * than expected: leaving a significant amount in per_cpu buckets.
175352b6f46bSHugh Dickins 	 * This is particularly misleading when checking a quantity of HUGE
175452b6f46bSHugh Dickins 	 * pages, immediately after running a test.  /proc/sys/vm/stat_refresh,
175552b6f46bSHugh Dickins 	 * which can equally be echo'ed to or cat'ted from (by root),
175652b6f46bSHugh Dickins 	 * can be used to update the stats just before reading them.
175752b6f46bSHugh Dickins 	 *
1758c41f012aSMichal Hocko 	 * Oh, and since global_zone_page_state() etc. are so careful to hide
175952b6f46bSHugh Dickins 	 * transiently negative values, report an error here if any of
176052b6f46bSHugh Dickins 	 * the stats is negative, so we know to go looking for imbalance.
176152b6f46bSHugh Dickins 	 */
176252b6f46bSHugh Dickins 	err = schedule_on_each_cpu(refresh_vm_stats);
176352b6f46bSHugh Dickins 	if (err)
176452b6f46bSHugh Dickins 		return err;
176552b6f46bSHugh Dickins 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
176675ef7184SMel Gorman 		val = atomic_long_read(&vm_zone_stat[i]);
176752b6f46bSHugh Dickins 		if (val < 0) {
176852b6f46bSHugh Dickins 			pr_warn("%s: %s %ld\n",
176952b6f46bSHugh Dickins 				__func__, vmstat_text[i], val);
177052b6f46bSHugh Dickins 			err = -EINVAL;
177152b6f46bSHugh Dickins 		}
177252b6f46bSHugh Dickins 	}
17733a321d2aSKemi Wang #ifdef CONFIG_NUMA
17743a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
17753a321d2aSKemi Wang 		val = atomic_long_read(&vm_numa_stat[i]);
17763a321d2aSKemi Wang 		if (val < 0) {
17773a321d2aSKemi Wang 			pr_warn("%s: %s %ld\n",
17783a321d2aSKemi Wang 				__func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val);
17793a321d2aSKemi Wang 			err = -EINVAL;
17803a321d2aSKemi Wang 		}
17813a321d2aSKemi Wang 	}
17823a321d2aSKemi Wang #endif
178352b6f46bSHugh Dickins 	if (err)
178452b6f46bSHugh Dickins 		return err;
178552b6f46bSHugh Dickins 	if (write)
178652b6f46bSHugh Dickins 		*ppos += *lenp;
178752b6f46bSHugh Dickins 	else
178852b6f46bSHugh Dickins 		*lenp = 0;
178952b6f46bSHugh Dickins 	return 0;
179052b6f46bSHugh Dickins }
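/*
 * Usage sketch (illustrative, following the comment above): as root, either
 * write to or read from the sysctl to flush the per-cpu deltas just before
 * sampling the counters, e.g.:
 *
 *	echo 1 > /proc/sys/vm/stat_refresh   (or: cat /proc/sys/vm/stat_refresh)
 *	cat /proc/vmstat
 *
 * If any counter snapshot is negative, the write/read fails with -EINVAL.
 */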
179152b6f46bSHugh Dickins #endif /* CONFIG_PROC_FS */
179252b6f46bSHugh Dickins 
1793d1187ed2SChristoph Lameter static void vmstat_update(struct work_struct *w)
1794d1187ed2SChristoph Lameter {
17950eb77e98SChristoph Lameter 	if (refresh_cpu_vm_stats(true)) {
17967cc36bbdSChristoph Lameter 		/*
17977cc36bbdSChristoph Lameter 		 * Counters were updated so we expect more updates
17987cc36bbdSChristoph Lameter 		 * to occur in the future. Keep on running the
17997cc36bbdSChristoph Lameter 		 * update worker thread.
18007cc36bbdSChristoph Lameter 		 */
1801ce612879SMichal Hocko 		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
1802176bed1dSLinus Torvalds 				this_cpu_ptr(&vmstat_work),
180398f4ebb2SAnton Blanchard 				round_jiffies_relative(sysctl_stat_interval));
1804f01f17d3SMichal Hocko 	}
1805d1187ed2SChristoph Lameter }
1806d1187ed2SChristoph Lameter 
18120eb77e98SChristoph Lameter /*
18137cc36bbdSChristoph Lameter  * Check if the diffs for a certain cpu indicate that
18147cc36bbdSChristoph Lameter  * an update is needed.
18157cc36bbdSChristoph Lameter  */
18167cc36bbdSChristoph Lameter static bool need_update(int cpu)
1817d1187ed2SChristoph Lameter {
18187cc36bbdSChristoph Lameter 	struct zone *zone;
1819d1187ed2SChristoph Lameter 
18207cc36bbdSChristoph Lameter 	for_each_populated_zone(zone) {
18217cc36bbdSChristoph Lameter 		struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
18227cc36bbdSChristoph Lameter 
18237cc36bbdSChristoph Lameter 		BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
18243a321d2aSKemi Wang #ifdef CONFIG_NUMA
18251d90ca89SKemi Wang 		BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 2);
18263a321d2aSKemi Wang #endif
182763803222SKemi Wang 
18287cc36bbdSChristoph Lameter 		/*
18297cc36bbdSChristoph Lameter 		 * Fast check for pending diffs: memchr_inv() returns NULL when all bytes are 0.
18307cc36bbdSChristoph Lameter 		 */
183113c9aaf7SJanne Huttunen 		if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
183213c9aaf7SJanne Huttunen 			       sizeof(p->vm_stat_diff[0])))
18337cc36bbdSChristoph Lameter 			return true;
18343a321d2aSKemi Wang #ifdef CONFIG_NUMA
183513c9aaf7SJanne Huttunen 		if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS *
183613c9aaf7SJanne Huttunen 			       sizeof(p->vm_numa_stat_diff[0])))
18373a321d2aSKemi Wang 			return true;
18383a321d2aSKemi Wang #endif
18397cc36bbdSChristoph Lameter 	}
18407cc36bbdSChristoph Lameter 	return false;
18417cc36bbdSChristoph Lameter }
18427cc36bbdSChristoph Lameter 
18437b8da4c7SChristoph Lameter /*
18447b8da4c7SChristoph Lameter  * Switch off vmstat processing and then fold all the remaining differentials
18457b8da4c7SChristoph Lameter  * until the diffs stay at zero. The function is used by NOHZ and can only be
18467b8da4c7SChristoph Lameter  * invoked when tick processing is not active.
18477b8da4c7SChristoph Lameter  */
1848f01f17d3SMichal Hocko void quiet_vmstat(void)
1849f01f17d3SMichal Hocko {
1850f01f17d3SMichal Hocko 	if (system_state != SYSTEM_RUNNING)
1851f01f17d3SMichal Hocko 		return;
1852f01f17d3SMichal Hocko 
18537b8da4c7SChristoph Lameter 	if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
1854f01f17d3SMichal Hocko 		return;
1855f01f17d3SMichal Hocko 
1856f01f17d3SMichal Hocko 	if (!need_update(smp_processor_id()))
1857f01f17d3SMichal Hocko 		return;
1858f01f17d3SMichal Hocko 
1859f01f17d3SMichal Hocko 	/*
1860f01f17d3SMichal Hocko 	 * Just refresh counters and do not care about the pending delayed
1861f01f17d3SMichal Hocko 	 * vmstat_update. It doesn't fire that often to matter and canceling
1862f01f17d3SMichal Hocko 	 * it would be too expensive from this path.
1863f01f17d3SMichal Hocko 	 * vmstat_shepherd will take care about that for us.
1864f01f17d3SMichal Hocko 	 */
1865f01f17d3SMichal Hocko 	refresh_cpu_vm_stats(false);
1866f01f17d3SMichal Hocko }
1867f01f17d3SMichal Hocko 
18687cc36bbdSChristoph Lameter /*
18697cc36bbdSChristoph Lameter  * Shepherd worker that checks the vmstat differentials of
18707cc36bbdSChristoph Lameter  * processors whose vmstat update workers have been disabled
18717cc36bbdSChristoph Lameter  * because of inactivity, and re-queues a worker wherever an
18727cc36bbdSChristoph Lameter  * update is needed.
18737cc36bbdSChristoph Lameter  */
18747cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w);
18757cc36bbdSChristoph Lameter 
18760eb77e98SChristoph Lameter static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);
18777cc36bbdSChristoph Lameter 
18787cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w)
18797cc36bbdSChristoph Lameter {
18807cc36bbdSChristoph Lameter 	int cpu;
18817cc36bbdSChristoph Lameter 
18827cc36bbdSChristoph Lameter 	get_online_cpus();
18837cc36bbdSChristoph Lameter 	/* Check processors whose vmstat worker threads have been disabled */
18847b8da4c7SChristoph Lameter 	for_each_online_cpu(cpu) {
1885f01f17d3SMichal Hocko 		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
18867cc36bbdSChristoph Lameter 
18877b8da4c7SChristoph Lameter 		if (!delayed_work_pending(dw) && need_update(cpu))
1888ce612879SMichal Hocko 			queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
1889f01f17d3SMichal Hocko 	}
18907cc36bbdSChristoph Lameter 	put_online_cpus();
18917cc36bbdSChristoph Lameter 
18927cc36bbdSChristoph Lameter 	schedule_delayed_work(&shepherd,
18937cc36bbdSChristoph Lameter 		round_jiffies_relative(sysctl_stat_interval));
18947cc36bbdSChristoph Lameter }
18957cc36bbdSChristoph Lameter 
18967cc36bbdSChristoph Lameter static void __init start_shepherd_timer(void)
18977cc36bbdSChristoph Lameter {
18987cc36bbdSChristoph Lameter 	int cpu;
18997cc36bbdSChristoph Lameter 
19007cc36bbdSChristoph Lameter 	for_each_possible_cpu(cpu)
1901ccde8bd4SMichal Hocko 		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
19027cc36bbdSChristoph Lameter 			vmstat_update);
19037cc36bbdSChristoph Lameter 
19047cc36bbdSChristoph Lameter 	schedule_delayed_work(&shepherd,
19057cc36bbdSChristoph Lameter 		round_jiffies_relative(sysctl_stat_interval));
1906d1187ed2SChristoph Lameter }
1907d1187ed2SChristoph Lameter 
190803e86dbaSTim Chen static void __init init_cpu_node_state(void)
190903e86dbaSTim Chen {
19104c501327SSebastian Andrzej Siewior 	int node;
191103e86dbaSTim Chen 
19124c501327SSebastian Andrzej Siewior 	for_each_online_node(node) {
19134c501327SSebastian Andrzej Siewior 		if (cpumask_weight(cpumask_of_node(node)) > 0)
19144c501327SSebastian Andrzej Siewior 			node_set_state(node, N_CPU);
19154c501327SSebastian Andrzej Siewior 	}
191603e86dbaSTim Chen }
191703e86dbaSTim Chen 
19185438da97SSebastian Andrzej Siewior static int vmstat_cpu_online(unsigned int cpu)
1919807a1bd2SToshi Kani {
19205ee28a44SKAMEZAWA Hiroyuki 	refresh_zone_stat_thresholds();
1921ad596925SChristoph Lameter 	node_set_state(cpu_to_node(cpu), N_CPU);
19225438da97SSebastian Andrzej Siewior 	return 0;
1923df9ecabaSChristoph Lameter }
1924df9ecabaSChristoph Lameter 
19255438da97SSebastian Andrzej Siewior static int vmstat_cpu_down_prep(unsigned int cpu)
19265438da97SSebastian Andrzej Siewior {
19275438da97SSebastian Andrzej Siewior 	cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
19285438da97SSebastian Andrzej Siewior 	return 0;
19295438da97SSebastian Andrzej Siewior }
19305438da97SSebastian Andrzej Siewior 
19315438da97SSebastian Andrzej Siewior static int vmstat_cpu_dead(unsigned int cpu)
19325438da97SSebastian Andrzej Siewior {
19335438da97SSebastian Andrzej Siewior 	const struct cpumask *node_cpus;
19345438da97SSebastian Andrzej Siewior 	int node;
19355438da97SSebastian Andrzej Siewior 
19365438da97SSebastian Andrzej Siewior 	node = cpu_to_node(cpu);
19375438da97SSebastian Andrzej Siewior 
19385438da97SSebastian Andrzej Siewior 	refresh_zone_stat_thresholds();
19395438da97SSebastian Andrzej Siewior 	node_cpus = cpumask_of_node(node);
19405438da97SSebastian Andrzej Siewior 	if (cpumask_weight(node_cpus) > 0)
19415438da97SSebastian Andrzej Siewior 		return 0;
19425438da97SSebastian Andrzej Siewior 
19435438da97SSebastian Andrzej Siewior 	node_clear_state(node, N_CPU);
19445438da97SSebastian Andrzej Siewior 	return 0;
19455438da97SSebastian Andrzej Siewior }
19465438da97SSebastian Andrzej Siewior 
19478f32f7e5SAlexey Dobriyan #endif
1948df9ecabaSChristoph Lameter 
1949ce612879SMichal Hocko struct workqueue_struct *mm_percpu_wq;
1950ce612879SMichal Hocko 
1951597b7305SMichal Hocko void __init init_mm_internals(void)
1952df9ecabaSChristoph Lameter {
1953ce612879SMichal Hocko 	int ret __maybe_unused;
19545438da97SSebastian Andrzej Siewior 
195580d136e1SMichal Hocko 	mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
1956ce612879SMichal Hocko 
1957ce612879SMichal Hocko #ifdef CONFIG_SMP
19585438da97SSebastian Andrzej Siewior 	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
19595438da97SSebastian Andrzej Siewior 					NULL, vmstat_cpu_dead);
19605438da97SSebastian Andrzej Siewior 	if (ret < 0)
19615438da97SSebastian Andrzej Siewior 		pr_err("vmstat: failed to register 'dead' hotplug state\n");
19625438da97SSebastian Andrzej Siewior 
19635438da97SSebastian Andrzej Siewior 	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
19645438da97SSebastian Andrzej Siewior 					vmstat_cpu_online,
19655438da97SSebastian Andrzej Siewior 					vmstat_cpu_down_prep);
19665438da97SSebastian Andrzej Siewior 	if (ret < 0)
19675438da97SSebastian Andrzej Siewior 		pr_err("vmstat: failed to register 'online' hotplug state\n");
19685438da97SSebastian Andrzej Siewior 
19695438da97SSebastian Andrzej Siewior 	get_online_cpus();
197003e86dbaSTim Chen 	init_cpu_node_state();
19715438da97SSebastian Andrzej Siewior 	put_online_cpus();
1972d1187ed2SChristoph Lameter 
19737cc36bbdSChristoph Lameter 	start_shepherd_timer();
19748f32f7e5SAlexey Dobriyan #endif
19758f32f7e5SAlexey Dobriyan #ifdef CONFIG_PROC_FS
1976fddda2b7SChristoph Hellwig 	proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
1977fddda2b7SChristoph Hellwig 	proc_create_seq("pagetypeinfo", 0444, NULL, &pagetypeinfo_op);
1978fddda2b7SChristoph Hellwig 	proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
1979fddda2b7SChristoph Hellwig 	proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
19808f32f7e5SAlexey Dobriyan #endif
1981df9ecabaSChristoph Lameter }
1982d7a5752cSMel Gorman 
1983d7a5752cSMel Gorman #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
1984d7a5752cSMel Gorman 
1985d7a5752cSMel Gorman /*
1986d7a5752cSMel Gorman  * Return an index indicating how much of the available free memory is
1987d7a5752cSMel Gorman  * unusable for an allocation of the requested size.
1988d7a5752cSMel Gorman  */
1989d7a5752cSMel Gorman static int unusable_free_index(unsigned int order,
1990d7a5752cSMel Gorman 				struct contig_page_info *info)
1991d7a5752cSMel Gorman {
1992d7a5752cSMel Gorman 	/* No free memory is interpreted as all free memory is unusable */
1993d7a5752cSMel Gorman 	if (info->free_pages == 0)
1994d7a5752cSMel Gorman 		return 1000;
1995d7a5752cSMel Gorman 
1996d7a5752cSMel Gorman 	/*
1997d7a5752cSMel Gorman 	 * Index should be a value between 0 and 1. Return a value to 3
1998d7a5752cSMel Gorman 	 * decimal places.
1999d7a5752cSMel Gorman 	 *
2000d7a5752cSMel Gorman 	 * 0 => no fragmentation
2001d7a5752cSMel Gorman 	 * 1 => high fragmentation
2002d7a5752cSMel Gorman 	 */
2003d7a5752cSMel Gorman 	return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
2004d7a5752cSMel Gorman 
2005d7a5752cSMel Gorman }
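/*
 * Illustrative worked example (hypothetical numbers, added for clarity):
 * with order = 3 (an 8-page request), info->free_pages = 1000 and
 * info->free_blocks_suitable = 50 (i.e. 50 * 8 = 400 free pages sit in
 * blocks large enough to satisfy the request):
 *
 *	index = (1000 - (50 << 3)) * 1000 / 1000 = 600
 *
 * which unusable_show_print() below renders as 0.600: 60% of the free
 * memory is unusable for an order-3 allocation.
 */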
2006d7a5752cSMel Gorman 
2007d7a5752cSMel Gorman static void unusable_show_print(struct seq_file *m,
2008d7a5752cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
2009d7a5752cSMel Gorman {
2010d7a5752cSMel Gorman 	unsigned int order;
2011d7a5752cSMel Gorman 	int index;
2012d7a5752cSMel Gorman 	struct contig_page_info info;
2013d7a5752cSMel Gorman 
2014d7a5752cSMel Gorman 	seq_printf(m, "Node %d, zone %8s ",
2015d7a5752cSMel Gorman 				pgdat->node_id,
2016d7a5752cSMel Gorman 				zone->name);
2017d7a5752cSMel Gorman 	for (order = 0; order < MAX_ORDER; ++order) {
2018d7a5752cSMel Gorman 		fill_contig_page_info(zone, order, &info);
2019d7a5752cSMel Gorman 		index = unusable_free_index(order, &info);
2020d7a5752cSMel Gorman 		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
2021d7a5752cSMel Gorman 	}
2022d7a5752cSMel Gorman 
2023d7a5752cSMel Gorman 	seq_putc(m, '\n');
2024d7a5752cSMel Gorman }
2025d7a5752cSMel Gorman 
2026d7a5752cSMel Gorman /*
2027d7a5752cSMel Gorman  * Display unusable free space index
2028d7a5752cSMel Gorman  *
2029d7a5752cSMel Gorman  * The unusable free space index measures how much of the available free
2030d7a5752cSMel Gorman  * memory cannot be used to satisfy an allocation of a given size and is a
2031d7a5752cSMel Gorman  * value between 0 and 1. The higher the value, the more of free memory is
2032d7a5752cSMel Gorman  * unusable and by implication, the worse the external fragmentation is. This
2033d7a5752cSMel Gorman  * can be expressed as a percentage by multiplying by 100.
2034d7a5752cSMel Gorman  */
2035d7a5752cSMel Gorman static int unusable_show(struct seq_file *m, void *arg)
2036d7a5752cSMel Gorman {
2037d7a5752cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
2038d7a5752cSMel Gorman 
2039d7a5752cSMel Gorman 	/* check memoryless node */
2040a47b53c5SLai Jiangshan 	if (!node_state(pgdat->node_id, N_MEMORY))
2041d7a5752cSMel Gorman 		return 0;
2042d7a5752cSMel Gorman 
2043727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, unusable_show_print);
2044d7a5752cSMel Gorman 
2045d7a5752cSMel Gorman 	return 0;
2046d7a5752cSMel Gorman }
2047d7a5752cSMel Gorman 
2048d7a5752cSMel Gorman static const struct seq_operations unusable_op = {
2049d7a5752cSMel Gorman 	.start	= frag_start,
2050d7a5752cSMel Gorman 	.next	= frag_next,
2051d7a5752cSMel Gorman 	.stop	= frag_stop,
2052d7a5752cSMel Gorman 	.show	= unusable_show,
2053d7a5752cSMel Gorman };
2054d7a5752cSMel Gorman 
2055d7a5752cSMel Gorman static int unusable_open(struct inode *inode, struct file *file)
2056d7a5752cSMel Gorman {
2057d7a5752cSMel Gorman 	return seq_open(file, &unusable_op);
2058d7a5752cSMel Gorman }
2059d7a5752cSMel Gorman 
2060d7a5752cSMel Gorman static const struct file_operations unusable_file_ops = {
2061d7a5752cSMel Gorman 	.open		= unusable_open,
2062d7a5752cSMel Gorman 	.read		= seq_read,
2063d7a5752cSMel Gorman 	.llseek		= seq_lseek,
2064d7a5752cSMel Gorman 	.release	= seq_release,
2065d7a5752cSMel Gorman };
2066d7a5752cSMel Gorman 
2067f1a5ab12SMel Gorman static void extfrag_show_print(struct seq_file *m,
2068f1a5ab12SMel Gorman 					pg_data_t *pgdat, struct zone *zone)
2069f1a5ab12SMel Gorman {
2070f1a5ab12SMel Gorman 	unsigned int order;
2071f1a5ab12SMel Gorman 	int index;
2072f1a5ab12SMel Gorman 
2073f1a5ab12SMel Gorman 	/* Alloc on stack as interrupts are disabled for zone walk */
2074f1a5ab12SMel Gorman 	struct contig_page_info info;
2075f1a5ab12SMel Gorman 
2076f1a5ab12SMel Gorman 	seq_printf(m, "Node %d, zone %8s ",
2077f1a5ab12SMel Gorman 				pgdat->node_id,
2078f1a5ab12SMel Gorman 				zone->name);
2079f1a5ab12SMel Gorman 	for (order = 0; order < MAX_ORDER; ++order) {
2080f1a5ab12SMel Gorman 		fill_contig_page_info(zone, order, &info);
208156de7263SMel Gorman 		index = __fragmentation_index(order, &info);
2082f1a5ab12SMel Gorman 		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
2083f1a5ab12SMel Gorman 	}
2084f1a5ab12SMel Gorman 
2085f1a5ab12SMel Gorman 	seq_putc(m, '\n');
2086f1a5ab12SMel Gorman }
2087f1a5ab12SMel Gorman 
2088f1a5ab12SMel Gorman /*
2089f1a5ab12SMel Gorman  * Display fragmentation index for orders that allocations would fail for
2090f1a5ab12SMel Gorman  */
2091f1a5ab12SMel Gorman static int extfrag_show(struct seq_file *m, void *arg)
2092f1a5ab12SMel Gorman {
2093f1a5ab12SMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
2094f1a5ab12SMel Gorman 
2095727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);
2096f1a5ab12SMel Gorman 
2097f1a5ab12SMel Gorman 	return 0;
2098f1a5ab12SMel Gorman }
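/*
 * Illustrative output line (hypothetical values) as read from
 * /sys/kernel/debug/extfrag/extfrag_index; -1.000 marks orders for which an
 * allocation would currently succeed:
 *
 *	Node 0, zone   Normal -1.000 -1.000 -1.000 -1.000 0.605 0.733 0.874 ...
 */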
2099f1a5ab12SMel Gorman 
2100f1a5ab12SMel Gorman static const struct seq_operations extfrag_op = {
2101f1a5ab12SMel Gorman 	.start	= frag_start,
2102f1a5ab12SMel Gorman 	.next	= frag_next,
2103f1a5ab12SMel Gorman 	.stop	= frag_stop,
2104f1a5ab12SMel Gorman 	.show	= extfrag_show,
2105f1a5ab12SMel Gorman };
2106f1a5ab12SMel Gorman 
2107f1a5ab12SMel Gorman static int extfrag_open(struct inode *inode, struct file *file)
2108f1a5ab12SMel Gorman {
2109f1a5ab12SMel Gorman 	return seq_open(file, &extfrag_op);
2110f1a5ab12SMel Gorman }
2111f1a5ab12SMel Gorman 
2112f1a5ab12SMel Gorman static const struct file_operations extfrag_file_ops = {
2113f1a5ab12SMel Gorman 	.open		= extfrag_open,
2114f1a5ab12SMel Gorman 	.read		= seq_read,
2115f1a5ab12SMel Gorman 	.llseek		= seq_lseek,
2116f1a5ab12SMel Gorman 	.release	= seq_release,
2117f1a5ab12SMel Gorman };
2118f1a5ab12SMel Gorman 
2119d7a5752cSMel Gorman static int __init extfrag_debug_init(void)
2120d7a5752cSMel Gorman {
2121bde8bd8aSSasikantha babu 	struct dentry *extfrag_debug_root;
2122bde8bd8aSSasikantha babu 
2123d7a5752cSMel Gorman 	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
2124d7a5752cSMel Gorman 	if (!extfrag_debug_root)
2125d7a5752cSMel Gorman 		return -ENOMEM;
2126d7a5752cSMel Gorman 
2127d7a5752cSMel Gorman 	if (!debugfs_create_file("unusable_index", 0444,
2128d7a5752cSMel Gorman 			extfrag_debug_root, NULL, &unusable_file_ops))
2129bde8bd8aSSasikantha babu 		goto fail;
2130d7a5752cSMel Gorman 
2131f1a5ab12SMel Gorman 	if (!debugfs_create_file("extfrag_index", 0444,
2132f1a5ab12SMel Gorman 			extfrag_debug_root, NULL, &extfrag_file_ops))
2133bde8bd8aSSasikantha babu 		goto fail;
2134f1a5ab12SMel Gorman 
2135d7a5752cSMel Gorman 	return 0;
2136bde8bd8aSSasikantha babu fail:
2137bde8bd8aSSasikantha babu 	debugfs_remove_recursive(extfrag_debug_root);
2138bde8bd8aSSasikantha babu 	return -ENOMEM;
2139d7a5752cSMel Gorman }
2140d7a5752cSMel Gorman 
2141d7a5752cSMel Gorman module_init(extfrag_debug_init);
2142d7a5752cSMel Gorman #endif
2143