xref: /linux/mm/vmstat.c (revision cf79f291f985662150363b4a93d16f88f12643bc)
1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2f6ac2354SChristoph Lameter /*
3f6ac2354SChristoph Lameter  *  linux/mm/vmstat.c
4f6ac2354SChristoph Lameter  *
5f6ac2354SChristoph Lameter  *  Manages VM statistics
6f6ac2354SChristoph Lameter  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
72244b95aSChristoph Lameter  *
82244b95aSChristoph Lameter  *  zoned VM statistics
92244b95aSChristoph Lameter  *  Copyright (C) 2006 Silicon Graphics, Inc.,
102244b95aSChristoph Lameter  *		Christoph Lameter <christoph@lameter.com>
117cc36bbdSChristoph Lameter  *  Copyright (C) 2008-2014 Christoph Lameter
12f6ac2354SChristoph Lameter  */
138f32f7e5SAlexey Dobriyan #include <linux/fs.h>
14f6ac2354SChristoph Lameter #include <linux/mm.h>
154e950f6fSAlexey Dobriyan #include <linux/err.h>
162244b95aSChristoph Lameter #include <linux/module.h>
175a0e3ad6STejun Heo #include <linux/slab.h>
18df9ecabaSChristoph Lameter #include <linux/cpu.h>
197cc36bbdSChristoph Lameter #include <linux/cpumask.h>
20c748e134SAdrian Bunk #include <linux/vmstat.h>
213c486871SAndrew Morton #include <linux/proc_fs.h>
223c486871SAndrew Morton #include <linux/seq_file.h>
233c486871SAndrew Morton #include <linux/debugfs.h>
24e8edc6e0SAlexey Dobriyan #include <linux/sched.h>
25f1a5ab12SMel Gorman #include <linux/math64.h>
2679da826aSMichael Rubin #include <linux/writeback.h>
2736deb0beSNamhyung Kim #include <linux/compaction.h>
286e543d57SLisa Du #include <linux/mm_inline.h>
2948c96a36SJoonsoo Kim #include <linux/page_owner.h>
30be5e015dSMarcelo Tosatti #include <linux/sched/isolation.h>
316e543d57SLisa Du 
326e543d57SLisa Du #include "internal.h"
33f6ac2354SChristoph Lameter 
344518085eSKemi Wang #ifdef CONFIG_NUMA
354518085eSKemi Wang int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
364518085eSKemi Wang 
374518085eSKemi Wang /* zero numa counters within a zone */
384518085eSKemi Wang static void zero_zone_numa_counters(struct zone *zone)
394518085eSKemi Wang {
404518085eSKemi Wang 	int item, cpu;
414518085eSKemi Wang 
42f19298b9SMel Gorman 	for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) {
43f19298b9SMel Gorman 		atomic_long_set(&zone->vm_numa_event[item], 0);
44f19298b9SMel Gorman 		for_each_online_cpu(cpu) {
45f19298b9SMel Gorman 			per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_event[item]
464518085eSKemi Wang 						= 0;
474518085eSKemi Wang 		}
484518085eSKemi Wang 	}
49f19298b9SMel Gorman }
504518085eSKemi Wang 
514518085eSKemi Wang /* zero numa counters of all the populated zones */
524518085eSKemi Wang static void zero_zones_numa_counters(void)
534518085eSKemi Wang {
544518085eSKemi Wang 	struct zone *zone;
554518085eSKemi Wang 
564518085eSKemi Wang 	for_each_populated_zone(zone)
574518085eSKemi Wang 		zero_zone_numa_counters(zone);
584518085eSKemi Wang }
594518085eSKemi Wang 
604518085eSKemi Wang /* zero global numa counters */
614518085eSKemi Wang static void zero_global_numa_counters(void)
624518085eSKemi Wang {
634518085eSKemi Wang 	int item;
644518085eSKemi Wang 
65f19298b9SMel Gorman 	for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
66f19298b9SMel Gorman 		atomic_long_set(&vm_numa_event[item], 0);
674518085eSKemi Wang }
684518085eSKemi Wang 
694518085eSKemi Wang static void invalid_numa_statistics(void)
704518085eSKemi Wang {
714518085eSKemi Wang 	zero_zones_numa_counters();
724518085eSKemi Wang 	zero_global_numa_counters();
734518085eSKemi Wang }
744518085eSKemi Wang 
754518085eSKemi Wang static DEFINE_MUTEX(vm_numa_stat_lock);
764518085eSKemi Wang 
774518085eSKemi Wang int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
7832927393SChristoph Hellwig 		void *buffer, size_t *length, loff_t *ppos)
794518085eSKemi Wang {
804518085eSKemi Wang 	int ret, oldval;
814518085eSKemi Wang 
824518085eSKemi Wang 	mutex_lock(&vm_numa_stat_lock);
834518085eSKemi Wang 	if (write)
844518085eSKemi Wang 		oldval = sysctl_vm_numa_stat;
854518085eSKemi Wang 	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
864518085eSKemi Wang 	if (ret || !write)
874518085eSKemi Wang 		goto out;
884518085eSKemi Wang 
894518085eSKemi Wang 	if (oldval == sysctl_vm_numa_stat)
904518085eSKemi Wang 		goto out;
914518085eSKemi Wang 	else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) {
924518085eSKemi Wang 		static_branch_enable(&vm_numa_stat_key);
934518085eSKemi Wang 		pr_info("enable numa statistics\n");
944518085eSKemi Wang 	} else {
954518085eSKemi Wang 		static_branch_disable(&vm_numa_stat_key);
964518085eSKemi Wang 		invalid_numa_statistics();
974518085eSKemi Wang 		pr_info("disable numa statistics, and clear numa counters\n");
984518085eSKemi Wang 	}
994518085eSKemi Wang 
1004518085eSKemi Wang out:
1014518085eSKemi Wang 	mutex_unlock(&vm_numa_stat_lock);
1024518085eSKemi Wang 	return ret;
1034518085eSKemi Wang }
1044518085eSKemi Wang #endif
1054518085eSKemi Wang 
106f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
107f8891e5eSChristoph Lameter DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
108f8891e5eSChristoph Lameter EXPORT_PER_CPU_SYMBOL(vm_event_states);
109f8891e5eSChristoph Lameter 
11031f961a8SMinchan Kim static void sum_vm_events(unsigned long *ret)
111f8891e5eSChristoph Lameter {
1129eccf2a8SChristoph Lameter 	int cpu;
113f8891e5eSChristoph Lameter 	int i;
114f8891e5eSChristoph Lameter 
115f8891e5eSChristoph Lameter 	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
116f8891e5eSChristoph Lameter 
11731f961a8SMinchan Kim 	for_each_online_cpu(cpu) {
118f8891e5eSChristoph Lameter 		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
119f8891e5eSChristoph Lameter 
120f8891e5eSChristoph Lameter 		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
121f8891e5eSChristoph Lameter 			ret[i] += this->event[i];
122f8891e5eSChristoph Lameter 	}
123f8891e5eSChristoph Lameter }
124f8891e5eSChristoph Lameter 
125f8891e5eSChristoph Lameter /*
126f8891e5eSChristoph Lameter  * Accumulate the vm event counters across all CPUs.
127f8891e5eSChristoph Lameter  * The result is unavoidably approximate - it can change
128f8891e5eSChristoph Lameter  * during and after execution of this function.
129f8891e5eSChristoph Lameter */
130f8891e5eSChristoph Lameter void all_vm_events(unsigned long *ret)
131f8891e5eSChristoph Lameter {
1327625eccdSSebastian Andrzej Siewior 	cpus_read_lock();
13331f961a8SMinchan Kim 	sum_vm_events(ret);
1347625eccdSSebastian Andrzej Siewior 	cpus_read_unlock();
135f8891e5eSChristoph Lameter }
13632dd66fcSHeiko Carstens EXPORT_SYMBOL_GPL(all_vm_events);
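/*
 * Illustrative usage sketch (not part of this file): a caller that wants a
 * snapshot of the event counters copies them into a local array and accepts
 * that the totals keep moving while they are summed. PGFAULT is one of the
 * enum vm_event_item counters from include/linux/vm_event_item.h.
 *
 *	unsigned long events[NR_VM_EVENT_ITEMS];
 *
 *	all_vm_events(events);
 *	pr_info("page faults so far: %lu\n", events[PGFAULT]);
 */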
137f8891e5eSChristoph Lameter 
138f8891e5eSChristoph Lameter /*
139f8891e5eSChristoph Lameter  * Fold the foreign cpu events into our own.
140f8891e5eSChristoph Lameter  *
141f8891e5eSChristoph Lameter  * This is adding to the events on one processor
142f8891e5eSChristoph Lameter  * but keeps the global counts constant.
143f8891e5eSChristoph Lameter  */
144f8891e5eSChristoph Lameter void vm_events_fold_cpu(int cpu)
145f8891e5eSChristoph Lameter {
146f8891e5eSChristoph Lameter 	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
147f8891e5eSChristoph Lameter 	int i;
148f8891e5eSChristoph Lameter 
149f8891e5eSChristoph Lameter 	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
150f8891e5eSChristoph Lameter 		count_vm_events(i, fold_state->event[i]);
151f8891e5eSChristoph Lameter 		fold_state->event[i] = 0;
152f8891e5eSChristoph Lameter 	}
153f8891e5eSChristoph Lameter }
154f8891e5eSChristoph Lameter 
155f8891e5eSChristoph Lameter #endif /* CONFIG_VM_EVENT_COUNTERS */
156f8891e5eSChristoph Lameter 
1572244b95aSChristoph Lameter /*
1582244b95aSChristoph Lameter  * Manage combined zone based / global counters
1592244b95aSChristoph Lameter  *
1602244b95aSChristoph Lameter  * vm_stat contains the global counters
1612244b95aSChristoph Lameter  */
16275ef7184SMel Gorman atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
16375ef7184SMel Gorman atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
164f19298b9SMel Gorman atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp;
16575ef7184SMel Gorman EXPORT_SYMBOL(vm_zone_stat);
16675ef7184SMel Gorman EXPORT_SYMBOL(vm_node_stat);
1672244b95aSChristoph Lameter 
168ebeac3eaSGeert Uytterhoeven #ifdef CONFIG_NUMA
169ebeac3eaSGeert Uytterhoeven static void fold_vm_zone_numa_events(struct zone *zone)
170ebeac3eaSGeert Uytterhoeven {
171ebeac3eaSGeert Uytterhoeven 	unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, };
172ebeac3eaSGeert Uytterhoeven 	int cpu;
173ebeac3eaSGeert Uytterhoeven 	enum numa_stat_item item;
174ebeac3eaSGeert Uytterhoeven 
175ebeac3eaSGeert Uytterhoeven 	for_each_online_cpu(cpu) {
176ebeac3eaSGeert Uytterhoeven 		struct per_cpu_zonestat *pzstats;
177ebeac3eaSGeert Uytterhoeven 
178ebeac3eaSGeert Uytterhoeven 		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
179ebeac3eaSGeert Uytterhoeven 		for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
180ebeac3eaSGeert Uytterhoeven 			zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0);
181ebeac3eaSGeert Uytterhoeven 	}
182ebeac3eaSGeert Uytterhoeven 
183ebeac3eaSGeert Uytterhoeven 	for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
184ebeac3eaSGeert Uytterhoeven 		zone_numa_event_add(zone_numa_events[item], zone, item);
185ebeac3eaSGeert Uytterhoeven }
186ebeac3eaSGeert Uytterhoeven 
187ebeac3eaSGeert Uytterhoeven void fold_vm_numa_events(void)
188ebeac3eaSGeert Uytterhoeven {
189ebeac3eaSGeert Uytterhoeven 	struct zone *zone;
190ebeac3eaSGeert Uytterhoeven 
191ebeac3eaSGeert Uytterhoeven 	for_each_populated_zone(zone)
192ebeac3eaSGeert Uytterhoeven 		fold_vm_zone_numa_events(zone);
193ebeac3eaSGeert Uytterhoeven }
194ebeac3eaSGeert Uytterhoeven #endif
195ebeac3eaSGeert Uytterhoeven 
1962244b95aSChristoph Lameter #ifdef CONFIG_SMP
1972244b95aSChristoph Lameter 
198b44129b3SMel Gorman int calculate_pressure_threshold(struct zone *zone)
19988f5acf8SMel Gorman {
20088f5acf8SMel Gorman 	int threshold;
20188f5acf8SMel Gorman 	int watermark_distance;
20288f5acf8SMel Gorman 
20388f5acf8SMel Gorman 	/*
20488f5acf8SMel Gorman 	 * As vmstats are not up to date, there is drift between the estimated
20588f5acf8SMel Gorman 	 * and real values. For high thresholds and a high number of CPUs, it
20688f5acf8SMel Gorman 	 * is possible for the min watermark to be breached while the estimated
20788f5acf8SMel Gorman 	 * value looks fine. The pressure threshold is a reduced value such
20888f5acf8SMel Gorman 	 * that even the maximum amount of drift will not accidentally breach
20988f5acf8SMel Gorman 	 * the min watermark
21088f5acf8SMel Gorman 	 */
21188f5acf8SMel Gorman 	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
21288f5acf8SMel Gorman 	threshold = max(1, (int)(watermark_distance / num_online_cpus()));
21388f5acf8SMel Gorman 
21488f5acf8SMel Gorman 	/*
21588f5acf8SMel Gorman 	 * Maximum threshold is 125
21688f5acf8SMel Gorman 	 */
21788f5acf8SMel Gorman 	threshold = min(125, threshold);
21888f5acf8SMel Gorman 
21988f5acf8SMel Gorman 	return threshold;
22088f5acf8SMel Gorman }
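/*
 * Worked example with illustrative numbers (not taken from the source): if
 * low_wmark_pages() - min_wmark_pages() is 1024 pages and 16 CPUs are online,
 * the pressure threshold becomes max(1, 1024 / 16) = 64, below the cap of
 * 125. Each CPU can then carry at most 64 pages of unfolded NR_FREE_PAGES
 * drift, so even the worst case of 16 * 64 = 1024 pages stays within the
 * low-to-min watermark gap and cannot silently hide a breach of the min
 * watermark.
 */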
22188f5acf8SMel Gorman 
222b44129b3SMel Gorman int calculate_normal_threshold(struct zone *zone)
223df9ecabaSChristoph Lameter {
224df9ecabaSChristoph Lameter 	int threshold;
225df9ecabaSChristoph Lameter 	int mem;	/* memory in 128 MB units */
2262244b95aSChristoph Lameter 
2272244b95aSChristoph Lameter 	/*
228df9ecabaSChristoph Lameter 	 * The threshold scales with the number of processors and the amount
229df9ecabaSChristoph Lameter 	 * of memory per zone. More memory means that we can defer updates for
230df9ecabaSChristoph Lameter 	 * longer, more processors could lead to more contention.
231df9ecabaSChristoph Lameter  	 * fls() is used to have a cheap way of logarithmic scaling.
2322244b95aSChristoph Lameter 	 *
233df9ecabaSChristoph Lameter 	 * Some sample thresholds:
234df9ecabaSChristoph Lameter 	 *
235ea15ba17SMiaohe Lin 	 * Threshold	Processors	(fls)	Zonesize	fls(mem)+1
236df9ecabaSChristoph Lameter 	 * ------------------------------------------------------------------
237df9ecabaSChristoph Lameter 	 * 8		1		1	0.9-1 GB	4
238df9ecabaSChristoph Lameter 	 * 16		2		2	0.9-1 GB	4
239df9ecabaSChristoph Lameter 	 * 20 		2		2	1-2 GB		5
240df9ecabaSChristoph Lameter 	 * 24		2		2	2-4 GB		6
241df9ecabaSChristoph Lameter 	 * 28		2		2	4-8 GB		7
242df9ecabaSChristoph Lameter 	 * 32		2		2	8-16 GB		8
243df9ecabaSChristoph Lameter 	 * 4		2		2	<128M		1
244df9ecabaSChristoph Lameter 	 * 30		4		3	2-4 GB		5
245df9ecabaSChristoph Lameter 	 * 48		4		3	8-16 GB		8
246df9ecabaSChristoph Lameter 	 * 32		8		4	1-2 GB		4
247df9ecabaSChristoph Lameter 	 * 32		8		4	0.9-1GB		4
248df9ecabaSChristoph Lameter 	 * 10		16		5	<128M		1
249df9ecabaSChristoph Lameter 	 * 40		16		5	900M		4
250df9ecabaSChristoph Lameter 	 * 70		64		7	2-4 GB		5
251df9ecabaSChristoph Lameter 	 * 84		64		7	4-8 GB		6
252df9ecabaSChristoph Lameter 	 * 108		512		9	4-8 GB		6
253df9ecabaSChristoph Lameter 	 * 125		1024		10	8-16 GB		8
254df9ecabaSChristoph Lameter 	 * 125		1024		10	16-32 GB	9
2552244b95aSChristoph Lameter 	 */
256df9ecabaSChristoph Lameter 
2579705bea5SArun KS 	mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);
258df9ecabaSChristoph Lameter 
259df9ecabaSChristoph Lameter 	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
260df9ecabaSChristoph Lameter 
261df9ecabaSChristoph Lameter 	/*
262df9ecabaSChristoph Lameter 	 * Maximum threshold is 125
263df9ecabaSChristoph Lameter 	 */
264df9ecabaSChristoph Lameter 	threshold = min(125, threshold);
265df9ecabaSChristoph Lameter 
266df9ecabaSChristoph Lameter 	return threshold;
267df9ecabaSChristoph Lameter }
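/*
 * Worked example using the formula above (illustrative configuration): a
 * 2-CPU machine with a 1 GB zone has mem = 1 GB / 128 MB = 8, so
 * threshold = 2 * fls(2) * (1 + fls(8)) = 2 * 2 * (1 + 4) = 20, which matches
 * the "1-2 GB" row of the table.
 */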
268df9ecabaSChristoph Lameter 
269df9ecabaSChristoph Lameter /*
270df9ecabaSChristoph Lameter  * Refresh the thresholds for each zone.
271df9ecabaSChristoph Lameter  */
272a6cccdc3SKOSAKI Motohiro void refresh_zone_stat_thresholds(void)
2732244b95aSChristoph Lameter {
27475ef7184SMel Gorman 	struct pglist_data *pgdat;
275df9ecabaSChristoph Lameter 	struct zone *zone;
276df9ecabaSChristoph Lameter 	int cpu;
277df9ecabaSChristoph Lameter 	int threshold;
278df9ecabaSChristoph Lameter 
27975ef7184SMel Gorman 	/* Zero current pgdat thresholds */
28075ef7184SMel Gorman 	for_each_online_pgdat(pgdat) {
28175ef7184SMel Gorman 		for_each_online_cpu(cpu) {
28275ef7184SMel Gorman 			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
28375ef7184SMel Gorman 		}
28475ef7184SMel Gorman 	}
28575ef7184SMel Gorman 
286ee99c71cSKOSAKI Motohiro 	for_each_populated_zone(zone) {
28775ef7184SMel Gorman 		struct pglist_data *pgdat = zone->zone_pgdat;
288aa454840SChristoph Lameter 		unsigned long max_drift, tolerate_drift;
289aa454840SChristoph Lameter 
290b44129b3SMel Gorman 		threshold = calculate_normal_threshold(zone);
291df9ecabaSChristoph Lameter 
29275ef7184SMel Gorman 		for_each_online_cpu(cpu) {
29375ef7184SMel Gorman 			int pgdat_threshold;
29475ef7184SMel Gorman 
29528f836b6SMel Gorman 			per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
29699dcc3e5SChristoph Lameter 							= threshold;
2971d90ca89SKemi Wang 
29875ef7184SMel Gorman 			/* Base nodestat threshold on the largest populated zone. */
29975ef7184SMel Gorman 			pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
30075ef7184SMel Gorman 			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
30175ef7184SMel Gorman 				= max(threshold, pgdat_threshold);
30275ef7184SMel Gorman 		}
30375ef7184SMel Gorman 
304aa454840SChristoph Lameter 		/*
305aa454840SChristoph Lameter 		 * Only set percpu_drift_mark if there is a danger that
306aa454840SChristoph Lameter 		 * NR_FREE_PAGES reports the low watermark is ok when in fact
307aa454840SChristoph Lameter 		 * the min watermark could be breached by an allocation
308aa454840SChristoph Lameter 		 */
309aa454840SChristoph Lameter 		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
310aa454840SChristoph Lameter 		max_drift = num_online_cpus() * threshold;
311aa454840SChristoph Lameter 		if (max_drift > tolerate_drift)
312aa454840SChristoph Lameter 			zone->percpu_drift_mark = high_wmark_pages(zone) +
313aa454840SChristoph Lameter 					max_drift;
314df9ecabaSChristoph Lameter 	}
3152244b95aSChristoph Lameter }
3162244b95aSChristoph Lameter 
317b44129b3SMel Gorman void set_pgdat_percpu_threshold(pg_data_t *pgdat,
318b44129b3SMel Gorman 				int (*calculate_pressure)(struct zone *))
31988f5acf8SMel Gorman {
32088f5acf8SMel Gorman 	struct zone *zone;
32188f5acf8SMel Gorman 	int cpu;
32288f5acf8SMel Gorman 	int threshold;
32388f5acf8SMel Gorman 	int i;
32488f5acf8SMel Gorman 
32588f5acf8SMel Gorman 	for (i = 0; i < pgdat->nr_zones; i++) {
32688f5acf8SMel Gorman 		zone = &pgdat->node_zones[i];
32788f5acf8SMel Gorman 		if (!zone->percpu_drift_mark)
32888f5acf8SMel Gorman 			continue;
32988f5acf8SMel Gorman 
330b44129b3SMel Gorman 		threshold = (*calculate_pressure)(zone);
3311d90ca89SKemi Wang 		for_each_online_cpu(cpu)
33228f836b6SMel Gorman 			per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
33388f5acf8SMel Gorman 							= threshold;
33488f5acf8SMel Gorman 	}
33588f5acf8SMel Gorman }
33688f5acf8SMel Gorman 
3372244b95aSChristoph Lameter /*
338bea04b07SJianyu Zhan  * For use when we know that interrupts are disabled,
339bea04b07SJianyu Zhan  * or when we know that preemption is disabled and that
340bea04b07SJianyu Zhan  * particular counter cannot be updated from interrupt context.
3412244b95aSChristoph Lameter  */
3422244b95aSChristoph Lameter void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
3436cdb18adSHeiko Carstens 			   long delta)
3442244b95aSChristoph Lameter {
34528f836b6SMel Gorman 	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
34612938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
3472244b95aSChristoph Lameter 	long x;
34812938a92SChristoph Lameter 	long t;
3492244b95aSChristoph Lameter 
350c68ed794SIngo Molnar 	/*
351c68ed794SIngo Molnar 	 * Accurate vmstat updates require a RMW. On !PREEMPT_RT kernels,
352c68ed794SIngo Molnar 	 * atomicity is provided by IRQs being disabled -- either explicitly
353c68ed794SIngo Molnar 	 * or via local_lock_irq. On PREEMPT_RT, local_lock_irq only disables
354c68ed794SIngo Molnar 	 * CPU migrations and preemption potentially corrupts a counter so
354c68ed794SIngo Molnar 	 * CPU migrations, and preemption potentially corrupts a counter, so
356c68ed794SIngo Molnar 	 */
3577a025e91SThomas Gleixner 	preempt_disable_nested();
358c68ed794SIngo Molnar 
35912938a92SChristoph Lameter 	x = delta + __this_cpu_read(*p);
3602244b95aSChristoph Lameter 
36112938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
36212938a92SChristoph Lameter 
36340610076SMiaohe Lin 	if (unlikely(abs(x) > t)) {
3642244b95aSChristoph Lameter 		zone_page_state_add(x, zone, item);
3652244b95aSChristoph Lameter 		x = 0;
3662244b95aSChristoph Lameter 	}
36712938a92SChristoph Lameter 	__this_cpu_write(*p, x);
368c68ed794SIngo Molnar 
3697a025e91SThomas Gleixner 	preempt_enable_nested();
3702244b95aSChristoph Lameter }
3712244b95aSChristoph Lameter EXPORT_SYMBOL(__mod_zone_page_state);
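/*
 * Hypothetical caller sketch (not from this file) for the contract described
 * above __mod_zone_page_state(): the non-atomic variant is only safe once the
 * caller has already excluded interrupts, or preemption for counters that are
 * never modified from interrupt context:
 *
 *	unsigned long flags;
 *
 *	local_irq_save(flags);
 *	__mod_zone_page_state(zone, NR_FREE_PAGES, -(1L << order));
 *	local_irq_restore(flags);
 *
 * Callers without such guarantees use mod_zone_page_state() further below,
 * which provides the protection itself.
 */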
3722244b95aSChristoph Lameter 
37375ef7184SMel Gorman void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
37475ef7184SMel Gorman 				long delta)
37575ef7184SMel Gorman {
37675ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
37775ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
37875ef7184SMel Gorman 	long x;
37975ef7184SMel Gorman 	long t;
38075ef7184SMel Gorman 
381ea426c2aSRoman Gushchin 	if (vmstat_item_in_bytes(item)) {
382629484aeSJohannes Weiner 		/*
383629484aeSJohannes Weiner 		 * Only cgroups use subpage accounting right now; at
384629484aeSJohannes Weiner 		 * the global level, these items still change in
385629484aeSJohannes Weiner 		 * multiples of whole pages. Store them as pages
386629484aeSJohannes Weiner 		 * internally to keep the per-cpu counters compact.
387629484aeSJohannes Weiner 		 */
388ea426c2aSRoman Gushchin 		VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
389ea426c2aSRoman Gushchin 		delta >>= PAGE_SHIFT;
390ea426c2aSRoman Gushchin 	}
391ea426c2aSRoman Gushchin 
392c68ed794SIngo Molnar 	/* See __mod_zone_page_state */
3937a025e91SThomas Gleixner 	preempt_disable_nested();
394c68ed794SIngo Molnar 
39575ef7184SMel Gorman 	x = delta + __this_cpu_read(*p);
39675ef7184SMel Gorman 
39775ef7184SMel Gorman 	t = __this_cpu_read(pcp->stat_threshold);
39875ef7184SMel Gorman 
39940610076SMiaohe Lin 	if (unlikely(abs(x) > t)) {
40075ef7184SMel Gorman 		node_page_state_add(x, pgdat, item);
40175ef7184SMel Gorman 		x = 0;
40275ef7184SMel Gorman 	}
40375ef7184SMel Gorman 	__this_cpu_write(*p, x);
404c68ed794SIngo Molnar 
4057a025e91SThomas Gleixner 	preempt_enable_nested();
40675ef7184SMel Gorman }
40775ef7184SMel Gorman EXPORT_SYMBOL(__mod_node_page_state);
40875ef7184SMel Gorman 
4092244b95aSChristoph Lameter /*
4102244b95aSChristoph Lameter  * Optimized increment and decrement functions.
4112244b95aSChristoph Lameter  *
4122244b95aSChristoph Lameter  * These are only for a single page and therefore can take a struct page *
4132244b95aSChristoph Lameter  * argument instead of struct zone *. This allows the inclusion of the code
4142244b95aSChristoph Lameter  * generated for page_zone(page) into the optimized functions.
4152244b95aSChristoph Lameter  *
4162244b95aSChristoph Lameter  * No overflow check is necessary and therefore the differential can be
4172244b95aSChristoph Lameter  * incremented or decremented in place which may allow the compilers to
4182244b95aSChristoph Lameter  * generate better code.
4192244b95aSChristoph Lameter  * The increment or decrement is known and therefore one boundary check can
4202244b95aSChristoph Lameter  * be omitted.
4212244b95aSChristoph Lameter  *
422df9ecabaSChristoph Lameter  * NOTE: These functions are very performance sensitive. Change only
423df9ecabaSChristoph Lameter  * with care.
424df9ecabaSChristoph Lameter  *
4252244b95aSChristoph Lameter  * Some processors have inc/dec instructions that are atomic vs an interrupt.
4262244b95aSChristoph Lameter  * However, the code must first determine the differential location in a zone
4272244b95aSChristoph Lameter  * based on the processor number and then inc/dec the counter. There is no
4282244b95aSChristoph Lameter  * guarantee without disabling preemption that the processor will not change
4292244b95aSChristoph Lameter  * in between and therefore the atomicity vs. interrupt cannot be exploited
4302244b95aSChristoph Lameter  * in a useful way here.
4312244b95aSChristoph Lameter  */
432c8785385SChristoph Lameter void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
4332244b95aSChristoph Lameter {
43428f836b6SMel Gorman 	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
43512938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
43612938a92SChristoph Lameter 	s8 v, t;
4372244b95aSChristoph Lameter 
438c68ed794SIngo Molnar 	/* See __mod_zone_page_state */
4397a025e91SThomas Gleixner 	preempt_disable_nested();
440c68ed794SIngo Molnar 
441908ee0f1SChristoph Lameter 	v = __this_cpu_inc_return(*p);
44212938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
44312938a92SChristoph Lameter 	if (unlikely(v > t)) {
44412938a92SChristoph Lameter 		s8 overstep = t >> 1;
4452244b95aSChristoph Lameter 
44612938a92SChristoph Lameter 		zone_page_state_add(v + overstep, zone, item);
44712938a92SChristoph Lameter 		__this_cpu_write(*p, -overstep);
4482244b95aSChristoph Lameter 	}
449c68ed794SIngo Molnar 
4507a025e91SThomas Gleixner 	preempt_enable_nested();
4512244b95aSChristoph Lameter }
452ca889e6cSChristoph Lameter 
45375ef7184SMel Gorman void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
45475ef7184SMel Gorman {
45575ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
45675ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
45775ef7184SMel Gorman 	s8 v, t;
45875ef7184SMel Gorman 
459ea426c2aSRoman Gushchin 	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
460ea426c2aSRoman Gushchin 
461c68ed794SIngo Molnar 	/* See __mod_zone_page_state */
4627a025e91SThomas Gleixner 	preempt_disable_nested();
463c68ed794SIngo Molnar 
46475ef7184SMel Gorman 	v = __this_cpu_inc_return(*p);
46575ef7184SMel Gorman 	t = __this_cpu_read(pcp->stat_threshold);
46675ef7184SMel Gorman 	if (unlikely(v > t)) {
46775ef7184SMel Gorman 		s8 overstep = t >> 1;
46875ef7184SMel Gorman 
46975ef7184SMel Gorman 		node_page_state_add(v + overstep, pgdat, item);
47075ef7184SMel Gorman 		__this_cpu_write(*p, -overstep);
47175ef7184SMel Gorman 	}
472c68ed794SIngo Molnar 
4737a025e91SThomas Gleixner 	preempt_enable_nested();
47475ef7184SMel Gorman }
47575ef7184SMel Gorman 
476ca889e6cSChristoph Lameter void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
477ca889e6cSChristoph Lameter {
478ca889e6cSChristoph Lameter 	__inc_zone_state(page_zone(page), item);
479ca889e6cSChristoph Lameter }
4802244b95aSChristoph Lameter EXPORT_SYMBOL(__inc_zone_page_state);
4812244b95aSChristoph Lameter 
48275ef7184SMel Gorman void __inc_node_page_state(struct page *page, enum node_stat_item item)
48375ef7184SMel Gorman {
48475ef7184SMel Gorman 	__inc_node_state(page_pgdat(page), item);
48575ef7184SMel Gorman }
48675ef7184SMel Gorman EXPORT_SYMBOL(__inc_node_page_state);
48775ef7184SMel Gorman 
488c8785385SChristoph Lameter void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
4892244b95aSChristoph Lameter {
49028f836b6SMel Gorman 	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
49112938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
49212938a92SChristoph Lameter 	s8 v, t;
4932244b95aSChristoph Lameter 
494c68ed794SIngo Molnar 	/* See __mod_zone_page_state */
4957a025e91SThomas Gleixner 	preempt_disable_nested();
496c68ed794SIngo Molnar 
497908ee0f1SChristoph Lameter 	v = __this_cpu_dec_return(*p);
49812938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
49912938a92SChristoph Lameter 	if (unlikely(v < - t)) {
50012938a92SChristoph Lameter 		s8 overstep = t >> 1;
5012244b95aSChristoph Lameter 
50212938a92SChristoph Lameter 		zone_page_state_add(v - overstep, zone, item);
50312938a92SChristoph Lameter 		__this_cpu_write(*p, overstep);
5042244b95aSChristoph Lameter 	}
505c68ed794SIngo Molnar 
5067a025e91SThomas Gleixner 	preempt_enable_nested();
5072244b95aSChristoph Lameter }
508c8785385SChristoph Lameter 
50975ef7184SMel Gorman void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
51075ef7184SMel Gorman {
51175ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
51275ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
51375ef7184SMel Gorman 	s8 v, t;
51475ef7184SMel Gorman 
515ea426c2aSRoman Gushchin 	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
516ea426c2aSRoman Gushchin 
517c68ed794SIngo Molnar 	/* See __mod_zone_page_state */
5187a025e91SThomas Gleixner 	preempt_disable_nested();
519c68ed794SIngo Molnar 
52075ef7184SMel Gorman 	v = __this_cpu_dec_return(*p);
52175ef7184SMel Gorman 	t = __this_cpu_read(pcp->stat_threshold);
52275ef7184SMel Gorman 	if (unlikely(v < - t)) {
52375ef7184SMel Gorman 		s8 overstep = t >> 1;
52475ef7184SMel Gorman 
52575ef7184SMel Gorman 		node_page_state_add(v - overstep, pgdat, item);
52675ef7184SMel Gorman 		__this_cpu_write(*p, overstep);
52775ef7184SMel Gorman 	}
528c68ed794SIngo Molnar 
5297a025e91SThomas Gleixner 	preempt_enable_nested();
53075ef7184SMel Gorman }
53175ef7184SMel Gorman 
532c8785385SChristoph Lameter void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
533c8785385SChristoph Lameter {
534c8785385SChristoph Lameter 	__dec_zone_state(page_zone(page), item);
535c8785385SChristoph Lameter }
5362244b95aSChristoph Lameter EXPORT_SYMBOL(__dec_zone_page_state);
5372244b95aSChristoph Lameter 
53875ef7184SMel Gorman void __dec_node_page_state(struct page *page, enum node_stat_item item)
53975ef7184SMel Gorman {
54075ef7184SMel Gorman 	__dec_node_state(page_pgdat(page), item);
54175ef7184SMel Gorman }
54275ef7184SMel Gorman EXPORT_SYMBOL(__dec_node_page_state);
54375ef7184SMel Gorman 
5444156153cSHeiko Carstens #ifdef CONFIG_HAVE_CMPXCHG_LOCAL
5457c839120SChristoph Lameter /*
5467c839120SChristoph Lameter  * If we have cmpxchg_local support then we do not need to incur the overhead
5477c839120SChristoph Lameter  * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
5487c839120SChristoph Lameter  *
5497c839120SChristoph Lameter  * mod_state() modifies the zone counter state through atomic per cpu
5507c839120SChristoph Lameter  * operations.
5517c839120SChristoph Lameter  *
5527c839120SChristoph Lameter  * Overstep mode specifies how overstep should be handled:
5537c839120SChristoph Lameter  *     0       No overstepping
5547c839120SChristoph Lameter  *     1       Overstepping half of threshold
5557c839120SChristoph Lameter  *     -1      Overstepping minus half of threshold
5567c839120SChristoph Lameter */
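/*
 * Worked overstep example (illustrative numbers): assume stat_threshold is 32
 * and overstep mode 1. If the per-cpu diff already holds 32 and one more
 * increment arrives, n = 33 exceeds the threshold, so os = 16,
 * z = 33 + 16 = 49 is folded into the zone counter and the per-cpu diff is
 * set to -16. The next 16 increments then stay per-cpu instead of crossing
 * the threshold again right away.
 */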
55775ef7184SMel Gorman static inline void mod_zone_state(struct zone *zone,
55875ef7184SMel Gorman        enum zone_stat_item item, long delta, int overstep_mode)
5597c839120SChristoph Lameter {
56028f836b6SMel Gorman 	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
5617c839120SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
56277cd8148SUros Bizjak 	long n, t, z;
56377cd8148SUros Bizjak 	s8 o;
5647c839120SChristoph Lameter 
56577cd8148SUros Bizjak 	o = this_cpu_read(*p);
5667c839120SChristoph Lameter 	do {
5677c839120SChristoph Lameter 		z = 0;  /* overflow to zone counters */
5687c839120SChristoph Lameter 
5697c839120SChristoph Lameter 		/*
5707c839120SChristoph Lameter 		 * The fetching of the stat_threshold is racy. We may apply
5717c839120SChristoph Lameter 		 * a counter threshold to the wrong cpu if we get
572d3bc2367SChristoph Lameter 		 * rescheduled while executing here. However, the next
573d3bc2367SChristoph Lameter 		 * counter update will apply the threshold again and
574d3bc2367SChristoph Lameter 		 * therefore bring the counter under the threshold again.
575d3bc2367SChristoph Lameter 		 *
576d3bc2367SChristoph Lameter 		 * Most of the time the thresholds are the same anyways
577d3bc2367SChristoph Lameter 		 * for all cpus in a zone.
5787c839120SChristoph Lameter 		 */
5797c839120SChristoph Lameter 		t = this_cpu_read(pcp->stat_threshold);
5807c839120SChristoph Lameter 
58177cd8148SUros Bizjak 		n = delta + (long)o;
5827c839120SChristoph Lameter 
58340610076SMiaohe Lin 		if (abs(n) > t) {
5847c839120SChristoph Lameter 			int os = overstep_mode * (t >> 1) ;
5857c839120SChristoph Lameter 
5867c839120SChristoph Lameter 			/* Overflow must be added to zone counters */
5877c839120SChristoph Lameter 			z = n + os;
5887c839120SChristoph Lameter 			n = -os;
5897c839120SChristoph Lameter 		}
59077cd8148SUros Bizjak 	} while (!this_cpu_try_cmpxchg(*p, &o, n));
5917c839120SChristoph Lameter 
5927c839120SChristoph Lameter 	if (z)
5937c839120SChristoph Lameter 		zone_page_state_add(z, zone, item);
5947c839120SChristoph Lameter }
5957c839120SChristoph Lameter 
5967c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
5976cdb18adSHeiko Carstens 			 long delta)
5987c839120SChristoph Lameter {
59975ef7184SMel Gorman 	mod_zone_state(zone, item, delta, 0);
6007c839120SChristoph Lameter }
6017c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state);
6027c839120SChristoph Lameter 
6037c839120SChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item)
6047c839120SChristoph Lameter {
60575ef7184SMel Gorman 	mod_zone_state(page_zone(page), item, 1, 1);
6067c839120SChristoph Lameter }
6077c839120SChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state);
6087c839120SChristoph Lameter 
6097c839120SChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item)
6107c839120SChristoph Lameter {
61175ef7184SMel Gorman 	mod_zone_state(page_zone(page), item, -1, -1);
6127c839120SChristoph Lameter }
6137c839120SChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state);
61475ef7184SMel Gorman 
61575ef7184SMel Gorman static inline void mod_node_state(struct pglist_data *pgdat,
61675ef7184SMel Gorman        enum node_stat_item item, int delta, int overstep_mode)
61775ef7184SMel Gorman {
61875ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
61975ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
62077cd8148SUros Bizjak 	long n, t, z;
62177cd8148SUros Bizjak 	s8 o;
62275ef7184SMel Gorman 
623ea426c2aSRoman Gushchin 	if (vmstat_item_in_bytes(item)) {
624629484aeSJohannes Weiner 		/*
625629484aeSJohannes Weiner 		 * Only cgroups use subpage accounting right now; at
626629484aeSJohannes Weiner 		 * the global level, these items still change in
627629484aeSJohannes Weiner 		 * multiples of whole pages. Store them as pages
628629484aeSJohannes Weiner 		 * internally to keep the per-cpu counters compact.
629629484aeSJohannes Weiner 		 */
630ea426c2aSRoman Gushchin 		VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
631ea426c2aSRoman Gushchin 		delta >>= PAGE_SHIFT;
632ea426c2aSRoman Gushchin 	}
633ea426c2aSRoman Gushchin 
63477cd8148SUros Bizjak 	o = this_cpu_read(*p);
63575ef7184SMel Gorman 	do {
63675ef7184SMel Gorman 		z = 0;  /* overflow to node counters */
63775ef7184SMel Gorman 
63875ef7184SMel Gorman 		/*
63975ef7184SMel Gorman 		 * The fetching of the stat_threshold is racy. We may apply
64075ef7184SMel Gorman 		 * a counter threshold to the wrong cpu if we get
64175ef7184SMel Gorman 		 * rescheduled while executing here. However, the next
64275ef7184SMel Gorman 		 * counter update will apply the threshold again and
64375ef7184SMel Gorman 		 * therefore bring the counter under the threshold again.
64475ef7184SMel Gorman 		 *
64575ef7184SMel Gorman 		 * Most of the time the thresholds are the same anyways
64675ef7184SMel Gorman 		 * for all cpus in a node.
64775ef7184SMel Gorman 		 */
64875ef7184SMel Gorman 		t = this_cpu_read(pcp->stat_threshold);
64975ef7184SMel Gorman 
65077cd8148SUros Bizjak 		n = delta + (long)o;
65175ef7184SMel Gorman 
65240610076SMiaohe Lin 		if (abs(n) > t) {
65375ef7184SMel Gorman 			int os = overstep_mode * (t >> 1) ;
65475ef7184SMel Gorman 
65575ef7184SMel Gorman 			/* Overflow must be added to node counters */
65675ef7184SMel Gorman 			z = n + os;
65775ef7184SMel Gorman 			n = -os;
65875ef7184SMel Gorman 		}
65977cd8148SUros Bizjak 	} while (!this_cpu_try_cmpxchg(*p, &o, n));
66075ef7184SMel Gorman 
66175ef7184SMel Gorman 	if (z)
66275ef7184SMel Gorman 		node_page_state_add(z, pgdat, item);
66375ef7184SMel Gorman }
66475ef7184SMel Gorman 
66575ef7184SMel Gorman void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
66675ef7184SMel Gorman 					long delta)
66775ef7184SMel Gorman {
66875ef7184SMel Gorman 	mod_node_state(pgdat, item, delta, 0);
66975ef7184SMel Gorman }
67075ef7184SMel Gorman EXPORT_SYMBOL(mod_node_page_state);
67175ef7184SMel Gorman 
67275ef7184SMel Gorman void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
67375ef7184SMel Gorman {
67475ef7184SMel Gorman 	mod_node_state(pgdat, item, 1, 1);
67575ef7184SMel Gorman }
67675ef7184SMel Gorman 
67775ef7184SMel Gorman void inc_node_page_state(struct page *page, enum node_stat_item item)
67875ef7184SMel Gorman {
67975ef7184SMel Gorman 	mod_node_state(page_pgdat(page), item, 1, 1);
68075ef7184SMel Gorman }
68175ef7184SMel Gorman EXPORT_SYMBOL(inc_node_page_state);
68275ef7184SMel Gorman 
68375ef7184SMel Gorman void dec_node_page_state(struct page *page, enum node_stat_item item)
68475ef7184SMel Gorman {
68575ef7184SMel Gorman 	mod_node_state(page_pgdat(page), item, -1, -1);
68675ef7184SMel Gorman }
68775ef7184SMel Gorman EXPORT_SYMBOL(dec_node_page_state);
6887c839120SChristoph Lameter #else
6897c839120SChristoph Lameter /*
6907c839120SChristoph Lameter  * Use interrupt disable to serialize counter updates
6917c839120SChristoph Lameter  */
6927c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
6936cdb18adSHeiko Carstens 			 long delta)
6947c839120SChristoph Lameter {
6957c839120SChristoph Lameter 	unsigned long flags;
6967c839120SChristoph Lameter 
6977c839120SChristoph Lameter 	local_irq_save(flags);
6987c839120SChristoph Lameter 	__mod_zone_page_state(zone, item, delta);
6997c839120SChristoph Lameter 	local_irq_restore(flags);
7007c839120SChristoph Lameter }
7017c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state);
7027c839120SChristoph Lameter 
7032244b95aSChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item)
7042244b95aSChristoph Lameter {
7052244b95aSChristoph Lameter 	unsigned long flags;
7062244b95aSChristoph Lameter 	struct zone *zone;
7072244b95aSChristoph Lameter 
7082244b95aSChristoph Lameter 	zone = page_zone(page);
7092244b95aSChristoph Lameter 	local_irq_save(flags);
710ca889e6cSChristoph Lameter 	__inc_zone_state(zone, item);
7112244b95aSChristoph Lameter 	local_irq_restore(flags);
7122244b95aSChristoph Lameter }
7132244b95aSChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state);
7142244b95aSChristoph Lameter 
7152244b95aSChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item)
7162244b95aSChristoph Lameter {
7172244b95aSChristoph Lameter 	unsigned long flags;
7182244b95aSChristoph Lameter 
7192244b95aSChristoph Lameter 	local_irq_save(flags);
720a302eb4eSChristoph Lameter 	__dec_zone_page_state(page, item);
7212244b95aSChristoph Lameter 	local_irq_restore(flags);
7222244b95aSChristoph Lameter }
7232244b95aSChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state);
7242244b95aSChristoph Lameter 
72575ef7184SMel Gorman void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
72675ef7184SMel Gorman {
72775ef7184SMel Gorman 	unsigned long flags;
72875ef7184SMel Gorman 
72975ef7184SMel Gorman 	local_irq_save(flags);
73075ef7184SMel Gorman 	__inc_node_state(pgdat, item);
73175ef7184SMel Gorman 	local_irq_restore(flags);
73275ef7184SMel Gorman }
73375ef7184SMel Gorman EXPORT_SYMBOL(inc_node_state);
73475ef7184SMel Gorman 
73575ef7184SMel Gorman void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
73675ef7184SMel Gorman 					long delta)
73775ef7184SMel Gorman {
73875ef7184SMel Gorman 	unsigned long flags;
73975ef7184SMel Gorman 
74075ef7184SMel Gorman 	local_irq_save(flags);
74175ef7184SMel Gorman 	__mod_node_page_state(pgdat, item, delta);
74275ef7184SMel Gorman 	local_irq_restore(flags);
74375ef7184SMel Gorman }
74475ef7184SMel Gorman EXPORT_SYMBOL(mod_node_page_state);
74575ef7184SMel Gorman 
74675ef7184SMel Gorman void inc_node_page_state(struct page *page, enum node_stat_item item)
74775ef7184SMel Gorman {
74875ef7184SMel Gorman 	unsigned long flags;
74975ef7184SMel Gorman 	struct pglist_data *pgdat;
75075ef7184SMel Gorman 
75175ef7184SMel Gorman 	pgdat = page_pgdat(page);
75275ef7184SMel Gorman 	local_irq_save(flags);
75375ef7184SMel Gorman 	__inc_node_state(pgdat, item);
75475ef7184SMel Gorman 	local_irq_restore(flags);
75575ef7184SMel Gorman }
75675ef7184SMel Gorman EXPORT_SYMBOL(inc_node_page_state);
75775ef7184SMel Gorman 
75875ef7184SMel Gorman void dec_node_page_state(struct page *page, enum node_stat_item item)
75975ef7184SMel Gorman {
76075ef7184SMel Gorman 	unsigned long flags;
76175ef7184SMel Gorman 
76275ef7184SMel Gorman 	local_irq_save(flags);
76375ef7184SMel Gorman 	__dec_node_page_state(page, item);
76475ef7184SMel Gorman 	local_irq_restore(flags);
76575ef7184SMel Gorman }
76675ef7184SMel Gorman EXPORT_SYMBOL(dec_node_page_state);
76775ef7184SMel Gorman #endif
7687cc36bbdSChristoph Lameter 
7697cc36bbdSChristoph Lameter /*
7707cc36bbdSChristoph Lameter  * Fold a differential into the global counters.
7717cc36bbdSChristoph Lameter  * Returns the number of counters updated.
7727cc36bbdSChristoph Lameter  */
77375ef7184SMel Gorman static int fold_diff(int *zone_diff, int *node_diff)
7744edb0748SChristoph Lameter {
7754edb0748SChristoph Lameter 	int i;
7767cc36bbdSChristoph Lameter 	int changes = 0;
7774edb0748SChristoph Lameter 
7784edb0748SChristoph Lameter 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
77975ef7184SMel Gorman 		if (zone_diff[i]) {
78075ef7184SMel Gorman 			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
78175ef7184SMel Gorman 			changes++;
78275ef7184SMel Gorman 	}
78375ef7184SMel Gorman 
78475ef7184SMel Gorman 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
78575ef7184SMel Gorman 		if (node_diff[i]) {
78675ef7184SMel Gorman 			atomic_long_add(node_diff[i], &vm_node_stat[i]);
7877cc36bbdSChristoph Lameter 			changes++;
7887cc36bbdSChristoph Lameter 	}
7897cc36bbdSChristoph Lameter 	return changes;
7904edb0748SChristoph Lameter }
791f19298b9SMel Gorman 
7922244b95aSChristoph Lameter /*
7932bb921e5SChristoph Lameter  * Update the zone counters for the current cpu.
794a7f75e25SChristoph Lameter  *
7954037d452SChristoph Lameter  * Note that refresh_cpu_vm_stats strives to only access
7964037d452SChristoph Lameter  * node local memory. The per cpu pagesets on remote zones are placed
7974037d452SChristoph Lameter  * in the memory local to the processor using that pageset. So the
7984037d452SChristoph Lameter  * loop over all zones will access a series of cachelines local to
7994037d452SChristoph Lameter  * the processor.
8004037d452SChristoph Lameter  *
8014037d452SChristoph Lameter  * The call to zone_page_state_add updates the cachelines with the
8024037d452SChristoph Lameter  * statistics in the remote zone struct as well as the global cachelines
8034037d452SChristoph Lameter  * with the global counters. These could cause remote node cache line
8044037d452SChristoph Lameter  * bouncing and will have to be only done when necessary.
8057cc36bbdSChristoph Lameter  *
8067cc36bbdSChristoph Lameter  * The function returns the number of global counters updated.
8072244b95aSChristoph Lameter  */
8080eb77e98SChristoph Lameter static int refresh_cpu_vm_stats(bool do_pagesets)
8092244b95aSChristoph Lameter {
81075ef7184SMel Gorman 	struct pglist_data *pgdat;
8112244b95aSChristoph Lameter 	struct zone *zone;
8122244b95aSChristoph Lameter 	int i;
81375ef7184SMel Gorman 	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
81475ef7184SMel Gorman 	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
8157cc36bbdSChristoph Lameter 	int changes = 0;
8162244b95aSChristoph Lameter 
817ee99c71cSKOSAKI Motohiro 	for_each_populated_zone(zone) {
81828f836b6SMel Gorman 		struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
81928f836b6SMel Gorman 		struct per_cpu_pages __percpu *pcp = zone->per_cpu_pageset;
8202244b95aSChristoph Lameter 
821fbc2edb0SChristoph Lameter 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
822a7f75e25SChristoph Lameter 			int v;
823a7f75e25SChristoph Lameter 
82428f836b6SMel Gorman 			v = this_cpu_xchg(pzstats->vm_stat_diff[i], 0);
825fbc2edb0SChristoph Lameter 			if (v) {
826fbc2edb0SChristoph Lameter 
827a7f75e25SChristoph Lameter 				atomic_long_add(v, &zone->vm_stat[i]);
82875ef7184SMel Gorman 				global_zone_diff[i] += v;
8294037d452SChristoph Lameter #ifdef CONFIG_NUMA
8304037d452SChristoph Lameter 				/* 3 seconds idle till flush */
83128f836b6SMel Gorman 				__this_cpu_write(pcp->expire, 3);
8324037d452SChristoph Lameter #endif
8332244b95aSChristoph Lameter 			}
834fbc2edb0SChristoph Lameter 		}
8353a321d2aSKemi Wang 
8360eb77e98SChristoph Lameter 		if (do_pagesets) {
8370eb77e98SChristoph Lameter 			cond_resched();
83851a755c5SHuang Ying 
83951a755c5SHuang Ying 			changes += decay_pcp_high(zone, this_cpu_ptr(pcp));
84051a755c5SHuang Ying #ifdef CONFIG_NUMA
8414037d452SChristoph Lameter 			/*
8424037d452SChristoph Lameter 			 * Deal with draining the remote pageset of this
8434037d452SChristoph Lameter 			 * processor
8444037d452SChristoph Lameter 			 *
8454037d452SChristoph Lameter 			 * Check if there are pages remaining in this pageset;
8464037d452SChristoph Lameter 			 * if not then there is nothing to expire.
8474037d452SChristoph Lameter 			 */
84828f836b6SMel Gorman 			if (!__this_cpu_read(pcp->expire) ||
84928f836b6SMel Gorman 			       !__this_cpu_read(pcp->count))
8504037d452SChristoph Lameter 				continue;
8514037d452SChristoph Lameter 
8524037d452SChristoph Lameter 			/*
8534037d452SChristoph Lameter 			 * We never drain zones local to this processor.
8544037d452SChristoph Lameter 			 */
8554037d452SChristoph Lameter 			if (zone_to_nid(zone) == numa_node_id()) {
85628f836b6SMel Gorman 				__this_cpu_write(pcp->expire, 0);
8574037d452SChristoph Lameter 				continue;
8584037d452SChristoph Lameter 			}
8594037d452SChristoph Lameter 
860fa8c4f9aSHuang Ying 			if (__this_cpu_dec_return(pcp->expire)) {
861fa8c4f9aSHuang Ying 				changes++;
8624037d452SChristoph Lameter 				continue;
863fa8c4f9aSHuang Ying 			}
8644037d452SChristoph Lameter 
86528f836b6SMel Gorman 			if (__this_cpu_read(pcp->count)) {
86628f836b6SMel Gorman 				drain_zone_pages(zone, this_cpu_ptr(pcp));
8677cc36bbdSChristoph Lameter 				changes++;
8687cc36bbdSChristoph Lameter 			}
8694037d452SChristoph Lameter #endif
8702244b95aSChristoph Lameter 		}
87151a755c5SHuang Ying 	}
87275ef7184SMel Gorman 
87375ef7184SMel Gorman 	for_each_online_pgdat(pgdat) {
87475ef7184SMel Gorman 		struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;
87575ef7184SMel Gorman 
87675ef7184SMel Gorman 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
87775ef7184SMel Gorman 			int v;
87875ef7184SMel Gorman 
87975ef7184SMel Gorman 			v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
88075ef7184SMel Gorman 			if (v) {
88175ef7184SMel Gorman 				atomic_long_add(v, &pgdat->vm_stat[i]);
88275ef7184SMel Gorman 				global_node_diff[i] += v;
88375ef7184SMel Gorman 			}
88475ef7184SMel Gorman 		}
88575ef7184SMel Gorman 	}
88675ef7184SMel Gorman 
88775ef7184SMel Gorman 	changes += fold_diff(global_zone_diff, global_node_diff);
8887cc36bbdSChristoph Lameter 	return changes;
8892244b95aSChristoph Lameter }
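/*
 * Illustrative timeline for the remote-pageset expiry above, assuming the
 * default stat refresh interval of about one second: folding a nonzero diff
 * for a zone re-arms that zone's pcp->expire to 3. Once the zone goes quiet,
 * each subsequent refresh_cpu_vm_stats() pass decrements expire, and when it
 * reaches zero any pages still cached in the remote pageset are handed back
 * via drain_zone_pages(). Pagesets of zones local to this processor are never
 * drained this way; their expire is simply cleared.
 */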
8902244b95aSChristoph Lameter 
89140f4b1eaSCody P Schafer /*
8922bb921e5SChristoph Lameter  * Fold the data for an offline cpu into the global array.
8932bb921e5SChristoph Lameter  * There cannot be any access by the offline cpu and therefore
8942bb921e5SChristoph Lameter  * synchronization is simplified.
8952bb921e5SChristoph Lameter  */
8962bb921e5SChristoph Lameter void cpu_vm_stats_fold(int cpu)
8972bb921e5SChristoph Lameter {
89875ef7184SMel Gorman 	struct pglist_data *pgdat;
8992bb921e5SChristoph Lameter 	struct zone *zone;
9002bb921e5SChristoph Lameter 	int i;
90175ef7184SMel Gorman 	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
90275ef7184SMel Gorman 	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
9032bb921e5SChristoph Lameter 
9042bb921e5SChristoph Lameter 	for_each_populated_zone(zone) {
90528f836b6SMel Gorman 		struct per_cpu_zonestat *pzstats;
9062bb921e5SChristoph Lameter 
90728f836b6SMel Gorman 		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
9082bb921e5SChristoph Lameter 
909f19298b9SMel Gorman 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
91028f836b6SMel Gorman 			if (pzstats->vm_stat_diff[i]) {
9112bb921e5SChristoph Lameter 				int v;
9122bb921e5SChristoph Lameter 
91328f836b6SMel Gorman 				v = pzstats->vm_stat_diff[i];
91428f836b6SMel Gorman 				pzstats->vm_stat_diff[i] = 0;
9152bb921e5SChristoph Lameter 				atomic_long_add(v, &zone->vm_stat[i]);
91675ef7184SMel Gorman 				global_zone_diff[i] += v;
9172bb921e5SChristoph Lameter 			}
918f19298b9SMel Gorman 		}
9193a321d2aSKemi Wang #ifdef CONFIG_NUMA
920f19298b9SMel Gorman 		for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
921f19298b9SMel Gorman 			if (pzstats->vm_numa_event[i]) {
922f19298b9SMel Gorman 				unsigned long v;
9233a321d2aSKemi Wang 
924f19298b9SMel Gorman 				v = pzstats->vm_numa_event[i];
925f19298b9SMel Gorman 				pzstats->vm_numa_event[i] = 0;
926f19298b9SMel Gorman 				zone_numa_event_add(v, zone, i);
927f19298b9SMel Gorman 			}
9283a321d2aSKemi Wang 		}
9293a321d2aSKemi Wang #endif
9302bb921e5SChristoph Lameter 	}
9312bb921e5SChristoph Lameter 
93275ef7184SMel Gorman 	for_each_online_pgdat(pgdat) {
93375ef7184SMel Gorman 		struct per_cpu_nodestat *p;
93475ef7184SMel Gorman 
93575ef7184SMel Gorman 		p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
93675ef7184SMel Gorman 
93775ef7184SMel Gorman 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
93875ef7184SMel Gorman 			if (p->vm_node_stat_diff[i]) {
93975ef7184SMel Gorman 				int v;
94075ef7184SMel Gorman 
94175ef7184SMel Gorman 				v = p->vm_node_stat_diff[i];
94275ef7184SMel Gorman 				p->vm_node_stat_diff[i] = 0;
94375ef7184SMel Gorman 				atomic_long_add(v, &pgdat->vm_stat[i]);
94475ef7184SMel Gorman 				global_node_diff[i] += v;
94575ef7184SMel Gorman 			}
94675ef7184SMel Gorman 	}
94775ef7184SMel Gorman 
94875ef7184SMel Gorman 	fold_diff(global_zone_diff, global_node_diff);
9492bb921e5SChristoph Lameter }
9502bb921e5SChristoph Lameter 
9512bb921e5SChristoph Lameter /*
95240f4b1eaSCody P Schafer  * this is only called if !populated_zone(zone), which implies no other users of
953f0953a1bSIngo Molnar  * pzstats->vm_stat_diff[] exist.
95440f4b1eaSCody P Schafer  */
95528f836b6SMel Gorman void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats)
9565a883813SMinchan Kim {
957f19298b9SMel Gorman 	unsigned long v;
9585a883813SMinchan Kim 	int i;
9595a883813SMinchan Kim 
960f19298b9SMel Gorman 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
96128f836b6SMel Gorman 		if (pzstats->vm_stat_diff[i]) {
962f19298b9SMel Gorman 			v = pzstats->vm_stat_diff[i];
96328f836b6SMel Gorman 			pzstats->vm_stat_diff[i] = 0;
964f19298b9SMel Gorman 			zone_page_state_add(v, zone, i);
965f19298b9SMel Gorman 		}
9665a883813SMinchan Kim 	}
9673a321d2aSKemi Wang 
9683a321d2aSKemi Wang #ifdef CONFIG_NUMA
969f19298b9SMel Gorman 	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
970f19298b9SMel Gorman 		if (pzstats->vm_numa_event[i]) {
971f19298b9SMel Gorman 			v = pzstats->vm_numa_event[i];
972f19298b9SMel Gorman 			pzstats->vm_numa_event[i] = 0;
973f19298b9SMel Gorman 			zone_numa_event_add(v, zone, i);
974f19298b9SMel Gorman 		}
9753a321d2aSKemi Wang 	}
9763a321d2aSKemi Wang #endif
9775a883813SMinchan Kim }
9782244b95aSChristoph Lameter #endif
9792244b95aSChristoph Lameter 
980ca889e6cSChristoph Lameter #ifdef CONFIG_NUMA
981ca889e6cSChristoph Lameter /*
98275ef7184SMel Gorman  * Determine the per node value of a stat item. This function
98375ef7184SMel Gorman  * is called frequently in a NUMA machine, so try to be as
98475ef7184SMel Gorman  * frugal as possible.
985c2d42c16SAndrew Morton  */
98675ef7184SMel Gorman unsigned long sum_zone_node_page_state(int node,
98775ef7184SMel Gorman 				 enum zone_stat_item item)
988c2d42c16SAndrew Morton {
989c2d42c16SAndrew Morton 	struct zone *zones = NODE_DATA(node)->node_zones;
990e87d59f7SJoonsoo Kim 	int i;
991e87d59f7SJoonsoo Kim 	unsigned long count = 0;
992c2d42c16SAndrew Morton 
993e87d59f7SJoonsoo Kim 	for (i = 0; i < MAX_NR_ZONES; i++)
994e87d59f7SJoonsoo Kim 		count += zone_page_state(zones + i, item);
995e87d59f7SJoonsoo Kim 
996e87d59f7SJoonsoo Kim 	return count;
997c2d42c16SAndrew Morton }
998c2d42c16SAndrew Morton 
999f19298b9SMel Gorman /* Determine the per node value of a numa stat item. */
1000f19298b9SMel Gorman unsigned long sum_zone_numa_event_state(int node,
10013a321d2aSKemi Wang 				 enum numa_stat_item item)
10023a321d2aSKemi Wang {
10033a321d2aSKemi Wang 	struct zone *zones = NODE_DATA(node)->node_zones;
10043a321d2aSKemi Wang 	unsigned long count = 0;
1005f19298b9SMel Gorman 	int i;
10063a321d2aSKemi Wang 
10073a321d2aSKemi Wang 	for (i = 0; i < MAX_NR_ZONES; i++)
1008f19298b9SMel Gorman 		count += zone_numa_event_state(zones + i, item);
10093a321d2aSKemi Wang 
10103a321d2aSKemi Wang 	return count;
10113a321d2aSKemi Wang }
10123a321d2aSKemi Wang 
101375ef7184SMel Gorman /*
101475ef7184SMel Gorman  * Determine the per node value of a stat item.
101575ef7184SMel Gorman  */
1016ea426c2aSRoman Gushchin unsigned long node_page_state_pages(struct pglist_data *pgdat,
101775ef7184SMel Gorman 				    enum node_stat_item item)
101875ef7184SMel Gorman {
101975ef7184SMel Gorman 	long x = atomic_long_read(&pgdat->vm_stat[item]);
102075ef7184SMel Gorman #ifdef CONFIG_SMP
102175ef7184SMel Gorman 	if (x < 0)
102275ef7184SMel Gorman 		x = 0;
102375ef7184SMel Gorman #endif
102475ef7184SMel Gorman 	return x;
102575ef7184SMel Gorman }
1026ea426c2aSRoman Gushchin 
1027ea426c2aSRoman Gushchin unsigned long node_page_state(struct pglist_data *pgdat,
1028ea426c2aSRoman Gushchin 			      enum node_stat_item item)
1029ea426c2aSRoman Gushchin {
1030ea426c2aSRoman Gushchin 	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
1031ea426c2aSRoman Gushchin 
1032ea426c2aSRoman Gushchin 	return node_page_state_pages(pgdat, item);
1033ea426c2aSRoman Gushchin }
1034ca889e6cSChristoph Lameter #endif
1035ca889e6cSChristoph Lameter 
1036d7a5752cSMel Gorman #ifdef CONFIG_COMPACTION
103736deb0beSNamhyung Kim 
1038d7a5752cSMel Gorman struct contig_page_info {
1039d7a5752cSMel Gorman 	unsigned long free_pages;
1040d7a5752cSMel Gorman 	unsigned long free_blocks_total;
1041d7a5752cSMel Gorman 	unsigned long free_blocks_suitable;
1042d7a5752cSMel Gorman };
1043d7a5752cSMel Gorman 
1044d7a5752cSMel Gorman /*
1045d7a5752cSMel Gorman  * Calculate the number of free pages in a zone, how many contiguous
1046d7a5752cSMel Gorman  * pages are free and how many are large enough to satisfy an allocation of
1047d7a5752cSMel Gorman  * the target size. Note that this function makes no attempt to estimate
1048d7a5752cSMel Gorman  * how many suitable free blocks there *might* be if MOVABLE pages were
1049d7a5752cSMel Gorman  * migrated. Calculating that is possible, but expensive and can be
1050d7a5752cSMel Gorman  * figured out from userspace
1051d7a5752cSMel Gorman  */
1052d7a5752cSMel Gorman static void fill_contig_page_info(struct zone *zone,
1053d7a5752cSMel Gorman 				unsigned int suitable_order,
1054d7a5752cSMel Gorman 				struct contig_page_info *info)
1055d7a5752cSMel Gorman {
1056d7a5752cSMel Gorman 	unsigned int order;
1057d7a5752cSMel Gorman 
1058d7a5752cSMel Gorman 	info->free_pages = 0;
1059d7a5752cSMel Gorman 	info->free_blocks_total = 0;
1060d7a5752cSMel Gorman 	info->free_blocks_suitable = 0;
1061d7a5752cSMel Gorman 
1062fd377218SKirill A. Shutemov 	for (order = 0; order < NR_PAGE_ORDERS; order++) {
1063d7a5752cSMel Gorman 		unsigned long blocks;
1064d7a5752cSMel Gorman 
1065af1c31acSLiu Shixin 		/*
1066af1c31acSLiu Shixin 		 * Count number of free blocks.
1067af1c31acSLiu Shixin 		 *
1068af1c31acSLiu Shixin 		 * Access to nr_free is lockless as nr_free is used only for
1069af1c31acSLiu Shixin 		 * diagnostic purposes. Use data_race to avoid KCSAN warning.
1070af1c31acSLiu Shixin 		 */
1071af1c31acSLiu Shixin 		blocks = data_race(zone->free_area[order].nr_free);
1072d7a5752cSMel Gorman 		info->free_blocks_total += blocks;
1073d7a5752cSMel Gorman 
1074d7a5752cSMel Gorman 		/* Count free base pages */
1075d7a5752cSMel Gorman 		info->free_pages += blocks << order;
1076d7a5752cSMel Gorman 
1077d7a5752cSMel Gorman 		/* Count the suitable free blocks */
1078d7a5752cSMel Gorman 		if (order >= suitable_order)
1079d7a5752cSMel Gorman 			info->free_blocks_suitable += blocks <<
1080d7a5752cSMel Gorman 						(order - suitable_order);
1081d7a5752cSMel Gorman 	}
1082d7a5752cSMel Gorman }
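/*
 * Illustrative example (editor's addition, hypothetical numbers): with
 * free_area nr_free counts of {8, 4, 2} for orders 0..2 and
 * suitable_order = 1, the loop above yields
 *   free_blocks_total    = 8 + 4 + 2          = 14
 *   free_pages           = 8*1 + 4*2 + 2*4    = 24
 *   free_blocks_suitable = 4 + (2 << (2 - 1)) = 8
 * i.e. eight chunks of at least order 1 could satisfy the request.
 */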
1083f1a5ab12SMel Gorman 
1084f1a5ab12SMel Gorman /*
1085f1a5ab12SMel Gorman  * A fragmentation index only makes sense if an allocation of a requested
1086f1a5ab12SMel Gorman  * size would fail. If that is true, the fragmentation index indicates
1087f1a5ab12SMel Gorman  * whether external fragmentation or a lack of memory was the problem.
1088f1a5ab12SMel Gorman  * The value can be used to determine if page reclaim or compaction
1089f1a5ab12SMel Gorman  * should be used
1090f1a5ab12SMel Gorman  */
109156de7263SMel Gorman static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
1092f1a5ab12SMel Gorman {
1093f1a5ab12SMel Gorman 	unsigned long requested = 1UL << order;
1094f1a5ab12SMel Gorman 
1095*5e0a760bSKirill A. Shutemov 	if (WARN_ON_ONCE(order > MAX_PAGE_ORDER))
109688d6ac40SWen Yang 		return 0;
109788d6ac40SWen Yang 
1098f1a5ab12SMel Gorman 	if (!info->free_blocks_total)
1099f1a5ab12SMel Gorman 		return 0;
1100f1a5ab12SMel Gorman 
1101f1a5ab12SMel Gorman 	/* Fragmentation index only makes sense when a request would fail */
1102f1a5ab12SMel Gorman 	if (info->free_blocks_suitable)
1103f1a5ab12SMel Gorman 		return -1000;
1104f1a5ab12SMel Gorman 
1105f1a5ab12SMel Gorman 	/*
1106f1a5ab12SMel Gorman 	 * Index is between 0 and 1 so return within 3 decimal places
1107f1a5ab12SMel Gorman 	 *
1108f1a5ab12SMel Gorman 	 * 0 => allocation would fail due to lack of memory
1109f1a5ab12SMel Gorman 	 * 1 => allocation would fail due to fragmentation
1110f1a5ab12SMel Gorman 	 */
1111f1a5ab12SMel Gorman 	return 1000 - div_u64((1000 + div_u64(info->free_pages * 1000ULL, requested)), info->free_blocks_total);
1112f1a5ab12SMel Gorman }
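/*
 * Worked example (editor's addition, hypothetical numbers): for order = 3
 * (requested = 8), free_pages = 1000, free_blocks_total = 500 and no
 * suitable block, the expression above evaluates to
 *   1000 - (1000 + 1000 * 1000 / 8) / 500 = 1000 - 126000 / 500 = 748
 * i.e. an index of ~0.748, pointing at external fragmentation rather than
 * a plain lack of memory.
 */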
111356de7263SMel Gorman 
1114facdaa91SNitin Gupta /*
1115facdaa91SNitin Gupta  * Calculates external fragmentation within a zone wrt the given order.
1116facdaa91SNitin Gupta  * It is defined as the percentage of pages found in blocks of size
1117facdaa91SNitin Gupta  * less than 1 << order. It returns values in range [0, 100].
1118facdaa91SNitin Gupta  */
1119d34c0a75SNitin Gupta unsigned int extfrag_for_order(struct zone *zone, unsigned int order)
1120facdaa91SNitin Gupta {
1121facdaa91SNitin Gupta 	struct contig_page_info info;
1122facdaa91SNitin Gupta 
1123facdaa91SNitin Gupta 	fill_contig_page_info(zone, order, &info);
1124facdaa91SNitin Gupta 	if (info.free_pages == 0)
1125facdaa91SNitin Gupta 		return 0;
1126facdaa91SNitin Gupta 
1127facdaa91SNitin Gupta 	return div_u64((info.free_pages -
1128facdaa91SNitin Gupta 			(info.free_blocks_suitable << order)) * 100,
1129facdaa91SNitin Gupta 			info.free_pages);
1130facdaa91SNitin Gupta }
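/*
 * Worked example (editor's addition, hypothetical numbers): for order = 8,
 * free_pages = 1000 and free_blocks_suitable = 2, the suitably sized blocks
 * hold 2 << 8 = 512 pages, so
 *   extfrag = (1000 - 512) * 100 / 1000 = 48
 * i.e. 48% of the free pages sit in blocks smaller than 1 << order.
 */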
1131facdaa91SNitin Gupta 
113256de7263SMel Gorman /* Same as __fragmentation index but allocs contig_page_info on stack */
113356de7263SMel Gorman int fragmentation_index(struct zone *zone, unsigned int order)
113456de7263SMel Gorman {
113556de7263SMel Gorman 	struct contig_page_info info;
113656de7263SMel Gorman 
113756de7263SMel Gorman 	fill_contig_page_info(zone, order, &info);
113856de7263SMel Gorman 	return __fragmentation_index(order, &info);
113956de7263SMel Gorman }
1140d7a5752cSMel Gorman #endif
1141d7a5752cSMel Gorman 
1142ebc5d83dSKonstantin Khlebnikov #if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \
1143ebc5d83dSKonstantin Khlebnikov     defined(CONFIG_NUMA) || defined(CONFIG_MEMCG)
1144fa25c503SKOSAKI Motohiro #ifdef CONFIG_ZONE_DMA
1145fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA(xx) xx "_dma",
1146fa25c503SKOSAKI Motohiro #else
1147fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA(xx)
1148fa25c503SKOSAKI Motohiro #endif
1149fa25c503SKOSAKI Motohiro 
1150fa25c503SKOSAKI Motohiro #ifdef CONFIG_ZONE_DMA32
1151fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA32(xx) xx "_dma32",
1152fa25c503SKOSAKI Motohiro #else
1153fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA32(xx)
1154fa25c503SKOSAKI Motohiro #endif
1155fa25c503SKOSAKI Motohiro 
1156fa25c503SKOSAKI Motohiro #ifdef CONFIG_HIGHMEM
1157fa25c503SKOSAKI Motohiro #define TEXT_FOR_HIGHMEM(xx) xx "_high",
1158fa25c503SKOSAKI Motohiro #else
1159fa25c503SKOSAKI Motohiro #define TEXT_FOR_HIGHMEM(xx)
1160fa25c503SKOSAKI Motohiro #endif
1161fa25c503SKOSAKI Motohiro 
1162a39c5d3cSHao Lee #ifdef CONFIG_ZONE_DEVICE
1163a39c5d3cSHao Lee #define TEXT_FOR_DEVICE(xx) xx "_device",
1164a39c5d3cSHao Lee #else
1165a39c5d3cSHao Lee #define TEXT_FOR_DEVICE(xx)
1166a39c5d3cSHao Lee #endif
1167a39c5d3cSHao Lee 
1168fa25c503SKOSAKI Motohiro #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
1169a39c5d3cSHao Lee 					TEXT_FOR_HIGHMEM(xx) xx "_movable", \
1170a39c5d3cSHao Lee 					TEXT_FOR_DEVICE(xx)
1171fa25c503SKOSAKI Motohiro 
1172fa25c503SKOSAKI Motohiro const char * const vmstat_text[] = {
11738d92890bSNeilBrown 	/* enum zone_stat_item counters */
1174fa25c503SKOSAKI Motohiro 	"nr_free_pages",
117571c799f4SMinchan Kim 	"nr_zone_inactive_anon",
117671c799f4SMinchan Kim 	"nr_zone_active_anon",
117771c799f4SMinchan Kim 	"nr_zone_inactive_file",
117871c799f4SMinchan Kim 	"nr_zone_active_file",
117971c799f4SMinchan Kim 	"nr_zone_unevictable",
11805a1c84b4SMel Gorman 	"nr_zone_write_pending",
1181fa25c503SKOSAKI Motohiro 	"nr_mlock",
1182fa25c503SKOSAKI Motohiro 	"nr_bounce",
118391537feeSMinchan Kim #if IS_ENABLED(CONFIG_ZSMALLOC)
118491537feeSMinchan Kim 	"nr_zspages",
118591537feeSMinchan Kim #endif
11863a321d2aSKemi Wang 	"nr_free_cma",
1187dcdfdd40SKirill A. Shutemov #ifdef CONFIG_UNACCEPTED_MEMORY
1188dcdfdd40SKirill A. Shutemov 	"nr_unaccepted",
1189dcdfdd40SKirill A. Shutemov #endif
11903a321d2aSKemi Wang 
11913a321d2aSKemi Wang 	/* enum numa_stat_item counters */
1192fa25c503SKOSAKI Motohiro #ifdef CONFIG_NUMA
1193fa25c503SKOSAKI Motohiro 	"numa_hit",
1194fa25c503SKOSAKI Motohiro 	"numa_miss",
1195fa25c503SKOSAKI Motohiro 	"numa_foreign",
1196fa25c503SKOSAKI Motohiro 	"numa_interleave",
1197fa25c503SKOSAKI Motohiro 	"numa_local",
1198fa25c503SKOSAKI Motohiro 	"numa_other",
1199fa25c503SKOSAKI Motohiro #endif
120009316c09SKonstantin Khlebnikov 
12019d7ea9a2SKonstantin Khlebnikov 	/* enum node_stat_item counters */
1202599d0c95SMel Gorman 	"nr_inactive_anon",
1203599d0c95SMel Gorman 	"nr_active_anon",
1204599d0c95SMel Gorman 	"nr_inactive_file",
1205599d0c95SMel Gorman 	"nr_active_file",
1206599d0c95SMel Gorman 	"nr_unevictable",
1207385386cfSJohannes Weiner 	"nr_slab_reclaimable",
1208385386cfSJohannes Weiner 	"nr_slab_unreclaimable",
1209599d0c95SMel Gorman 	"nr_isolated_anon",
1210599d0c95SMel Gorman 	"nr_isolated_file",
121168d48e6aSJohannes Weiner 	"workingset_nodes",
1212170b04b7SJoonsoo Kim 	"workingset_refault_anon",
1213170b04b7SJoonsoo Kim 	"workingset_refault_file",
1214170b04b7SJoonsoo Kim 	"workingset_activate_anon",
1215170b04b7SJoonsoo Kim 	"workingset_activate_file",
1216170b04b7SJoonsoo Kim 	"workingset_restore_anon",
1217170b04b7SJoonsoo Kim 	"workingset_restore_file",
12181e6b1085SMel Gorman 	"workingset_nodereclaim",
121950658e2eSMel Gorman 	"nr_anon_pages",
122050658e2eSMel Gorman 	"nr_mapped",
122111fb9989SMel Gorman 	"nr_file_pages",
122211fb9989SMel Gorman 	"nr_dirty",
122311fb9989SMel Gorman 	"nr_writeback",
122411fb9989SMel Gorman 	"nr_writeback_temp",
122511fb9989SMel Gorman 	"nr_shmem",
122611fb9989SMel Gorman 	"nr_shmem_hugepages",
122711fb9989SMel Gorman 	"nr_shmem_pmdmapped",
122860fbf0abSSong Liu 	"nr_file_hugepages",
122960fbf0abSSong Liu 	"nr_file_pmdmapped",
123011fb9989SMel Gorman 	"nr_anon_transparent_hugepages",
1231c4a25635SMel Gorman 	"nr_vmscan_write",
1232c4a25635SMel Gorman 	"nr_vmscan_immediate_reclaim",
1233c4a25635SMel Gorman 	"nr_dirtied",
1234c4a25635SMel Gorman 	"nr_written",
12358cd7c588SMel Gorman 	"nr_throttled_written",
1236b29940c1SVlastimil Babka 	"nr_kernel_misc_reclaimable",
12371970dc6fSJohn Hubbard 	"nr_foll_pin_acquired",
12381970dc6fSJohn Hubbard 	"nr_foll_pin_released",
1239991e7673SShakeel Butt 	"nr_kernel_stack",
1240991e7673SShakeel Butt #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
1241991e7673SShakeel Butt 	"nr_shadow_call_stack",
1242991e7673SShakeel Butt #endif
1243f0c0c115SShakeel Butt 	"nr_page_table_pages",
1244ebc97a52SYosry Ahmed 	"nr_sec_page_table_pages",
1245b6038942SShakeel Butt #ifdef CONFIG_SWAP
1246b6038942SShakeel Butt 	"nr_swapcached",
1247b6038942SShakeel Butt #endif
1248e39bb6beSHuang Ying #ifdef CONFIG_NUMA_BALANCING
1249e39bb6beSHuang Ying 	"pgpromote_success",
1250c6833e10SHuang Ying 	"pgpromote_candidate",
1251b805ab3cSLi Zhijian #endif
125223e9f013SLi Zhijian 	"pgdemote_kswapd",
125323e9f013SLi Zhijian 	"pgdemote_direct",
125423e9f013SLi Zhijian 	"pgdemote_khugepaged",
1255599d0c95SMel Gorman 
125609316c09SKonstantin Khlebnikov 	/* enum writeback_stat_item counters */
1257fa25c503SKOSAKI Motohiro 	"nr_dirty_threshold",
1258fa25c503SKOSAKI Motohiro 	"nr_dirty_background_threshold",
1259fa25c503SKOSAKI Motohiro 
1260ebc5d83dSKonstantin Khlebnikov #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
126109316c09SKonstantin Khlebnikov 	/* enum vm_event_item counters */
1262fa25c503SKOSAKI Motohiro 	"pgpgin",
1263fa25c503SKOSAKI Motohiro 	"pgpgout",
1264fa25c503SKOSAKI Motohiro 	"pswpin",
1265fa25c503SKOSAKI Motohiro 	"pswpout",
1266fa25c503SKOSAKI Motohiro 
1267fa25c503SKOSAKI Motohiro 	TEXTS_FOR_ZONES("pgalloc")
12687cc30fcfSMel Gorman 	TEXTS_FOR_ZONES("allocstall")
12697cc30fcfSMel Gorman 	TEXTS_FOR_ZONES("pgskip")
1270fa25c503SKOSAKI Motohiro 
1271fa25c503SKOSAKI Motohiro 	"pgfree",
1272fa25c503SKOSAKI Motohiro 	"pgactivate",
1273fa25c503SKOSAKI Motohiro 	"pgdeactivate",
1274f7ad2a6cSShaohua Li 	"pglazyfree",
1275fa25c503SKOSAKI Motohiro 
1276fa25c503SKOSAKI Motohiro 	"pgfault",
1277fa25c503SKOSAKI Motohiro 	"pgmajfault",
1278854e9ed0SMinchan Kim 	"pglazyfreed",
1279fa25c503SKOSAKI Motohiro 
1280599d0c95SMel Gorman 	"pgrefill",
1281798a6b87SPeter Xu 	"pgreuse",
1282599d0c95SMel Gorman 	"pgsteal_kswapd",
1283599d0c95SMel Gorman 	"pgsteal_direct",
128457e9cc50SJohannes Weiner 	"pgsteal_khugepaged",
1285599d0c95SMel Gorman 	"pgscan_kswapd",
1286599d0c95SMel Gorman 	"pgscan_direct",
128757e9cc50SJohannes Weiner 	"pgscan_khugepaged",
128868243e76SMel Gorman 	"pgscan_direct_throttle",
1289497a6c1bSJohannes Weiner 	"pgscan_anon",
1290497a6c1bSJohannes Weiner 	"pgscan_file",
1291497a6c1bSJohannes Weiner 	"pgsteal_anon",
1292497a6c1bSJohannes Weiner 	"pgsteal_file",
1293fa25c503SKOSAKI Motohiro 
1294fa25c503SKOSAKI Motohiro #ifdef CONFIG_NUMA
1295fa25c503SKOSAKI Motohiro 	"zone_reclaim_failed",
1296fa25c503SKOSAKI Motohiro #endif
1297fa25c503SKOSAKI Motohiro 	"pginodesteal",
1298fa25c503SKOSAKI Motohiro 	"slabs_scanned",
1299fa25c503SKOSAKI Motohiro 	"kswapd_inodesteal",
1300fa25c503SKOSAKI Motohiro 	"kswapd_low_wmark_hit_quickly",
1301fa25c503SKOSAKI Motohiro 	"kswapd_high_wmark_hit_quickly",
1302fa25c503SKOSAKI Motohiro 	"pageoutrun",
1303fa25c503SKOSAKI Motohiro 
1304fa25c503SKOSAKI Motohiro 	"pgrotated",
1305fa25c503SKOSAKI Motohiro 
13065509a5d2SDave Hansen 	"drop_pagecache",
13075509a5d2SDave Hansen 	"drop_slab",
13088e675f7aSKonstantin Khlebnikov 	"oom_kill",
13095509a5d2SDave Hansen 
131003c5a6e1SMel Gorman #ifdef CONFIG_NUMA_BALANCING
131103c5a6e1SMel Gorman 	"numa_pte_updates",
131272403b4aSMel Gorman 	"numa_huge_pte_updates",
131303c5a6e1SMel Gorman 	"numa_hint_faults",
131403c5a6e1SMel Gorman 	"numa_hint_faults_local",
131503c5a6e1SMel Gorman 	"numa_pages_migrated",
131603c5a6e1SMel Gorman #endif
13175647bc29SMel Gorman #ifdef CONFIG_MIGRATION
13185647bc29SMel Gorman 	"pgmigrate_success",
13195647bc29SMel Gorman 	"pgmigrate_fail",
13201a5bae25SAnshuman Khandual 	"thp_migration_success",
13211a5bae25SAnshuman Khandual 	"thp_migration_fail",
13221a5bae25SAnshuman Khandual 	"thp_migration_split",
13235647bc29SMel Gorman #endif
1324fa25c503SKOSAKI Motohiro #ifdef CONFIG_COMPACTION
1325397487dbSMel Gorman 	"compact_migrate_scanned",
1326397487dbSMel Gorman 	"compact_free_scanned",
1327397487dbSMel Gorman 	"compact_isolated",
1328fa25c503SKOSAKI Motohiro 	"compact_stall",
1329fa25c503SKOSAKI Motohiro 	"compact_fail",
1330fa25c503SKOSAKI Motohiro 	"compact_success",
1331698b1b30SVlastimil Babka 	"compact_daemon_wake",
13327f354a54SDavid Rientjes 	"compact_daemon_migrate_scanned",
13337f354a54SDavid Rientjes 	"compact_daemon_free_scanned",
1334fa25c503SKOSAKI Motohiro #endif
1335fa25c503SKOSAKI Motohiro 
1336fa25c503SKOSAKI Motohiro #ifdef CONFIG_HUGETLB_PAGE
1337fa25c503SKOSAKI Motohiro 	"htlb_buddy_alloc_success",
1338fa25c503SKOSAKI Motohiro 	"htlb_buddy_alloc_fail",
1339fa25c503SKOSAKI Motohiro #endif
1340bbb26920SMinchan Kim #ifdef CONFIG_CMA
1341bbb26920SMinchan Kim 	"cma_alloc_success",
1342bbb26920SMinchan Kim 	"cma_alloc_fail",
1343bbb26920SMinchan Kim #endif
1344fa25c503SKOSAKI Motohiro 	"unevictable_pgs_culled",
1345fa25c503SKOSAKI Motohiro 	"unevictable_pgs_scanned",
1346fa25c503SKOSAKI Motohiro 	"unevictable_pgs_rescued",
1347fa25c503SKOSAKI Motohiro 	"unevictable_pgs_mlocked",
1348fa25c503SKOSAKI Motohiro 	"unevictable_pgs_munlocked",
1349fa25c503SKOSAKI Motohiro 	"unevictable_pgs_cleared",
1350fa25c503SKOSAKI Motohiro 	"unevictable_pgs_stranded",
1351fa25c503SKOSAKI Motohiro 
1352fa25c503SKOSAKI Motohiro #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1353fa25c503SKOSAKI Motohiro 	"thp_fault_alloc",
1354fa25c503SKOSAKI Motohiro 	"thp_fault_fallback",
135585b9f46eSDavid Rientjes 	"thp_fault_fallback_charge",
1356fa25c503SKOSAKI Motohiro 	"thp_collapse_alloc",
1357fa25c503SKOSAKI Motohiro 	"thp_collapse_alloc_failed",
135895ecedcdSKirill A. Shutemov 	"thp_file_alloc",
1359dcdf11eeSDavid Rientjes 	"thp_file_fallback",
136085b9f46eSDavid Rientjes 	"thp_file_fallback_charge",
136195ecedcdSKirill A. Shutemov 	"thp_file_mapped",
1362122afea9SKirill A. Shutemov 	"thp_split_page",
1363122afea9SKirill A. Shutemov 	"thp_split_page_failed",
1364f9719a03SKirill A. Shutemov 	"thp_deferred_split_page",
1365122afea9SKirill A. Shutemov 	"thp_split_pmd",
1366e9ea874aSYang Yang 	"thp_scan_exceed_none_pte",
1367e9ea874aSYang Yang 	"thp_scan_exceed_swap_pte",
1368e9ea874aSYang Yang 	"thp_scan_exceed_share_pte",
1369ce9311cfSYisheng Xie #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
1370ce9311cfSYisheng Xie 	"thp_split_pud",
1371ce9311cfSYisheng Xie #endif
1372d8a8e1f0SKirill A. Shutemov 	"thp_zero_page_alloc",
1373d8a8e1f0SKirill A. Shutemov 	"thp_zero_page_alloc_failed",
1374225311a4SHuang Ying 	"thp_swpout",
1375fe490cc0SHuang Ying 	"thp_swpout_fallback",
1376fa25c503SKOSAKI Motohiro #endif
137709316c09SKonstantin Khlebnikov #ifdef CONFIG_MEMORY_BALLOON
137809316c09SKonstantin Khlebnikov 	"balloon_inflate",
137909316c09SKonstantin Khlebnikov 	"balloon_deflate",
138009316c09SKonstantin Khlebnikov #ifdef CONFIG_BALLOON_COMPACTION
138109316c09SKonstantin Khlebnikov 	"balloon_migrate",
138209316c09SKonstantin Khlebnikov #endif
138309316c09SKonstantin Khlebnikov #endif /* CONFIG_MEMORY_BALLOON */
1384ec659934SMel Gorman #ifdef CONFIG_DEBUG_TLBFLUSH
13859824cf97SDave Hansen 	"nr_tlb_remote_flush",
13869824cf97SDave Hansen 	"nr_tlb_remote_flush_received",
13879824cf97SDave Hansen 	"nr_tlb_local_flush_all",
13889824cf97SDave Hansen 	"nr_tlb_local_flush_one",
1389ec659934SMel Gorman #endif /* CONFIG_DEBUG_TLBFLUSH */
1390fa25c503SKOSAKI Motohiro 
1391cbc65df2SHuang Ying #ifdef CONFIG_SWAP
1392cbc65df2SHuang Ying 	"swap_ra",
1393cbc65df2SHuang Ying 	"swap_ra_hit",
13944d45c3afSYang Yang #ifdef CONFIG_KSM
13954d45c3afSYang Yang 	"ksm_swpin_copy",
13964d45c3afSYang Yang #endif
1397cbc65df2SHuang Ying #endif
139894bfe85bSYang Yang #ifdef CONFIG_KSM
139994bfe85bSYang Yang 	"cow_ksm",
140094bfe85bSYang Yang #endif
1401f6498b77SJohannes Weiner #ifdef CONFIG_ZSWAP
1402f6498b77SJohannes Weiner 	"zswpin",
1403f6498b77SJohannes Weiner 	"zswpout",
14047108cc3fSDomenico Cerasuolo 	"zswpwb",
1405f6498b77SJohannes Weiner #endif
1406575299eaSSaravanan D #ifdef CONFIG_X86
1407575299eaSSaravanan D 	"direct_map_level2_splits",
1408575299eaSSaravanan D 	"direct_map_level3_splits",
1409575299eaSSaravanan D #endif
141052f23865SSuren Baghdasaryan #ifdef CONFIG_PER_VMA_LOCK_STATS
141152f23865SSuren Baghdasaryan 	"vma_lock_success",
141252f23865SSuren Baghdasaryan 	"vma_lock_abort",
141352f23865SSuren Baghdasaryan 	"vma_lock_retry",
141452f23865SSuren Baghdasaryan 	"vma_lock_miss",
141552f23865SSuren Baghdasaryan #endif
1416ebc5d83dSKonstantin Khlebnikov #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
1417fa25c503SKOSAKI Motohiro };
1418ebc5d83dSKonstantin Khlebnikov #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
1419fa25c503SKOSAKI Motohiro 
14203c486871SAndrew Morton #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
14213c486871SAndrew Morton      defined(CONFIG_PROC_FS)
14223c486871SAndrew Morton static void *frag_start(struct seq_file *m, loff_t *pos)
14233c486871SAndrew Morton {
14243c486871SAndrew Morton 	pg_data_t *pgdat;
14253c486871SAndrew Morton 	loff_t node = *pos;
14263c486871SAndrew Morton 
14273c486871SAndrew Morton 	for (pgdat = first_online_pgdat();
14283c486871SAndrew Morton 	     pgdat && node;
14293c486871SAndrew Morton 	     pgdat = next_online_pgdat(pgdat))
14303c486871SAndrew Morton 		--node;
14313c486871SAndrew Morton 
14323c486871SAndrew Morton 	return pgdat;
14333c486871SAndrew Morton }
14343c486871SAndrew Morton 
14353c486871SAndrew Morton static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
14363c486871SAndrew Morton {
14373c486871SAndrew Morton 	pg_data_t *pgdat = (pg_data_t *)arg;
14383c486871SAndrew Morton 
14393c486871SAndrew Morton 	(*pos)++;
14403c486871SAndrew Morton 	return next_online_pgdat(pgdat);
14413c486871SAndrew Morton }
14423c486871SAndrew Morton 
14433c486871SAndrew Morton static void frag_stop(struct seq_file *m, void *arg)
14443c486871SAndrew Morton {
14453c486871SAndrew Morton }
14463c486871SAndrew Morton 
1447b2bd8598SDavid Rientjes /*
1448b2bd8598SDavid Rientjes  * Walk zones in a node and print using a callback.
1449b2bd8598SDavid Rientjes  * If @assert_populated is true, only use callback for zones that are populated.
1450b2bd8598SDavid Rientjes  */
14513c486871SAndrew Morton static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
1452727c080fSVinayak Menon 		bool assert_populated, bool nolock,
14533c486871SAndrew Morton 		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
14543c486871SAndrew Morton {
14553c486871SAndrew Morton 	struct zone *zone;
14563c486871SAndrew Morton 	struct zone *node_zones = pgdat->node_zones;
14573c486871SAndrew Morton 	unsigned long flags;
14583c486871SAndrew Morton 
14593c486871SAndrew Morton 	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
1460b2bd8598SDavid Rientjes 		if (assert_populated && !populated_zone(zone))
14613c486871SAndrew Morton 			continue;
14623c486871SAndrew Morton 
1463727c080fSVinayak Menon 		if (!nolock)
14643c486871SAndrew Morton 			spin_lock_irqsave(&zone->lock, flags);
14653c486871SAndrew Morton 		print(m, pgdat, zone);
1466727c080fSVinayak Menon 		if (!nolock)
14673c486871SAndrew Morton 			spin_unlock_irqrestore(&zone->lock, flags);
14683c486871SAndrew Morton 	}
14693c486871SAndrew Morton }
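/*
 * Editor's note: the seq_file handlers below (frag_show, the pagetypeinfo
 * printers and zoneinfo_show) all reuse this helper and differ only in the
 * per-zone print callback they pass in.
 */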
14703c486871SAndrew Morton #endif
14713c486871SAndrew Morton 
1472d7a5752cSMel Gorman #ifdef CONFIG_PROC_FS
1473467c996cSMel Gorman static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
1474467c996cSMel Gorman 						struct zone *zone)
1475467c996cSMel Gorman {
1476467c996cSMel Gorman 	int order;
1477467c996cSMel Gorman 
1478f6ac2354SChristoph Lameter 	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1479fd377218SKirill A. Shutemov 	for (order = 0; order < NR_PAGE_ORDERS; ++order)
1480af1c31acSLiu Shixin 		/*
1481af1c31acSLiu Shixin 		 * Access to nr_free is lockless as nr_free is used only for
1482af1c31acSLiu Shixin 		 * printing purposes. Use data_race to avoid KCSAN warning.
1483af1c31acSLiu Shixin 		 */
1484af1c31acSLiu Shixin 		seq_printf(m, "%6lu ", data_race(zone->free_area[order].nr_free));
1485f6ac2354SChristoph Lameter 	seq_putc(m, '\n');
1486f6ac2354SChristoph Lameter }
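/*
 * Editor's note: with the format strings above, one /proc/buddyinfo line
 * looks roughly like (values made up for illustration)
 *   Node 0, zone   Normal   1204    543     92     31      7      2 ...
 * i.e. one free-block count per order for the zone.
 */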
1487467c996cSMel Gorman 
1488467c996cSMel Gorman /*
1489467c996cSMel Gorman  * This walks the free areas for each zone.
1490467c996cSMel Gorman  */
1491467c996cSMel Gorman static int frag_show(struct seq_file *m, void *arg)
1492467c996cSMel Gorman {
1493467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1494727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, frag_show_print);
1495467c996cSMel Gorman 	return 0;
1496467c996cSMel Gorman }
1497467c996cSMel Gorman 
1498467c996cSMel Gorman static void pagetypeinfo_showfree_print(struct seq_file *m,
1499467c996cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1500467c996cSMel Gorman {
1501467c996cSMel Gorman 	int order, mtype;
1502467c996cSMel Gorman 
1503467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
1504467c996cSMel Gorman 		seq_printf(m, "Node %4d, zone %8s, type %12s ",
1505467c996cSMel Gorman 					pgdat->node_id,
1506467c996cSMel Gorman 					zone->name,
1507467c996cSMel Gorman 					migratetype_names[mtype]);
1508fd377218SKirill A. Shutemov 		for (order = 0; order < NR_PAGE_ORDERS; ++order) {
1509467c996cSMel Gorman 			unsigned long freecount = 0;
1510467c996cSMel Gorman 			struct free_area *area;
1511467c996cSMel Gorman 			struct list_head *curr;
151293b3a674SMichal Hocko 			bool overflow = false;
1513467c996cSMel Gorman 
1514467c996cSMel Gorman 			area = &(zone->free_area[order]);
1515467c996cSMel Gorman 
151693b3a674SMichal Hocko 			list_for_each(curr, &area->free_list[mtype]) {
151793b3a674SMichal Hocko 				/*
151893b3a674SMichal Hocko 				 * Cap the free_list iteration because it might
151993b3a674SMichal Hocko 				 * be really large and we are under a spinlock
152093b3a674SMichal Hocko 				 * so a long time spent here could trigger a
152193b3a674SMichal Hocko 				 * hard lockup detector. Anyway this is a
152293b3a674SMichal Hocko 				 * debugging tool so knowing there is a handful
152393b3a674SMichal Hocko 				 * of pages of this order should be more than
152493b3a674SMichal Hocko 				 * sufficient.
152593b3a674SMichal Hocko 				 */
152693b3a674SMichal Hocko 				if (++freecount >= 100000) {
152793b3a674SMichal Hocko 					overflow = true;
152893b3a674SMichal Hocko 					break;
152993b3a674SMichal Hocko 				}
153093b3a674SMichal Hocko 			}
153193b3a674SMichal Hocko 			seq_printf(m, "%s%6lu ", overflow ? ">" : "", freecount);
153293b3a674SMichal Hocko 			spin_unlock_irq(&zone->lock);
153393b3a674SMichal Hocko 			cond_resched();
153493b3a674SMichal Hocko 			spin_lock_irq(&zone->lock);
1535467c996cSMel Gorman 		}
1536467c996cSMel Gorman 		seq_putc(m, '\n');
1537467c996cSMel Gorman 	}
1538467c996cSMel Gorman }
1539467c996cSMel Gorman 
1540467c996cSMel Gorman /* Print out the free pages at each order for each migratetype */
154133090af9SMiaohe Lin static void pagetypeinfo_showfree(struct seq_file *m, void *arg)
1542467c996cSMel Gorman {
1543467c996cSMel Gorman 	int order;
1544467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1545467c996cSMel Gorman 
1546467c996cSMel Gorman 	/* Print header */
1547467c996cSMel Gorman 	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
1548fd377218SKirill A. Shutemov 	for (order = 0; order < NR_PAGE_ORDERS; ++order)
1549467c996cSMel Gorman 		seq_printf(m, "%6d ", order);
1550467c996cSMel Gorman 	seq_putc(m, '\n');
1551467c996cSMel Gorman 
1552727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
1553467c996cSMel Gorman }
1554467c996cSMel Gorman 
1555467c996cSMel Gorman static void pagetypeinfo_showblockcount_print(struct seq_file *m,
1556467c996cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1557467c996cSMel Gorman {
1558467c996cSMel Gorman 	int mtype;
1559467c996cSMel Gorman 	unsigned long pfn;
1560467c996cSMel Gorman 	unsigned long start_pfn = zone->zone_start_pfn;
1561108bcc96SCody P Schafer 	unsigned long end_pfn = zone_end_pfn(zone);
1562467c996cSMel Gorman 	unsigned long count[MIGRATE_TYPES] = { 0, };
1563467c996cSMel Gorman 
1564467c996cSMel Gorman 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
1565467c996cSMel Gorman 		struct page *page;
1566467c996cSMel Gorman 
1567d336e94eSMichal Hocko 		page = pfn_to_online_page(pfn);
1568d336e94eSMichal Hocko 		if (!page)
1569467c996cSMel Gorman 			continue;
1570467c996cSMel Gorman 
1571a91c43c7SJoonsoo Kim 		if (page_zone(page) != zone)
1572a91c43c7SJoonsoo Kim 			continue;
1573a91c43c7SJoonsoo Kim 
1574467c996cSMel Gorman 		mtype = get_pageblock_migratetype(page);
1575467c996cSMel Gorman 
1576e80d6a24SMel Gorman 		if (mtype < MIGRATE_TYPES)
1577467c996cSMel Gorman 			count[mtype]++;
1578467c996cSMel Gorman 	}
1579467c996cSMel Gorman 
1580467c996cSMel Gorman 	/* Print counts */
1581467c996cSMel Gorman 	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1582467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1583467c996cSMel Gorman 		seq_printf(m, "%12lu ", count[mtype]);
1584467c996cSMel Gorman 	seq_putc(m, '\n');
1585467c996cSMel Gorman }
1586467c996cSMel Gorman 
1587f113e641SSeongJae Park /* Print out the number of pageblocks for each migratetype */
158833090af9SMiaohe Lin static void pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
1589467c996cSMel Gorman {
1590467c996cSMel Gorman 	int mtype;
1591467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1592467c996cSMel Gorman 
1593467c996cSMel Gorman 	seq_printf(m, "\n%-23s", "Number of blocks type ");
1594467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1595467c996cSMel Gorman 		seq_printf(m, "%12s ", migratetype_names[mtype]);
1596467c996cSMel Gorman 	seq_putc(m, '\n');
1597727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false,
1598727c080fSVinayak Menon 		pagetypeinfo_showblockcount_print);
1599467c996cSMel Gorman }
1600467c996cSMel Gorman 
160148c96a36SJoonsoo Kim /*
160248c96a36SJoonsoo Kim  * Print out the number of pageblocks for each migratetype that contain pages
160348c96a36SJoonsoo Kim  * of other types. This gives an indication of how well fallbacks are being
160448c96a36SJoonsoo Kim  * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
160548c96a36SJoonsoo Kim  * to determine what is going on
160648c96a36SJoonsoo Kim  */
160748c96a36SJoonsoo Kim static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
160848c96a36SJoonsoo Kim {
160948c96a36SJoonsoo Kim #ifdef CONFIG_PAGE_OWNER
161048c96a36SJoonsoo Kim 	int mtype;
161148c96a36SJoonsoo Kim 
16127dd80b8aSVlastimil Babka 	if (!static_branch_unlikely(&page_owner_inited))
161348c96a36SJoonsoo Kim 		return;
161448c96a36SJoonsoo Kim 
161548c96a36SJoonsoo Kim 	drain_all_pages(NULL);
161648c96a36SJoonsoo Kim 
161748c96a36SJoonsoo Kim 	seq_printf(m, "\n%-23s", "Number of mixed blocks ");
161848c96a36SJoonsoo Kim 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
161948c96a36SJoonsoo Kim 		seq_printf(m, "%12s ", migratetype_names[mtype]);
162048c96a36SJoonsoo Kim 	seq_putc(m, '\n');
162148c96a36SJoonsoo Kim 
1622727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, true,
1623727c080fSVinayak Menon 		pagetypeinfo_showmixedcount_print);
162448c96a36SJoonsoo Kim #endif /* CONFIG_PAGE_OWNER */
162548c96a36SJoonsoo Kim }
162648c96a36SJoonsoo Kim 
1627467c996cSMel Gorman /*
1628467c996cSMel Gorman  * This prints out statistics in relation to grouping pages by mobility.
1629467c996cSMel Gorman  * It is expensive to collect so do not constantly read the file.
1630467c996cSMel Gorman  */
1631467c996cSMel Gorman static int pagetypeinfo_show(struct seq_file *m, void *arg)
1632467c996cSMel Gorman {
1633467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1634467c996cSMel Gorman 
163541b25a37SKOSAKI Motohiro 	/* check memoryless node */
1636a47b53c5SLai Jiangshan 	if (!node_state(pgdat->node_id, N_MEMORY))
163741b25a37SKOSAKI Motohiro 		return 0;
163841b25a37SKOSAKI Motohiro 
1639467c996cSMel Gorman 	seq_printf(m, "Page block order: %d\n", pageblock_order);
1640467c996cSMel Gorman 	seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
1641467c996cSMel Gorman 	seq_putc(m, '\n');
1642467c996cSMel Gorman 	pagetypeinfo_showfree(m, pgdat);
1643467c996cSMel Gorman 	pagetypeinfo_showblockcount(m, pgdat);
164448c96a36SJoonsoo Kim 	pagetypeinfo_showmixedcount(m, pgdat);
1645467c996cSMel Gorman 
1646f6ac2354SChristoph Lameter 	return 0;
1647f6ac2354SChristoph Lameter }
1648f6ac2354SChristoph Lameter 
16498f32f7e5SAlexey Dobriyan static const struct seq_operations fragmentation_op = {
1650f6ac2354SChristoph Lameter 	.start	= frag_start,
1651f6ac2354SChristoph Lameter 	.next	= frag_next,
1652f6ac2354SChristoph Lameter 	.stop	= frag_stop,
1653f6ac2354SChristoph Lameter 	.show	= frag_show,
1654f6ac2354SChristoph Lameter };
1655f6ac2354SChristoph Lameter 
165674e2e8e8SAlexey Dobriyan static const struct seq_operations pagetypeinfo_op = {
1657467c996cSMel Gorman 	.start	= frag_start,
1658467c996cSMel Gorman 	.next	= frag_next,
1659467c996cSMel Gorman 	.stop	= frag_stop,
1660467c996cSMel Gorman 	.show	= pagetypeinfo_show,
1661467c996cSMel Gorman };
1662467c996cSMel Gorman 
1663e2ecc8a7SMel Gorman static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
1664e2ecc8a7SMel Gorman {
1665e2ecc8a7SMel Gorman 	int zid;
1666e2ecc8a7SMel Gorman 
1667e2ecc8a7SMel Gorman 	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
1668e2ecc8a7SMel Gorman 		struct zone *compare = &pgdat->node_zones[zid];
1669e2ecc8a7SMel Gorman 
1670e2ecc8a7SMel Gorman 		if (populated_zone(compare))
1671e2ecc8a7SMel Gorman 			return zone == compare;
1672e2ecc8a7SMel Gorman 	}
1673e2ecc8a7SMel Gorman 
1674e2ecc8a7SMel Gorman 	return false;
1675e2ecc8a7SMel Gorman }
1676e2ecc8a7SMel Gorman 
1677467c996cSMel Gorman static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1678467c996cSMel Gorman 							struct zone *zone)
1679f6ac2354SChristoph Lameter {
1680f6ac2354SChristoph Lameter 	int i;
1681f6ac2354SChristoph Lameter 	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
1682e2ecc8a7SMel Gorman 	if (is_zone_first_populated(pgdat, zone)) {
1683e2ecc8a7SMel Gorman 		seq_printf(m, "\n  per-node stats");
1684e2ecc8a7SMel Gorman 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
168569473e5dSMuchun Song 			unsigned long pages = node_page_state_pages(pgdat, i);
168669473e5dSMuchun Song 
168769473e5dSMuchun Song 			if (vmstat_item_print_in_thp(i))
168869473e5dSMuchun Song 				pages /= HPAGE_PMD_NR;
16899d7ea9a2SKonstantin Khlebnikov 			seq_printf(m, "\n      %-12s %lu", node_stat_name(i),
169069473e5dSMuchun Song 				   pages);
1691e2ecc8a7SMel Gorman 		}
1692e2ecc8a7SMel Gorman 	}
1693f6ac2354SChristoph Lameter 	seq_printf(m,
1694f6ac2354SChristoph Lameter 		   "\n  pages free     %lu"
1695a6ea8b5bSLiangcai Fan 		   "\n        boost    %lu"
1696f6ac2354SChristoph Lameter 		   "\n        min      %lu"
1697f6ac2354SChristoph Lameter 		   "\n        low      %lu"
1698f6ac2354SChristoph Lameter 		   "\n        high     %lu"
1699f6ac2354SChristoph Lameter 		   "\n        spanned  %lu"
17009feedc9dSJiang Liu 		   "\n        present  %lu"
17013c381db1SDavid Hildenbrand 		   "\n        managed  %lu"
17023c381db1SDavid Hildenbrand 		   "\n        cma      %lu",
170388f5acf8SMel Gorman 		   zone_page_state(zone, NR_FREE_PAGES),
1704a6ea8b5bSLiangcai Fan 		   zone->watermark_boost,
170541858966SMel Gorman 		   min_wmark_pages(zone),
170641858966SMel Gorman 		   low_wmark_pages(zone),
170741858966SMel Gorman 		   high_wmark_pages(zone),
1708f6ac2354SChristoph Lameter 		   zone->spanned_pages,
17099feedc9dSJiang Liu 		   zone->present_pages,
17103c381db1SDavid Hildenbrand 		   zone_managed_pages(zone),
17113c381db1SDavid Hildenbrand 		   zone_cma_pages(zone));
17122244b95aSChristoph Lameter 
1713f6ac2354SChristoph Lameter 	seq_printf(m,
17143484b2deSMel Gorman 		   "\n        protection: (%ld",
1715f6ac2354SChristoph Lameter 		   zone->lowmem_reserve[0]);
1716f6ac2354SChristoph Lameter 	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
17173484b2deSMel Gorman 		seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
17187dfb8bf3SDavid Rientjes 	seq_putc(m, ')');
17197dfb8bf3SDavid Rientjes 
1720a8a4b7aeSBaoquan He 	/* If unpopulated, no other information is useful */
1721a8a4b7aeSBaoquan He 	if (!populated_zone(zone)) {
1722a8a4b7aeSBaoquan He 		seq_putc(m, '\n');
1723a8a4b7aeSBaoquan He 		return;
1724a8a4b7aeSBaoquan He 	}
1725a8a4b7aeSBaoquan He 
17267dfb8bf3SDavid Rientjes 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
17279d7ea9a2SKonstantin Khlebnikov 		seq_printf(m, "\n      %-12s %lu", zone_stat_name(i),
17287dfb8bf3SDavid Rientjes 			   zone_page_state(zone, i));
17297dfb8bf3SDavid Rientjes 
17303a321d2aSKemi Wang #ifdef CONFIG_NUMA
1731f19298b9SMel Gorman 	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
17329d7ea9a2SKonstantin Khlebnikov 		seq_printf(m, "\n      %-12s %lu", numa_stat_name(i),
1733f19298b9SMel Gorman 			   zone_numa_event_state(zone, i));
17343a321d2aSKemi Wang #endif
17353a321d2aSKemi Wang 
17367dfb8bf3SDavid Rientjes 	seq_printf(m, "\n  pagesets");
1737f6ac2354SChristoph Lameter 	for_each_online_cpu(i) {
173828f836b6SMel Gorman 		struct per_cpu_pages *pcp;
173928f836b6SMel Gorman 		struct per_cpu_zonestat __maybe_unused *pzstats;
1740f6ac2354SChristoph Lameter 
174128f836b6SMel Gorman 		pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
1742f6ac2354SChristoph Lameter 		seq_printf(m,
17433dfa5721SChristoph Lameter 			   "\n    cpu: %i"
1744f6ac2354SChristoph Lameter 			   "\n              count: %i"
1745f6ac2354SChristoph Lameter 			   "\n              high:  %i"
1746f6ac2354SChristoph Lameter 			   "\n              batch: %i",
17473dfa5721SChristoph Lameter 			   i,
174828f836b6SMel Gorman 			   pcp->count,
174928f836b6SMel Gorman 			   pcp->high,
175028f836b6SMel Gorman 			   pcp->batch);
1751df9ecabaSChristoph Lameter #ifdef CONFIG_SMP
175228f836b6SMel Gorman 		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
1753df9ecabaSChristoph Lameter 		seq_printf(m, "\n  vm stats threshold: %d",
175428f836b6SMel Gorman 				pzstats->stat_threshold);
1755df9ecabaSChristoph Lameter #endif
1756f6ac2354SChristoph Lameter 	}
1757f6ac2354SChristoph Lameter 	seq_printf(m,
1758599d0c95SMel Gorman 		   "\n  node_unreclaimable:  %u"
17593a50d14dSAndrey Ryabinin 		   "\n  start_pfn:           %lu",
1760c73322d0SJohannes Weiner 		   pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
17613a50d14dSAndrey Ryabinin 		   zone->zone_start_pfn);
1762f6ac2354SChristoph Lameter 	seq_putc(m, '\n');
1763f6ac2354SChristoph Lameter }
1764467c996cSMel Gorman 
1765467c996cSMel Gorman /*
1766b2bd8598SDavid Rientjes  * Output information about zones in @pgdat.  All zones are printed regardless
1767b2bd8598SDavid Rientjes  * of whether they are populated or not: lowmem_reserve_ratio operates on the
1768b2bd8598SDavid Rientjes  * set of all zones and userspace would not be aware of such zones if they are
1769b2bd8598SDavid Rientjes  * suppressed here (zoneinfo displays the effect of lowmem_reserve_ratio).
1770467c996cSMel Gorman  */
1771467c996cSMel Gorman static int zoneinfo_show(struct seq_file *m, void *arg)
1772467c996cSMel Gorman {
1773467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1774727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
1775f6ac2354SChristoph Lameter 	return 0;
1776f6ac2354SChristoph Lameter }
1777f6ac2354SChristoph Lameter 
17785c9fe628SAlexey Dobriyan static const struct seq_operations zoneinfo_op = {
1779f6ac2354SChristoph Lameter 	.start	= frag_start, /* iterate over all zones. The same as in
1780f6ac2354SChristoph Lameter 			       * fragmentation. */
1781f6ac2354SChristoph Lameter 	.next	= frag_next,
1782f6ac2354SChristoph Lameter 	.stop	= frag_stop,
1783f6ac2354SChristoph Lameter 	.show	= zoneinfo_show,
1784f6ac2354SChristoph Lameter };
1785f6ac2354SChristoph Lameter 
17869d7ea9a2SKonstantin Khlebnikov #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
1787f19298b9SMel Gorman 			 NR_VM_NUMA_EVENT_ITEMS + \
17889d7ea9a2SKonstantin Khlebnikov 			 NR_VM_NODE_STAT_ITEMS + \
17899d7ea9a2SKonstantin Khlebnikov 			 NR_VM_WRITEBACK_STAT_ITEMS + \
17909d7ea9a2SKonstantin Khlebnikov 			 (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
17919d7ea9a2SKonstantin Khlebnikov 			  NR_VM_EVENT_ITEMS : 0))
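/*
 * Editor's note: NR_VMSTAT_ITEMS is the total number of counters exported
 * through /proc/vmstat; vmstat_start() below snapshots them into a single
 * array in exactly this order (zone, numa event, node, writeback, then vm
 * events), and the seq_file iterator indexes into that array.
 */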
179279da826aSMichael Rubin 
1793f6ac2354SChristoph Lameter static void *vmstat_start(struct seq_file *m, loff_t *pos)
1794f6ac2354SChristoph Lameter {
17952244b95aSChristoph Lameter 	unsigned long *v;
17969d7ea9a2SKonstantin Khlebnikov 	int i;
1797f6ac2354SChristoph Lameter 
17989d7ea9a2SKonstantin Khlebnikov 	if (*pos >= NR_VMSTAT_ITEMS)
1799f6ac2354SChristoph Lameter 		return NULL;
1800f6ac2354SChristoph Lameter 
18019d7ea9a2SKonstantin Khlebnikov 	BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
1802f19298b9SMel Gorman 	fold_vm_numa_events();
18039d7ea9a2SKonstantin Khlebnikov 	v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
18042244b95aSChristoph Lameter 	m->private = v;
18052244b95aSChristoph Lameter 	if (!v)
1806f6ac2354SChristoph Lameter 		return ERR_PTR(-ENOMEM);
18072244b95aSChristoph Lameter 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1808c41f012aSMichal Hocko 		v[i] = global_zone_page_state(i);
180979da826aSMichael Rubin 	v += NR_VM_ZONE_STAT_ITEMS;
181079da826aSMichael Rubin 
18113a321d2aSKemi Wang #ifdef CONFIG_NUMA
1812f19298b9SMel Gorman 	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
1813f19298b9SMel Gorman 		v[i] = global_numa_event_state(i);
1814f19298b9SMel Gorman 	v += NR_VM_NUMA_EVENT_ITEMS;
18153a321d2aSKemi Wang #endif
18163a321d2aSKemi Wang 
181769473e5dSMuchun Song 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
1818ea426c2aSRoman Gushchin 		v[i] = global_node_page_state_pages(i);
181969473e5dSMuchun Song 		if (vmstat_item_print_in_thp(i))
182069473e5dSMuchun Song 			v[i] /= HPAGE_PMD_NR;
182169473e5dSMuchun Song 	}
182275ef7184SMel Gorman 	v += NR_VM_NODE_STAT_ITEMS;
182375ef7184SMel Gorman 
182479da826aSMichael Rubin 	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
182579da826aSMichael Rubin 			    v + NR_DIRTY_THRESHOLD);
182679da826aSMichael Rubin 	v += NR_VM_WRITEBACK_STAT_ITEMS;
182779da826aSMichael Rubin 
1828f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
182979da826aSMichael Rubin 	all_vm_events(v);
183079da826aSMichael Rubin 	v[PGPGIN] /= 2;		/* sectors -> kbytes */
183179da826aSMichael Rubin 	v[PGPGOUT] /= 2;
1832f8891e5eSChristoph Lameter #endif
1833ff8b16d7SWu Fengguang 	return (unsigned long *)m->private + *pos;
1834f6ac2354SChristoph Lameter }
1835f6ac2354SChristoph Lameter 
1836f6ac2354SChristoph Lameter static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
1837f6ac2354SChristoph Lameter {
1838f6ac2354SChristoph Lameter 	(*pos)++;
18399d7ea9a2SKonstantin Khlebnikov 	if (*pos >= NR_VMSTAT_ITEMS)
1840f6ac2354SChristoph Lameter 		return NULL;
1841f6ac2354SChristoph Lameter 	return (unsigned long *)m->private + *pos;
1842f6ac2354SChristoph Lameter }
1843f6ac2354SChristoph Lameter 
1844f6ac2354SChristoph Lameter static int vmstat_show(struct seq_file *m, void *arg)
1845f6ac2354SChristoph Lameter {
1846f6ac2354SChristoph Lameter 	unsigned long *l = arg;
1847f6ac2354SChristoph Lameter 	unsigned long off = l - (unsigned long *)m->private;
184868ba0326SAlexey Dobriyan 
184968ba0326SAlexey Dobriyan 	seq_puts(m, vmstat_text[off]);
185075ba1d07SJoe Perches 	seq_put_decimal_ull(m, " ", *l);
185168ba0326SAlexey Dobriyan 	seq_putc(m, '\n');
18528d92890bSNeilBrown 
18538d92890bSNeilBrown 	if (off == NR_VMSTAT_ITEMS - 1) {
18548d92890bSNeilBrown 		/*
18558d92890bSNeilBrown 		 * We've come to the end - add any deprecated counters to avoid
18568d92890bSNeilBrown 		 * breaking userspace which might depend on them being present.
18578d92890bSNeilBrown 		 */
18588d92890bSNeilBrown 		seq_puts(m, "nr_unstable 0\n");
18598d92890bSNeilBrown 	}
1860f6ac2354SChristoph Lameter 	return 0;
1861f6ac2354SChristoph Lameter }
1862f6ac2354SChristoph Lameter 
1863f6ac2354SChristoph Lameter static void vmstat_stop(struct seq_file *m, void *arg)
1864f6ac2354SChristoph Lameter {
1865f6ac2354SChristoph Lameter 	kfree(m->private);
1866f6ac2354SChristoph Lameter 	m->private = NULL;
1867f6ac2354SChristoph Lameter }
1868f6ac2354SChristoph Lameter 
1869b6aa44abSAlexey Dobriyan static const struct seq_operations vmstat_op = {
1870f6ac2354SChristoph Lameter 	.start	= vmstat_start,
1871f6ac2354SChristoph Lameter 	.next	= vmstat_next,
1872f6ac2354SChristoph Lameter 	.stop	= vmstat_stop,
1873f6ac2354SChristoph Lameter 	.show	= vmstat_show,
1874f6ac2354SChristoph Lameter };
1875f6ac2354SChristoph Lameter #endif /* CONFIG_PROC_FS */
1876f6ac2354SChristoph Lameter 
1877df9ecabaSChristoph Lameter #ifdef CONFIG_SMP
1878d1187ed2SChristoph Lameter static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
187977461ab3SChristoph Lameter int sysctl_stat_interval __read_mostly = HZ;
1880d1187ed2SChristoph Lameter 
188152b6f46bSHugh Dickins #ifdef CONFIG_PROC_FS
188252b6f46bSHugh Dickins static void refresh_vm_stats(struct work_struct *work)
188352b6f46bSHugh Dickins {
188452b6f46bSHugh Dickins 	refresh_cpu_vm_stats(true);
188552b6f46bSHugh Dickins }
188652b6f46bSHugh Dickins 
188752b6f46bSHugh Dickins int vmstat_refresh(struct ctl_table *table, int write,
188832927393SChristoph Hellwig 		   void *buffer, size_t *lenp, loff_t *ppos)
188952b6f46bSHugh Dickins {
189052b6f46bSHugh Dickins 	long val;
189152b6f46bSHugh Dickins 	int err;
189252b6f46bSHugh Dickins 	int i;
189352b6f46bSHugh Dickins 
189452b6f46bSHugh Dickins 	/*
189552b6f46bSHugh Dickins 	 * The regular update, every sysctl_stat_interval, may come later
189652b6f46bSHugh Dickins 	 * than expected: leaving a significant amount in per_cpu buckets.
189752b6f46bSHugh Dickins 	 * This is particularly misleading when checking a quantity of HUGE
189852b6f46bSHugh Dickins 	 * pages, immediately after running a test.  /proc/sys/vm/stat_refresh,
189952b6f46bSHugh Dickins 	 * which can equally be echo'ed to or cat'ted from (by root),
190052b6f46bSHugh Dickins 	 * can be used to update the stats just before reading them.
190152b6f46bSHugh Dickins 	 *
1902c41f012aSMichal Hocko 	 * Oh, and since global_zone_page_state() etc. are so careful to hide
190352b6f46bSHugh Dickins 	 * transiently negative values, report an error here if any of
190452b6f46bSHugh Dickins 	 * the stats is negative, so we know to go looking for imbalance.
190552b6f46bSHugh Dickins 	 */
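	/*
	 * Editor's note, example usage from the shell (as root), matching the
	 * description above:
	 *   echo 1 > /proc/sys/vm/stat_refresh    # or: cat /proc/sys/vm/stat_refresh
	 * Either access folds the per-cpu diffs before /proc/vmstat is read.
	 */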
190652b6f46bSHugh Dickins 	err = schedule_on_each_cpu(refresh_vm_stats);
190752b6f46bSHugh Dickins 	if (err)
190852b6f46bSHugh Dickins 		return err;
190952b6f46bSHugh Dickins 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
191075083aaeSHugh Dickins 		/*
191175083aaeSHugh Dickins 		 * Skip checking stats known to go negative occasionally.
191275083aaeSHugh Dickins 		 */
191375083aaeSHugh Dickins 		switch (i) {
191475083aaeSHugh Dickins 		case NR_ZONE_WRITE_PENDING:
191575083aaeSHugh Dickins 		case NR_FREE_CMA_PAGES:
191675083aaeSHugh Dickins 			continue;
191775083aaeSHugh Dickins 		}
191875ef7184SMel Gorman 		val = atomic_long_read(&vm_zone_stat[i]);
191952b6f46bSHugh Dickins 		if (val < 0) {
192052b6f46bSHugh Dickins 			pr_warn("%s: %s %ld\n",
19219d7ea9a2SKonstantin Khlebnikov 				__func__, zone_stat_name(i), val);
192252b6f46bSHugh Dickins 		}
192352b6f46bSHugh Dickins 	}
192476d8cc3cSHugh Dickins 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
192575083aaeSHugh Dickins 		/*
192675083aaeSHugh Dickins 		 * Skip checking stats known to go negative occasionally.
192775083aaeSHugh Dickins 		 */
192875083aaeSHugh Dickins 		switch (i) {
192975083aaeSHugh Dickins 		case NR_WRITEBACK:
193075083aaeSHugh Dickins 			continue;
193175083aaeSHugh Dickins 		}
193276d8cc3cSHugh Dickins 		val = atomic_long_read(&vm_node_stat[i]);
193376d8cc3cSHugh Dickins 		if (val < 0) {
193476d8cc3cSHugh Dickins 			pr_warn("%s: %s %ld\n",
193576d8cc3cSHugh Dickins 				__func__, node_stat_name(i), val);
193676d8cc3cSHugh Dickins 		}
193776d8cc3cSHugh Dickins 	}
193852b6f46bSHugh Dickins 	if (write)
193952b6f46bSHugh Dickins 		*ppos += *lenp;
194052b6f46bSHugh Dickins 	else
194152b6f46bSHugh Dickins 		*lenp = 0;
194252b6f46bSHugh Dickins 	return 0;
194352b6f46bSHugh Dickins }
194452b6f46bSHugh Dickins #endif /* CONFIG_PROC_FS */
194552b6f46bSHugh Dickins 
1946d1187ed2SChristoph Lameter static void vmstat_update(struct work_struct *w)
1947d1187ed2SChristoph Lameter {
19480eb77e98SChristoph Lameter 	if (refresh_cpu_vm_stats(true)) {
19497cc36bbdSChristoph Lameter 		/*
19507cc36bbdSChristoph Lameter 		 * Counters were updated so we expect more updates
19517cc36bbdSChristoph Lameter 		 * to occur in the future. Keep on running the
19527cc36bbdSChristoph Lameter 		 * update worker thread.
19537cc36bbdSChristoph Lameter 		 */
1954ce612879SMichal Hocko 		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
1955176bed1dSLinus Torvalds 				this_cpu_ptr(&vmstat_work),
195698f4ebb2SAnton Blanchard 				round_jiffies_relative(sysctl_stat_interval));
1957f01f17d3SMichal Hocko 	}
1958d1187ed2SChristoph Lameter }
1959d1187ed2SChristoph Lameter 
19607cc36bbdSChristoph Lameter /*
19617cc36bbdSChristoph Lameter  * Check if the diffs for a certain cpu indicate that
19627cc36bbdSChristoph Lameter  * an update is needed.
19637cc36bbdSChristoph Lameter  */
19647cc36bbdSChristoph Lameter static bool need_update(int cpu)
1965d1187ed2SChristoph Lameter {
19662bbd00aeSJohannes Weiner 	pg_data_t *last_pgdat = NULL;
19677cc36bbdSChristoph Lameter 	struct zone *zone;
1968d1187ed2SChristoph Lameter 
19697cc36bbdSChristoph Lameter 	for_each_populated_zone(zone) {
197028f836b6SMel Gorman 		struct per_cpu_zonestat *pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
19712bbd00aeSJohannes Weiner 		struct per_cpu_nodestat *n;
197228f836b6SMel Gorman 
19737cc36bbdSChristoph Lameter 		/*
19747cc36bbdSChristoph Lameter 		 * The fast way of checking if there are any vmstat diffs.
19757cc36bbdSChristoph Lameter 		 */
197664632fd3SMiaohe Lin 		if (memchr_inv(pzstats->vm_stat_diff, 0, sizeof(pzstats->vm_stat_diff)))
19777cc36bbdSChristoph Lameter 			return true;
1978f19298b9SMel Gorman 
19792bbd00aeSJohannes Weiner 		if (last_pgdat == zone->zone_pgdat)
19802bbd00aeSJohannes Weiner 			continue;
19812bbd00aeSJohannes Weiner 		last_pgdat = zone->zone_pgdat;
19822bbd00aeSJohannes Weiner 		n = per_cpu_ptr(zone->zone_pgdat->per_cpu_nodestats, cpu);
198364632fd3SMiaohe Lin 		if (memchr_inv(n->vm_node_stat_diff, 0, sizeof(n->vm_node_stat_diff)))
19842bbd00aeSJohannes Weiner 			return true;
19857cc36bbdSChristoph Lameter 	}
19867cc36bbdSChristoph Lameter 	return false;
19877cc36bbdSChristoph Lameter }
19887cc36bbdSChristoph Lameter 
19897b8da4c7SChristoph Lameter /*
19907b8da4c7SChristoph Lameter  * Switch off vmstat processing and then fold all the remaining differentials
19917b8da4c7SChristoph Lameter  * until the diffs stay at zero. The function is used by NOHZ and can only be
19927b8da4c7SChristoph Lameter  * invoked when tick processing is not active.
19937b8da4c7SChristoph Lameter  */
1994f01f17d3SMichal Hocko void quiet_vmstat(void)
1995f01f17d3SMichal Hocko {
1996f01f17d3SMichal Hocko 	if (system_state != SYSTEM_RUNNING)
1997f01f17d3SMichal Hocko 		return;
1998f01f17d3SMichal Hocko 
19997b8da4c7SChristoph Lameter 	if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
2000f01f17d3SMichal Hocko 		return;
2001f01f17d3SMichal Hocko 
2002f01f17d3SMichal Hocko 	if (!need_update(smp_processor_id()))
2003f01f17d3SMichal Hocko 		return;
2004f01f17d3SMichal Hocko 
2005f01f17d3SMichal Hocko 	/*
2006f01f17d3SMichal Hocko 	 * Just refresh counters and do not care about the pending delayed
2007f01f17d3SMichal Hocko 	 * vmstat_update. It doesn't fire that often to matter and canceling
2008f01f17d3SMichal Hocko 	 * it would be too expensive from this path.
2009f01f17d3SMichal Hocko 	 * vmstat_shepherd will take care about that for us.
2010f01f17d3SMichal Hocko 	 */
2011f01f17d3SMichal Hocko 	refresh_cpu_vm_stats(false);
2012f01f17d3SMichal Hocko }
2013f01f17d3SMichal Hocko 
20147cc36bbdSChristoph Lameter /*
20157cc36bbdSChristoph Lameter  * Shepherd worker thread that checks the
20167cc36bbdSChristoph Lameter  * differentials of processors that have their worker
20177cc36bbdSChristoph Lameter  * threads for vm statistics updates disabled because of
20187cc36bbdSChristoph Lameter  * inactivity.
20197cc36bbdSChristoph Lameter  */
20207cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w);
20217cc36bbdSChristoph Lameter 
20220eb77e98SChristoph Lameter static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);
20237cc36bbdSChristoph Lameter 
20247cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w)
20257cc36bbdSChristoph Lameter {
20267cc36bbdSChristoph Lameter 	int cpu;
20277cc36bbdSChristoph Lameter 
20287625eccdSSebastian Andrzej Siewior 	cpus_read_lock();
20297cc36bbdSChristoph Lameter 	/* Check processors whose vmstat worker threads have been disabled */
20307b8da4c7SChristoph Lameter 	for_each_online_cpu(cpu) {
2031f01f17d3SMichal Hocko 		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
20327cc36bbdSChristoph Lameter 
2033be5e015dSMarcelo Tosatti 		/*
2034be5e015dSMarcelo Tosatti 		 * In-kernel users of vmstat counters either require the precise value and
2035be5e015dSMarcelo Tosatti 		 * use the zone_page_state_snapshot interface, or they can live with an
2036be5e015dSMarcelo Tosatti 		 * imprecision as the regular flushing can happen at arbitrary time and
2037be5e015dSMarcelo Tosatti 		 * the cumulative error can grow (see calculate_normal_threshold).
2038be5e015dSMarcelo Tosatti 		 *
2039be5e015dSMarcelo Tosatti 		 * From that POV the regular flushing can be postponed for CPUs that have
2040be5e015dSMarcelo Tosatti 		 * been isolated from the kernel interference without critical
2041be5e015dSMarcelo Tosatti 		 * infrastructure ever noticing. Skip regular flushing from vmstat_shepherd
2042be5e015dSMarcelo Tosatti 		 * for all isolated CPUs to avoid interference with the isolated workload.
2043be5e015dSMarcelo Tosatti 		 */
2044be5e015dSMarcelo Tosatti 		if (cpu_is_isolated(cpu))
2045be5e015dSMarcelo Tosatti 			continue;
2046be5e015dSMarcelo Tosatti 
20477b8da4c7SChristoph Lameter 		if (!delayed_work_pending(dw) && need_update(cpu))
2048ce612879SMichal Hocko 			queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
2049fbcc8183SJiang Biao 
2050fbcc8183SJiang Biao 		cond_resched();
2051f01f17d3SMichal Hocko 	}
20527625eccdSSebastian Andrzej Siewior 	cpus_read_unlock();
20537cc36bbdSChristoph Lameter 
20547cc36bbdSChristoph Lameter 	schedule_delayed_work(&shepherd,
20557cc36bbdSChristoph Lameter 		round_jiffies_relative(sysctl_stat_interval));
20567cc36bbdSChristoph Lameter }
20577cc36bbdSChristoph Lameter 
20587cc36bbdSChristoph Lameter static void __init start_shepherd_timer(void)
20597cc36bbdSChristoph Lameter {
20607cc36bbdSChristoph Lameter 	int cpu;
20617cc36bbdSChristoph Lameter 
20627cc36bbdSChristoph Lameter 	for_each_possible_cpu(cpu)
2063ccde8bd4SMichal Hocko 		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
20647cc36bbdSChristoph Lameter 			vmstat_update);
20657cc36bbdSChristoph Lameter 
20667cc36bbdSChristoph Lameter 	schedule_delayed_work(&shepherd,
20677cc36bbdSChristoph Lameter 		round_jiffies_relative(sysctl_stat_interval));
2068d1187ed2SChristoph Lameter }
2069d1187ed2SChristoph Lameter 
207003e86dbaSTim Chen static void __init init_cpu_node_state(void)
207103e86dbaSTim Chen {
20724c501327SSebastian Andrzej Siewior 	int node;
207303e86dbaSTim Chen 
20744c501327SSebastian Andrzej Siewior 	for_each_online_node(node) {
2075b55032f1SYury Norov 		if (!cpumask_empty(cpumask_of_node(node)))
20764c501327SSebastian Andrzej Siewior 			node_set_state(node, N_CPU);
20774c501327SSebastian Andrzej Siewior 	}
207803e86dbaSTim Chen }
207903e86dbaSTim Chen 
20805438da97SSebastian Andrzej Siewior static int vmstat_cpu_online(unsigned int cpu)
2081807a1bd2SToshi Kani {
20825ee28a44SKAMEZAWA Hiroyuki 	refresh_zone_stat_thresholds();
2083734c1570SOscar Salvador 
2084734c1570SOscar Salvador 	if (!node_state(cpu_to_node(cpu), N_CPU)) {
2085ad596925SChristoph Lameter 		node_set_state(cpu_to_node(cpu), N_CPU);
2086734c1570SOscar Salvador 	}
2087734c1570SOscar Salvador 
20885438da97SSebastian Andrzej Siewior 	return 0;
2089df9ecabaSChristoph Lameter }
2090df9ecabaSChristoph Lameter 
20915438da97SSebastian Andrzej Siewior static int vmstat_cpu_down_prep(unsigned int cpu)
20925438da97SSebastian Andrzej Siewior {
20935438da97SSebastian Andrzej Siewior 	cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
20945438da97SSebastian Andrzej Siewior 	return 0;
20955438da97SSebastian Andrzej Siewior }
20965438da97SSebastian Andrzej Siewior 
20975438da97SSebastian Andrzej Siewior static int vmstat_cpu_dead(unsigned int cpu)
20985438da97SSebastian Andrzej Siewior {
20995438da97SSebastian Andrzej Siewior 	const struct cpumask *node_cpus;
21005438da97SSebastian Andrzej Siewior 	int node;
21015438da97SSebastian Andrzej Siewior 
21025438da97SSebastian Andrzej Siewior 	node = cpu_to_node(cpu);
21035438da97SSebastian Andrzej Siewior 
21045438da97SSebastian Andrzej Siewior 	refresh_zone_stat_thresholds();
21055438da97SSebastian Andrzej Siewior 	node_cpus = cpumask_of_node(node);
2106b55032f1SYury Norov 	if (!cpumask_empty(node_cpus))
21075438da97SSebastian Andrzej Siewior 		return 0;
21085438da97SSebastian Andrzej Siewior 
21095438da97SSebastian Andrzej Siewior 	node_clear_state(node, N_CPU);
2110734c1570SOscar Salvador 
21115438da97SSebastian Andrzej Siewior 	return 0;
21125438da97SSebastian Andrzej Siewior }
21135438da97SSebastian Andrzej Siewior 
21148f32f7e5SAlexey Dobriyan #endif
2115df9ecabaSChristoph Lameter 
2116ce612879SMichal Hocko struct workqueue_struct *mm_percpu_wq;
2117ce612879SMichal Hocko 
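/*
 * Early-boot setup: create mm_percpu_wq, register the vmstat CPU hotplug
 * callbacks, start the shepherd and create the /proc interfaces.
 */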
2118597b7305SMichal Hocko void __init init_mm_internals(void)
2119df9ecabaSChristoph Lameter {
2120ce612879SMichal Hocko 	int ret __maybe_unused;
21215438da97SSebastian Andrzej Siewior 
212280d136e1SMichal Hocko 	mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
2123ce612879SMichal Hocko 
2124ce612879SMichal Hocko #ifdef CONFIG_SMP
21255438da97SSebastian Andrzej Siewior 	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
21265438da97SSebastian Andrzej Siewior 					NULL, vmstat_cpu_dead);
21275438da97SSebastian Andrzej Siewior 	if (ret < 0)
21285438da97SSebastian Andrzej Siewior 		pr_err("vmstat: failed to register 'dead' hotplug state\n");
21295438da97SSebastian Andrzej Siewior 
21305438da97SSebastian Andrzej Siewior 	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
21315438da97SSebastian Andrzej Siewior 					vmstat_cpu_online,
21325438da97SSebastian Andrzej Siewior 					vmstat_cpu_down_prep);
21335438da97SSebastian Andrzej Siewior 	if (ret < 0)
21345438da97SSebastian Andrzej Siewior 		pr_err("vmstat: failed to register 'online' hotplug state\n");
21355438da97SSebastian Andrzej Siewior 
21367625eccdSSebastian Andrzej Siewior 	cpus_read_lock();
213703e86dbaSTim Chen 	init_cpu_node_state();
21387625eccdSSebastian Andrzej Siewior 	cpus_read_unlock();
2139d1187ed2SChristoph Lameter 
21407cc36bbdSChristoph Lameter 	start_shepherd_timer();
21418f32f7e5SAlexey Dobriyan #endif
21428f32f7e5SAlexey Dobriyan #ifdef CONFIG_PROC_FS
2143fddda2b7SChristoph Hellwig 	proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
2144abaed011SMichal Hocko 	proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
2145fddda2b7SChristoph Hellwig 	proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
2146fddda2b7SChristoph Hellwig 	proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
21478f32f7e5SAlexey Dobriyan #endif
2148df9ecabaSChristoph Lameter }
2149d7a5752cSMel Gorman 
2150d7a5752cSMel Gorman #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
2151d7a5752cSMel Gorman 
2152d7a5752cSMel Gorman /*
2153d7a5752cSMel Gorman  * Return an index indicating how much of the available free memory is
2154d7a5752cSMel Gorman  * unusable for an allocation of the requested size.
2155d7a5752cSMel Gorman  */
2156d7a5752cSMel Gorman static int unusable_free_index(unsigned int order,
2157d7a5752cSMel Gorman 				struct contig_page_info *info)
2158d7a5752cSMel Gorman {
2159d7a5752cSMel Gorman 	/* No free memory is interpreted as all free memory is unusable */
2160d7a5752cSMel Gorman 	if (info->free_pages == 0)
2161d7a5752cSMel Gorman 		return 1000;
2162d7a5752cSMel Gorman 
2163d7a5752cSMel Gorman 	/*
2164d7a5752cSMel Gorman 	 * Index should be a value between 0 and 1. Return a value to 3
2165d7a5752cSMel Gorman 	 * decimal places.
2166d7a5752cSMel Gorman 	 *
2167d7a5752cSMel Gorman 	 * 0 => no fragmentation
2168d7a5752cSMel Gorman 	 * 1 => high fragmentation
2169d7a5752cSMel Gorman 	 */
2170d7a5752cSMel Gorman 	return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
2171d7a5752cSMel Gorman 
2172d7a5752cSMel Gorman }
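/*
 * Illustration with made-up numbers: with info->free_pages = 1000 and
 * info->free_blocks_suitable = 50 at order 4 (i.e. 50 << 4 = 800 pages sit
 * in blocks big enough), the index is (1000 - 800) * 1000 / 1000 = 200,
 * printed as 0.200: 20% of free memory is unusable for an order-4 request.
 */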
2173d7a5752cSMel Gorman 
2174d7a5752cSMel Gorman static void unusable_show_print(struct seq_file *m,
2175d7a5752cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
2176d7a5752cSMel Gorman {
2177d7a5752cSMel Gorman 	unsigned int order;
2178d7a5752cSMel Gorman 	int index;
2179d7a5752cSMel Gorman 	struct contig_page_info info;
2180d7a5752cSMel Gorman 
2181d7a5752cSMel Gorman 	seq_printf(m, "Node %d, zone %8s ",
2182d7a5752cSMel Gorman 				pgdat->node_id,
2183d7a5752cSMel Gorman 				zone->name);
2184fd377218SKirill A. Shutemov 	for (order = 0; order < NR_PAGE_ORDERS; ++order) {
2185d7a5752cSMel Gorman 		fill_contig_page_info(zone, order, &info);
2186d7a5752cSMel Gorman 		index = unusable_free_index(order, &info);
2187d7a5752cSMel Gorman 		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
2188d7a5752cSMel Gorman 	}
2189d7a5752cSMel Gorman 
2190d7a5752cSMel Gorman 	seq_putc(m, '\n');
2191d7a5752cSMel Gorman }
2192d7a5752cSMel Gorman 
2193d7a5752cSMel Gorman /*
2194d7a5752cSMel Gorman  * Display unusable free space index
2195d7a5752cSMel Gorman  *
2196d7a5752cSMel Gorman  * The unusable free space index measures how much of the available free
2197d7a5752cSMel Gorman  * memory cannot be used to satisfy an allocation of a given size and is a
2198d7a5752cSMel Gorman  * value between 0 and 1. The higher the value, the more of the free memory is
2199d7a5752cSMel Gorman  * unusable and, by implication, the worse the external fragmentation is. This
2200d7a5752cSMel Gorman  * can be expressed as a percentage by multiplying by 100.
2201d7a5752cSMel Gorman  */
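/*
 * Each output line carries one value per page order, e.g. (values are
 * illustrative only):
 *
 *   Node 0, zone   Normal 0.000 0.005 0.010 0.031 0.093 ...
 */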
2202d7a5752cSMel Gorman static int unusable_show(struct seq_file *m, void *arg)
2203d7a5752cSMel Gorman {
2204d7a5752cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
2205d7a5752cSMel Gorman 
2206d7a5752cSMel Gorman 	/* check memoryless node */
2207a47b53c5SLai Jiangshan 	if (!node_state(pgdat->node_id, N_MEMORY))
2208d7a5752cSMel Gorman 		return 0;
2209d7a5752cSMel Gorman 
2210727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, unusable_show_print);
2211d7a5752cSMel Gorman 
2212d7a5752cSMel Gorman 	return 0;
2213d7a5752cSMel Gorman }
2214d7a5752cSMel Gorman 
221501a99560SKefeng Wang static const struct seq_operations unusable_sops = {
2216d7a5752cSMel Gorman 	.start	= frag_start,
2217d7a5752cSMel Gorman 	.next	= frag_next,
2218d7a5752cSMel Gorman 	.stop	= frag_stop,
2219d7a5752cSMel Gorman 	.show	= unusable_show,
2220d7a5752cSMel Gorman };
2221d7a5752cSMel Gorman 
222201a99560SKefeng Wang DEFINE_SEQ_ATTRIBUTE(unusable);
2223d7a5752cSMel Gorman 
2224f1a5ab12SMel Gorman static void extfrag_show_print(struct seq_file *m,
2225f1a5ab12SMel Gorman 					pg_data_t *pgdat, struct zone *zone)
2226f1a5ab12SMel Gorman {
2227f1a5ab12SMel Gorman 	unsigned int order;
2228f1a5ab12SMel Gorman 	int index;
2229f1a5ab12SMel Gorman 
2230f1a5ab12SMel Gorman 	/* Alloc on stack as interrupts are disabled for zone walk */
2231f1a5ab12SMel Gorman 	struct contig_page_info info;
2232f1a5ab12SMel Gorman 
2233f1a5ab12SMel Gorman 	seq_printf(m, "Node %d, zone %8s ",
2234f1a5ab12SMel Gorman 				pgdat->node_id,
2235f1a5ab12SMel Gorman 				zone->name);
2236fd377218SKirill A. Shutemov 	for (order = 0; order < NR_PAGE_ORDERS; ++order) {
2237f1a5ab12SMel Gorman 		fill_contig_page_info(zone, order, &info);
223856de7263SMel Gorman 		index = __fragmentation_index(order, &info);
2239a9970586SLin Feng 		seq_printf(m, "%2d.%03d ", index / 1000, index % 1000);
2240f1a5ab12SMel Gorman 	}
2241f1a5ab12SMel Gorman 
2242f1a5ab12SMel Gorman 	seq_putc(m, '\n');
2243f1a5ab12SMel Gorman }
2244f1a5ab12SMel Gorman 
2245f1a5ab12SMel Gorman /*
2246f1a5ab12SMel Gorman  * Display the fragmentation index for orders at which allocations would fail
2247f1a5ab12SMel Gorman  */
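/*
 * __fragmentation_index() (defined earlier in this file) yields a value
 * between 0 and 1 for failing orders: values close to 0 suggest the failure
 * is due to a lack of memory, values close to 1 suggest external
 * fragmentation. Orders that could still be satisfied are reported as -1.000.
 */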
2248f1a5ab12SMel Gorman static int extfrag_show(struct seq_file *m, void *arg)
2249f1a5ab12SMel Gorman {
2250f1a5ab12SMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
2251f1a5ab12SMel Gorman 
2252727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);
2253f1a5ab12SMel Gorman 
2254f1a5ab12SMel Gorman 	return 0;
2255f1a5ab12SMel Gorman }
2256f1a5ab12SMel Gorman 
225701a99560SKefeng Wang static const struct seq_operations extfrag_sops = {
2258f1a5ab12SMel Gorman 	.start	= frag_start,
2259f1a5ab12SMel Gorman 	.next	= frag_next,
2260f1a5ab12SMel Gorman 	.stop	= frag_stop,
2261f1a5ab12SMel Gorman 	.show	= extfrag_show,
2262f1a5ab12SMel Gorman };
2263f1a5ab12SMel Gorman 
226401a99560SKefeng Wang DEFINE_SEQ_ATTRIBUTE(extfrag);
2265f1a5ab12SMel Gorman 
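/*
 * Expose both indexes under debugfs. With debugfs mounted at the usual
 * location the files can be read directly, e.g.:
 *
 *   cat /sys/kernel/debug/extfrag/unusable_index
 *   cat /sys/kernel/debug/extfrag/extfrag_index
 */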
2266d7a5752cSMel Gorman static int __init extfrag_debug_init(void)
2267d7a5752cSMel Gorman {
2268bde8bd8aSSasikantha babu 	struct dentry *extfrag_debug_root;
2269bde8bd8aSSasikantha babu 
2270d7a5752cSMel Gorman 	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
2271d7a5752cSMel Gorman 
2272d9f7979cSGreg Kroah-Hartman 	debugfs_create_file("unusable_index", 0444, extfrag_debug_root, NULL,
227301a99560SKefeng Wang 			    &unusable_fops);
2274d7a5752cSMel Gorman 
2275d9f7979cSGreg Kroah-Hartman 	debugfs_create_file("extfrag_index", 0444, extfrag_debug_root, NULL,
227601a99560SKefeng Wang 			    &extfrag_fops);
2277f1a5ab12SMel Gorman 
2278d7a5752cSMel Gorman 	return 0;
2279d7a5752cSMel Gorman }
2280d7a5752cSMel Gorman 
2281d7a5752cSMel Gorman module_init(extfrag_debug_init);
2282d7a5752cSMel Gorman #endif
2283