xref: /linux/mm/vmstat.c (revision c68ed7945701a38f2121ed74e23ff19c2052b4c2)
1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2f6ac2354SChristoph Lameter /*
3f6ac2354SChristoph Lameter  *  linux/mm/vmstat.c
4f6ac2354SChristoph Lameter  *
5f6ac2354SChristoph Lameter  *  Manages VM statistics
6f6ac2354SChristoph Lameter  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
72244b95aSChristoph Lameter  *
82244b95aSChristoph Lameter  *  zoned VM statistics
92244b95aSChristoph Lameter  *  Copyright (C) 2006 Silicon Graphics, Inc.,
102244b95aSChristoph Lameter  *		Christoph Lameter <christoph@lameter.com>
117cc36bbdSChristoph Lameter  *  Copyright (C) 2008-2014 Christoph Lameter
12f6ac2354SChristoph Lameter  */
138f32f7e5SAlexey Dobriyan #include <linux/fs.h>
14f6ac2354SChristoph Lameter #include <linux/mm.h>
154e950f6fSAlexey Dobriyan #include <linux/err.h>
162244b95aSChristoph Lameter #include <linux/module.h>
175a0e3ad6STejun Heo #include <linux/slab.h>
18df9ecabaSChristoph Lameter #include <linux/cpu.h>
197cc36bbdSChristoph Lameter #include <linux/cpumask.h>
20c748e134SAdrian Bunk #include <linux/vmstat.h>
213c486871SAndrew Morton #include <linux/proc_fs.h>
223c486871SAndrew Morton #include <linux/seq_file.h>
233c486871SAndrew Morton #include <linux/debugfs.h>
24e8edc6e0SAlexey Dobriyan #include <linux/sched.h>
25f1a5ab12SMel Gorman #include <linux/math64.h>
2679da826aSMichael Rubin #include <linux/writeback.h>
2736deb0beSNamhyung Kim #include <linux/compaction.h>
286e543d57SLisa Du #include <linux/mm_inline.h>
2948c96a36SJoonsoo Kim #include <linux/page_ext.h>
3048c96a36SJoonsoo Kim #include <linux/page_owner.h>
316e543d57SLisa Du 
326e543d57SLisa Du #include "internal.h"
33f6ac2354SChristoph Lameter 
344518085eSKemi Wang #ifdef CONFIG_NUMA
354518085eSKemi Wang int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
364518085eSKemi Wang 
374518085eSKemi Wang /* zero numa counters within a zone */
384518085eSKemi Wang static void zero_zone_numa_counters(struct zone *zone)
394518085eSKemi Wang {
404518085eSKemi Wang 	int item, cpu;
414518085eSKemi Wang 
42f19298b9SMel Gorman 	for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) {
43f19298b9SMel Gorman 		atomic_long_set(&zone->vm_numa_event[item], 0);
44f19298b9SMel Gorman 		for_each_online_cpu(cpu) {
45f19298b9SMel Gorman 			per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_event[item]
464518085eSKemi Wang 						= 0;
474518085eSKemi Wang 		}
484518085eSKemi Wang 	}
49f19298b9SMel Gorman }
504518085eSKemi Wang 
514518085eSKemi Wang /* zero numa counters of all the populated zones */
524518085eSKemi Wang static void zero_zones_numa_counters(void)
534518085eSKemi Wang {
544518085eSKemi Wang 	struct zone *zone;
554518085eSKemi Wang 
564518085eSKemi Wang 	for_each_populated_zone(zone)
574518085eSKemi Wang 		zero_zone_numa_counters(zone);
584518085eSKemi Wang }
594518085eSKemi Wang 
604518085eSKemi Wang /* zero global numa counters */
614518085eSKemi Wang static void zero_global_numa_counters(void)
624518085eSKemi Wang {
634518085eSKemi Wang 	int item;
644518085eSKemi Wang 
65f19298b9SMel Gorman 	for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
66f19298b9SMel Gorman 		atomic_long_set(&vm_numa_event[item], 0);
674518085eSKemi Wang }
684518085eSKemi Wang 
694518085eSKemi Wang static void invalid_numa_statistics(void)
704518085eSKemi Wang {
714518085eSKemi Wang 	zero_zones_numa_counters();
724518085eSKemi Wang 	zero_global_numa_counters();
734518085eSKemi Wang }
744518085eSKemi Wang 
754518085eSKemi Wang static DEFINE_MUTEX(vm_numa_stat_lock);
764518085eSKemi Wang 
774518085eSKemi Wang int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
7832927393SChristoph Hellwig 		void *buffer, size_t *length, loff_t *ppos)
794518085eSKemi Wang {
804518085eSKemi Wang 	int ret, oldval;
814518085eSKemi Wang 
824518085eSKemi Wang 	mutex_lock(&vm_numa_stat_lock);
834518085eSKemi Wang 	if (write)
844518085eSKemi Wang 		oldval = sysctl_vm_numa_stat;
854518085eSKemi Wang 	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
864518085eSKemi Wang 	if (ret || !write)
874518085eSKemi Wang 		goto out;
884518085eSKemi Wang 
894518085eSKemi Wang 	if (oldval == sysctl_vm_numa_stat)
904518085eSKemi Wang 		goto out;
914518085eSKemi Wang 	else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) {
924518085eSKemi Wang 		static_branch_enable(&vm_numa_stat_key);
934518085eSKemi Wang 		pr_info("enable numa statistics\n");
944518085eSKemi Wang 	} else {
954518085eSKemi Wang 		static_branch_disable(&vm_numa_stat_key);
964518085eSKemi Wang 		invalid_numa_statistics();
974518085eSKemi Wang 		pr_info("disable numa statistics, and clear numa counters\n");
984518085eSKemi Wang 	}
994518085eSKemi Wang 
1004518085eSKemi Wang out:
1014518085eSKemi Wang 	mutex_unlock(&vm_numa_stat_lock);
1024518085eSKemi Wang 	return ret;
1034518085eSKemi Wang }
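/*
 * Example (illustrative, assuming the usual vm.numa_stat sysctl wiring):
 *
 *	echo 0 > /proc/sys/vm/numa_stat		# disable and clear NUMA counters
 *	echo 1 > /proc/sys/vm/numa_stat		# re-enable NUMA counters
 */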
1044518085eSKemi Wang #endif /* CONFIG_NUMA */
1054518085eSKemi Wang 
106f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
107f8891e5eSChristoph Lameter DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
108f8891e5eSChristoph Lameter EXPORT_PER_CPU_SYMBOL(vm_event_states);
109f8891e5eSChristoph Lameter 
11031f961a8SMinchan Kim static void sum_vm_events(unsigned long *ret)
111f8891e5eSChristoph Lameter {
1129eccf2a8SChristoph Lameter 	int cpu;
113f8891e5eSChristoph Lameter 	int i;
114f8891e5eSChristoph Lameter 
115f8891e5eSChristoph Lameter 	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
116f8891e5eSChristoph Lameter 
11731f961a8SMinchan Kim 	for_each_online_cpu(cpu) {
118f8891e5eSChristoph Lameter 		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
119f8891e5eSChristoph Lameter 
120f8891e5eSChristoph Lameter 		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
121f8891e5eSChristoph Lameter 			ret[i] += this->event[i];
122f8891e5eSChristoph Lameter 	}
123f8891e5eSChristoph Lameter }
124f8891e5eSChristoph Lameter 
125f8891e5eSChristoph Lameter /*
126f8891e5eSChristoph Lameter  * Accumulate the vm event counters across all CPUs.
127f8891e5eSChristoph Lameter  * The result is unavoidably approximate - it can change
128f8891e5eSChristoph Lameter  * during and after execution of this function.
129f8891e5eSChristoph Lameter  */
130f8891e5eSChristoph Lameter void all_vm_events(unsigned long *ret)
131f8891e5eSChristoph Lameter {
1327625eccdSSebastian Andrzej Siewior 	cpus_read_lock();
13331f961a8SMinchan Kim 	sum_vm_events(ret);
1347625eccdSSebastian Andrzej Siewior 	cpus_read_unlock();
135f8891e5eSChristoph Lameter }
13632dd66fcSHeiko Carstens EXPORT_SYMBOL_GPL(all_vm_events);
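/*
 * Usage sketch (illustrative): callers snapshot every event counter into a
 * local array and index it by enum vm_event_item, for example:
 *
 *	unsigned long events[NR_VM_EVENT_ITEMS];
 *
 *	all_vm_events(events);
 *	pgfaults = events[PGFAULT];
 *
 * The result is an approximate global page-fault count, subject to the
 * caveats described in the comment above.
 */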
137f8891e5eSChristoph Lameter 
138f8891e5eSChristoph Lameter /*
139f8891e5eSChristoph Lameter  * Fold the foreign cpu events into our own.
140f8891e5eSChristoph Lameter  *
141f8891e5eSChristoph Lameter  * This is adding to the events on one processor
142f8891e5eSChristoph Lameter  * but keeps the global counts constant.
143f8891e5eSChristoph Lameter  */
144f8891e5eSChristoph Lameter void vm_events_fold_cpu(int cpu)
145f8891e5eSChristoph Lameter {
146f8891e5eSChristoph Lameter 	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
147f8891e5eSChristoph Lameter 	int i;
148f8891e5eSChristoph Lameter 
149f8891e5eSChristoph Lameter 	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
150f8891e5eSChristoph Lameter 		count_vm_events(i, fold_state->event[i]);
151f8891e5eSChristoph Lameter 		fold_state->event[i] = 0;
152f8891e5eSChristoph Lameter 	}
153f8891e5eSChristoph Lameter }
154f8891e5eSChristoph Lameter 
155f8891e5eSChristoph Lameter #endif /* CONFIG_VM_EVENT_COUNTERS */
156f8891e5eSChristoph Lameter 
1572244b95aSChristoph Lameter /*
1582244b95aSChristoph Lameter  * Manage combined zone based / global counters
1592244b95aSChristoph Lameter  *
1602244b95aSChristoph Lameter  * vm_stat contains the global counters
1612244b95aSChristoph Lameter  */
16275ef7184SMel Gorman atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
16375ef7184SMel Gorman atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
164f19298b9SMel Gorman atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp;
16575ef7184SMel Gorman EXPORT_SYMBOL(vm_zone_stat);
16675ef7184SMel Gorman EXPORT_SYMBOL(vm_node_stat);
1672244b95aSChristoph Lameter 
1682244b95aSChristoph Lameter #ifdef CONFIG_SMP
1692244b95aSChristoph Lameter 
170b44129b3SMel Gorman int calculate_pressure_threshold(struct zone *zone)
17188f5acf8SMel Gorman {
17288f5acf8SMel Gorman 	int threshold;
17388f5acf8SMel Gorman 	int watermark_distance;
17488f5acf8SMel Gorman 
17588f5acf8SMel Gorman 	/*
17688f5acf8SMel Gorman 	 * As vmstats are not up to date, there is drift between the estimated
17788f5acf8SMel Gorman 	 * and real values. For high thresholds and a high number of CPUs, it
17888f5acf8SMel Gorman 	 * is possible for the min watermark to be breached while the estimated
17988f5acf8SMel Gorman 	 * value looks fine. The pressure threshold is a reduced value such
18088f5acf8SMel Gorman 	 * that even the maximum amount of drift will not accidentally breach
18188f5acf8SMel Gorman 	 * the min watermark
18288f5acf8SMel Gorman 	 */
18388f5acf8SMel Gorman 	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
18488f5acf8SMel Gorman 	threshold = max(1, (int)(watermark_distance / num_online_cpus()));
18588f5acf8SMel Gorman 
18688f5acf8SMel Gorman 	/*
18788f5acf8SMel Gorman 	 * Maximum threshold is 125
18888f5acf8SMel Gorman 	 */
18988f5acf8SMel Gorman 	threshold = min(125, threshold);
19088f5acf8SMel Gorman 
19188f5acf8SMel Gorman 	return threshold;
19288f5acf8SMel Gorman }
19388f5acf8SMel Gorman 
194b44129b3SMel Gorman int calculate_normal_threshold(struct zone *zone)
195df9ecabaSChristoph Lameter {
196df9ecabaSChristoph Lameter 	int threshold;
197df9ecabaSChristoph Lameter 	int mem;	/* memory in 128 MB units */
1982244b95aSChristoph Lameter 
1992244b95aSChristoph Lameter 	/*
200df9ecabaSChristoph Lameter 	 * The threshold scales with the number of processors and the amount
201df9ecabaSChristoph Lameter 	 * of memory per zone. More memory means that we can defer updates for
202df9ecabaSChristoph Lameter 	 * longer, while more processors could lead to more contention.
203df9ecabaSChristoph Lameter 	 * fls() is used to have a cheap way of logarithmic scaling.
2042244b95aSChristoph Lameter 	 *
205df9ecabaSChristoph Lameter 	 * Some sample thresholds:
206df9ecabaSChristoph Lameter 	 *
207ea15ba17SMiaohe Lin 	 * Threshold	Processors	(fls)	Zonesize	fls(mem)+1
208df9ecabaSChristoph Lameter 	 * ------------------------------------------------------------------
209df9ecabaSChristoph Lameter 	 * 8		1		1	0.9-1 GB	4
210df9ecabaSChristoph Lameter 	 * 16		2		2	0.9-1 GB	4
211df9ecabaSChristoph Lameter 	 * 20 		2		2	1-2 GB		5
212df9ecabaSChristoph Lameter 	 * 24		2		2	2-4 GB		6
213df9ecabaSChristoph Lameter 	 * 28		2		2	4-8 GB		7
214df9ecabaSChristoph Lameter 	 * 32		2		2	8-16 GB		8
215df9ecabaSChristoph Lameter 	 * 4		2		2	<128M		1
216df9ecabaSChristoph Lameter 	 * 30		4		3	2-4 GB		5
217df9ecabaSChristoph Lameter 	 * 48		4		3	8-16 GB		8
218df9ecabaSChristoph Lameter 	 * 32		8		4	1-2 GB		4
219df9ecabaSChristoph Lameter 	 * 32		8		4	0.9-1GB		4
220df9ecabaSChristoph Lameter 	 * 10		16		5	<128M		1
221df9ecabaSChristoph Lameter 	 * 40		16		5	900M		4
222df9ecabaSChristoph Lameter 	 * 70		64		7	2-4 GB		5
223df9ecabaSChristoph Lameter 	 * 84		64		7	4-8 GB		6
224df9ecabaSChristoph Lameter 	 * 108		512		9	4-8 GB		6
225df9ecabaSChristoph Lameter 	 * 125		1024		10	8-16 GB		8
226df9ecabaSChristoph Lameter 	 * 125		1024		10	16-32 GB	9
2272244b95aSChristoph Lameter 	 */
228df9ecabaSChristoph Lameter 
2299705bea5SArun KS 	mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);
230df9ecabaSChristoph Lameter 
231df9ecabaSChristoph Lameter 	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
232df9ecabaSChristoph Lameter 
233df9ecabaSChristoph Lameter 	/*
234df9ecabaSChristoph Lameter 	 * Maximum threshold is 125
235df9ecabaSChristoph Lameter 	 */
236df9ecabaSChristoph Lameter 	threshold = min(125, threshold);
237df9ecabaSChristoph Lameter 
238df9ecabaSChristoph Lameter 	return threshold;
239df9ecabaSChristoph Lameter }
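/*
 * Worked example (illustrative): with 8 online CPUs (fls(8) == 4) and a zone
 * managing roughly 1.9 GB, mem == 15 in 128 MB units (fls(15) == 4), giving
 * threshold == 2 * 4 * (1 + 4) == 40, comfortably below the 125 cap.
 */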
240df9ecabaSChristoph Lameter 
241df9ecabaSChristoph Lameter /*
242df9ecabaSChristoph Lameter  * Refresh the thresholds for each zone.
243df9ecabaSChristoph Lameter  */
244a6cccdc3SKOSAKI Motohiro void refresh_zone_stat_thresholds(void)
2452244b95aSChristoph Lameter {
24675ef7184SMel Gorman 	struct pglist_data *pgdat;
247df9ecabaSChristoph Lameter 	struct zone *zone;
248df9ecabaSChristoph Lameter 	int cpu;
249df9ecabaSChristoph Lameter 	int threshold;
250df9ecabaSChristoph Lameter 
25175ef7184SMel Gorman 	/* Zero current pgdat thresholds */
25275ef7184SMel Gorman 	for_each_online_pgdat(pgdat) {
25375ef7184SMel Gorman 		for_each_online_cpu(cpu) {
25475ef7184SMel Gorman 			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
25575ef7184SMel Gorman 		}
25675ef7184SMel Gorman 	}
25775ef7184SMel Gorman 
258ee99c71cSKOSAKI Motohiro 	for_each_populated_zone(zone) {
25975ef7184SMel Gorman 		struct pglist_data *pgdat = zone->zone_pgdat;
260aa454840SChristoph Lameter 		unsigned long max_drift, tolerate_drift;
261aa454840SChristoph Lameter 
262b44129b3SMel Gorman 		threshold = calculate_normal_threshold(zone);
263df9ecabaSChristoph Lameter 
26475ef7184SMel Gorman 		for_each_online_cpu(cpu) {
26575ef7184SMel Gorman 			int pgdat_threshold;
26675ef7184SMel Gorman 
26728f836b6SMel Gorman 			per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
26899dcc3e5SChristoph Lameter 							= threshold;
2691d90ca89SKemi Wang 
27075ef7184SMel Gorman 			/* Base nodestat threshold on the largest populated zone. */
27175ef7184SMel Gorman 			pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
27275ef7184SMel Gorman 			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
27375ef7184SMel Gorman 				= max(threshold, pgdat_threshold);
27475ef7184SMel Gorman 		}
27575ef7184SMel Gorman 
276aa454840SChristoph Lameter 		/*
277aa454840SChristoph Lameter 		 * Only set percpu_drift_mark if there is a danger that
278aa454840SChristoph Lameter 		 * NR_FREE_PAGES reports that the low watermark is ok when in fact
279aa454840SChristoph Lameter 		 * the min watermark could be breached by an allocation
280aa454840SChristoph Lameter 		 */
281aa454840SChristoph Lameter 		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
282aa454840SChristoph Lameter 		max_drift = num_online_cpus() * threshold;
283aa454840SChristoph Lameter 		if (max_drift > tolerate_drift)
284aa454840SChristoph Lameter 			zone->percpu_drift_mark = high_wmark_pages(zone) +
285aa454840SChristoph Lameter 					max_drift;
286df9ecabaSChristoph Lameter 	}
2872244b95aSChristoph Lameter }
2882244b95aSChristoph Lameter 
289b44129b3SMel Gorman void set_pgdat_percpu_threshold(pg_data_t *pgdat,
290b44129b3SMel Gorman 				int (*calculate_pressure)(struct zone *))
29188f5acf8SMel Gorman {
29288f5acf8SMel Gorman 	struct zone *zone;
29388f5acf8SMel Gorman 	int cpu;
29488f5acf8SMel Gorman 	int threshold;
29588f5acf8SMel Gorman 	int i;
29688f5acf8SMel Gorman 
29788f5acf8SMel Gorman 	for (i = 0; i < pgdat->nr_zones; i++) {
29888f5acf8SMel Gorman 		zone = &pgdat->node_zones[i];
29988f5acf8SMel Gorman 		if (!zone->percpu_drift_mark)
30088f5acf8SMel Gorman 			continue;
30188f5acf8SMel Gorman 
302b44129b3SMel Gorman 		threshold = (*calculate_pressure)(zone);
3031d90ca89SKemi Wang 		for_each_online_cpu(cpu)
30428f836b6SMel Gorman 			per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
30588f5acf8SMel Gorman 							= threshold;
30688f5acf8SMel Gorman 	}
30788f5acf8SMel Gorman }
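/*
 * Note (illustrative): the intended caller is the reclaim path, which passes
 * calculate_pressure_threshold() while accurate NR_FREE_PAGES readings matter
 * (kswapd awake and reclaiming) and switches back to
 * calculate_normal_threshold() once pressure subsides, restoring the cheaper,
 * more heavily batched per-cpu updates.
 */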
30888f5acf8SMel Gorman 
3092244b95aSChristoph Lameter /*
310bea04b07SJianyu Zhan  * For use when we know that interrupts are disabled,
311bea04b07SJianyu Zhan  * or when we know that preemption is disabled and that
312bea04b07SJianyu Zhan  * particular counter cannot be updated from interrupt context.
3132244b95aSChristoph Lameter  */
3142244b95aSChristoph Lameter void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
3156cdb18adSHeiko Carstens 			   long delta)
3162244b95aSChristoph Lameter {
31728f836b6SMel Gorman 	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
31812938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
3192244b95aSChristoph Lameter 	long x;
32012938a92SChristoph Lameter 	long t;
3212244b95aSChristoph Lameter 
322*c68ed794SIngo Molnar 	/*
323*c68ed794SIngo Molnar 	 * Accurate vmstat updates require a RMW. On !PREEMPT_RT kernels,
324*c68ed794SIngo Molnar 	 * atomicity is provided by IRQs being disabled -- either explicitly
325*c68ed794SIngo Molnar 	 * or via local_lock_irq. On PREEMPT_RT, local_lock_irq only disables
326*c68ed794SIngo Molnar 	 * CPU migrations and preemption potentially corrupts a counter so
327*c68ed794SIngo Molnar 	 * disable preemption.
328*c68ed794SIngo Molnar 	 */
329*c68ed794SIngo Molnar 	if (IS_ENABLED(CONFIG_PREEMPT_RT))
330*c68ed794SIngo Molnar 		preempt_disable();
331*c68ed794SIngo Molnar 
33212938a92SChristoph Lameter 	x = delta + __this_cpu_read(*p);
3332244b95aSChristoph Lameter 
33412938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
33512938a92SChristoph Lameter 
33640610076SMiaohe Lin 	if (unlikely(abs(x) > t)) {
3372244b95aSChristoph Lameter 		zone_page_state_add(x, zone, item);
3382244b95aSChristoph Lameter 		x = 0;
3392244b95aSChristoph Lameter 	}
34012938a92SChristoph Lameter 	__this_cpu_write(*p, x);
341*c68ed794SIngo Molnar 
342*c68ed794SIngo Molnar 	if (IS_ENABLED(CONFIG_PREEMPT_RT))
343*c68ed794SIngo Molnar 		preempt_enable();
3442244b95aSChristoph Lameter }
3452244b95aSChristoph Lameter EXPORT_SYMBOL(__mod_zone_page_state);
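/*
 * Usage sketch (illustrative): a caller that already runs with interrupts
 * disabled, e.g. deep inside the page allocator, can charge an order-sized
 * block of pages directly:
 *
 *	__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
 *
 * Callers without that guarantee must use mod_zone_page_state() instead.
 */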
3462244b95aSChristoph Lameter 
34775ef7184SMel Gorman void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
34875ef7184SMel Gorman 				long delta)
34975ef7184SMel Gorman {
35075ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
35175ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
35275ef7184SMel Gorman 	long x;
35375ef7184SMel Gorman 	long t;
35475ef7184SMel Gorman 
355ea426c2aSRoman Gushchin 	if (vmstat_item_in_bytes(item)) {
356629484aeSJohannes Weiner 		/*
357629484aeSJohannes Weiner 		 * Only cgroups use subpage accounting right now; at
358629484aeSJohannes Weiner 		 * the global level, these items still change in
359629484aeSJohannes Weiner 		 * multiples of whole pages. Store them as pages
360629484aeSJohannes Weiner 		 * internally to keep the per-cpu counters compact.
361629484aeSJohannes Weiner 		 */
362ea426c2aSRoman Gushchin 		VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
363ea426c2aSRoman Gushchin 		delta >>= PAGE_SHIFT;
364ea426c2aSRoman Gushchin 	}
365ea426c2aSRoman Gushchin 
366*c68ed794SIngo Molnar 	/* See __mod_zone_page_state */
367*c68ed794SIngo Molnar 	if (IS_ENABLED(CONFIG_PREEMPT_RT))
368*c68ed794SIngo Molnar 		preempt_disable();
369*c68ed794SIngo Molnar 
37075ef7184SMel Gorman 	x = delta + __this_cpu_read(*p);
37175ef7184SMel Gorman 
37275ef7184SMel Gorman 	t = __this_cpu_read(pcp->stat_threshold);
37375ef7184SMel Gorman 
37440610076SMiaohe Lin 	if (unlikely(abs(x) > t)) {
37575ef7184SMel Gorman 		node_page_state_add(x, pgdat, item);
37675ef7184SMel Gorman 		x = 0;
37775ef7184SMel Gorman 	}
37875ef7184SMel Gorman 	__this_cpu_write(*p, x);
379*c68ed794SIngo Molnar 
380*c68ed794SIngo Molnar 	if (IS_ENABLED(CONFIG_PREEMPT_RT))
381*c68ed794SIngo Molnar 		preempt_enable();
38275ef7184SMel Gorman }
38375ef7184SMel Gorman EXPORT_SYMBOL(__mod_node_page_state);
38475ef7184SMel Gorman 
3852244b95aSChristoph Lameter /*
3862244b95aSChristoph Lameter  * Optimized increment and decrement functions.
3872244b95aSChristoph Lameter  *
3882244b95aSChristoph Lameter  * These are only for a single page and therefore can take a struct page *
3892244b95aSChristoph Lameter  * argument instead of struct zone *. This allows the inclusion of the code
3902244b95aSChristoph Lameter  * generated for page_zone(page) into the optimized functions.
3912244b95aSChristoph Lameter  *
3922244b95aSChristoph Lameter  * No overflow check is necessary and therefore the differential can be
3932244b95aSChristoph Lameter  * incremented or decremented in place which may allow the compilers to
3942244b95aSChristoph Lameter  * generate better code.
3952244b95aSChristoph Lameter  * The increment or decrement is known and therefore one boundary check can
3962244b95aSChristoph Lameter  * be omitted.
3972244b95aSChristoph Lameter  *
398df9ecabaSChristoph Lameter  * NOTE: These functions are very performance sensitive. Change only
399df9ecabaSChristoph Lameter  * with care.
400df9ecabaSChristoph Lameter  *
4012244b95aSChristoph Lameter  * Some processors have inc/dec instructions that are atomic vs an interrupt.
4022244b95aSChristoph Lameter  * However, the code must first determine the differential location in a zone
4032244b95aSChristoph Lameter  * based on the processor number and then inc/dec the counter. There is no
4042244b95aSChristoph Lameter  * guarantee without disabling preemption that the processor will not change
4052244b95aSChristoph Lameter  * in between and therefore the atomicity vs. interrupt cannot be exploited
4062244b95aSChristoph Lameter  * in a useful way here.
4072244b95aSChristoph Lameter  */
408c8785385SChristoph Lameter void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
4092244b95aSChristoph Lameter {
41028f836b6SMel Gorman 	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
41112938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
41212938a92SChristoph Lameter 	s8 v, t;
4132244b95aSChristoph Lameter 
414*c68ed794SIngo Molnar 	/* See __mod_zone_page_state */
415*c68ed794SIngo Molnar 	if (IS_ENABLED(CONFIG_PREEMPT_RT))
416*c68ed794SIngo Molnar 		preempt_disable();
417*c68ed794SIngo Molnar 
418908ee0f1SChristoph Lameter 	v = __this_cpu_inc_return(*p);
41912938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
42012938a92SChristoph Lameter 	if (unlikely(v > t)) {
42112938a92SChristoph Lameter 		s8 overstep = t >> 1;
4222244b95aSChristoph Lameter 
42312938a92SChristoph Lameter 		zone_page_state_add(v + overstep, zone, item);
42412938a92SChristoph Lameter 		__this_cpu_write(*p, -overstep);
4252244b95aSChristoph Lameter 	}
426*c68ed794SIngo Molnar 
427*c68ed794SIngo Molnar 	if (IS_ENABLED(CONFIG_PREEMPT_RT))
428*c68ed794SIngo Molnar 		preempt_enable();
4292244b95aSChristoph Lameter }
430ca889e6cSChristoph Lameter 
43175ef7184SMel Gorman void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
43275ef7184SMel Gorman {
43375ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
43475ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
43575ef7184SMel Gorman 	s8 v, t;
43675ef7184SMel Gorman 
437ea426c2aSRoman Gushchin 	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
438ea426c2aSRoman Gushchin 
439*c68ed794SIngo Molnar 	/* See __mod_zone_page_state */
440*c68ed794SIngo Molnar 	if (IS_ENABLED(CONFIG_PREEMPT_RT))
441*c68ed794SIngo Molnar 		preempt_disable();
442*c68ed794SIngo Molnar 
44375ef7184SMel Gorman 	v = __this_cpu_inc_return(*p);
44475ef7184SMel Gorman 	t = __this_cpu_read(pcp->stat_threshold);
44575ef7184SMel Gorman 	if (unlikely(v > t)) {
44675ef7184SMel Gorman 		s8 overstep = t >> 1;
44775ef7184SMel Gorman 
44875ef7184SMel Gorman 		node_page_state_add(v + overstep, pgdat, item);
44975ef7184SMel Gorman 		__this_cpu_write(*p, -overstep);
45075ef7184SMel Gorman 	}
451*c68ed794SIngo Molnar 
452*c68ed794SIngo Molnar 	if (IS_ENABLED(CONFIG_PREEMPT_RT))
453*c68ed794SIngo Molnar 		preempt_enable();
45475ef7184SMel Gorman }
45575ef7184SMel Gorman 
456ca889e6cSChristoph Lameter void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
457ca889e6cSChristoph Lameter {
458ca889e6cSChristoph Lameter 	__inc_zone_state(page_zone(page), item);
459ca889e6cSChristoph Lameter }
4602244b95aSChristoph Lameter EXPORT_SYMBOL(__inc_zone_page_state);
4612244b95aSChristoph Lameter 
46275ef7184SMel Gorman void __inc_node_page_state(struct page *page, enum node_stat_item item)
46375ef7184SMel Gorman {
46475ef7184SMel Gorman 	__inc_node_state(page_pgdat(page), item);
46575ef7184SMel Gorman }
46675ef7184SMel Gorman EXPORT_SYMBOL(__inc_node_page_state);
46775ef7184SMel Gorman 
468c8785385SChristoph Lameter void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
4692244b95aSChristoph Lameter {
47028f836b6SMel Gorman 	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
47112938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
47212938a92SChristoph Lameter 	s8 v, t;
4732244b95aSChristoph Lameter 
474*c68ed794SIngo Molnar 	/* See __mod_zone_page_state */
475*c68ed794SIngo Molnar 	if (IS_ENABLED(CONFIG_PREEMPT_RT))
476*c68ed794SIngo Molnar 		preempt_disable();
477*c68ed794SIngo Molnar 
478908ee0f1SChristoph Lameter 	v = __this_cpu_dec_return(*p);
47912938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
48012938a92SChristoph Lameter 	if (unlikely(v < -t)) {
48112938a92SChristoph Lameter 		s8 overstep = t >> 1;
4822244b95aSChristoph Lameter 
48312938a92SChristoph Lameter 		zone_page_state_add(v - overstep, zone, item);
48412938a92SChristoph Lameter 		__this_cpu_write(*p, overstep);
4852244b95aSChristoph Lameter 	}
486*c68ed794SIngo Molnar 
487*c68ed794SIngo Molnar 	if (IS_ENABLED(CONFIG_PREEMPT_RT))
488*c68ed794SIngo Molnar 		preempt_enable();
4892244b95aSChristoph Lameter }
490c8785385SChristoph Lameter 
49175ef7184SMel Gorman void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
49275ef7184SMel Gorman {
49375ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
49475ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
49575ef7184SMel Gorman 	s8 v, t;
49675ef7184SMel Gorman 
497ea426c2aSRoman Gushchin 	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
498ea426c2aSRoman Gushchin 
499*c68ed794SIngo Molnar 	/* See __mod_zone_page_state */
500*c68ed794SIngo Molnar 	if (IS_ENABLED(CONFIG_PREEMPT_RT))
501*c68ed794SIngo Molnar 		preempt_disable();
502*c68ed794SIngo Molnar 
50375ef7184SMel Gorman 	v = __this_cpu_dec_return(*p);
50475ef7184SMel Gorman 	t = __this_cpu_read(pcp->stat_threshold);
50575ef7184SMel Gorman 	if (unlikely(v < -t)) {
50675ef7184SMel Gorman 		s8 overstep = t >> 1;
50775ef7184SMel Gorman 
50875ef7184SMel Gorman 		node_page_state_add(v - overstep, pgdat, item);
50975ef7184SMel Gorman 		__this_cpu_write(*p, overstep);
51075ef7184SMel Gorman 	}
511*c68ed794SIngo Molnar 
512*c68ed794SIngo Molnar 	if (IS_ENABLED(CONFIG_PREEMPT_RT))
513*c68ed794SIngo Molnar 		preempt_enable();
51475ef7184SMel Gorman }
51575ef7184SMel Gorman 
516c8785385SChristoph Lameter void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
517c8785385SChristoph Lameter {
518c8785385SChristoph Lameter 	__dec_zone_state(page_zone(page), item);
519c8785385SChristoph Lameter }
5202244b95aSChristoph Lameter EXPORT_SYMBOL(__dec_zone_page_state);
5212244b95aSChristoph Lameter 
52275ef7184SMel Gorman void __dec_node_page_state(struct page *page, enum node_stat_item item)
52375ef7184SMel Gorman {
52475ef7184SMel Gorman 	__dec_node_state(page_pgdat(page), item);
52575ef7184SMel Gorman }
52675ef7184SMel Gorman EXPORT_SYMBOL(__dec_node_page_state);
52775ef7184SMel Gorman 
5284156153cSHeiko Carstens #ifdef CONFIG_HAVE_CMPXCHG_LOCAL
5297c839120SChristoph Lameter /*
5307c839120SChristoph Lameter  * If we have cmpxchg_local support then we can use this_cpu_cmpxchg and
5317c839120SChristoph Lameter  * avoid the overhead that comes with local_irq_save/restore.
5327c839120SChristoph Lameter  *
5337c839120SChristoph Lameter  * mod_state() modifies the zone counter state through atomic per cpu
5347c839120SChristoph Lameter  * operations.
5357c839120SChristoph Lameter  *
5367c839120SChristoph Lameter  * Overstep mode specifies how overstep should be handled:
5377c839120SChristoph Lameter  *     0       No overstepping
5387c839120SChristoph Lameter  *     1       Overstepping half of threshold
5397c839120SChristoph Lameter  *     -1      Overstepping minus half of threshold
5407c839120SChristoph Lameter  */
54175ef7184SMel Gorman static inline void mod_zone_state(struct zone *zone,
54275ef7184SMel Gorman        enum zone_stat_item item, long delta, int overstep_mode)
5437c839120SChristoph Lameter {
54428f836b6SMel Gorman 	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
5457c839120SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
5467c839120SChristoph Lameter 	long o, n, t, z;
5477c839120SChristoph Lameter 
5487c839120SChristoph Lameter 	do {
5497c839120SChristoph Lameter 		z = 0;  /* overflow to zone counters */
5507c839120SChristoph Lameter 
5517c839120SChristoph Lameter 		/*
5527c839120SChristoph Lameter 		 * The fetching of the stat_threshold is racy. We may apply
5537c839120SChristoph Lameter 		 * a counter threshold to the wrong cpu if we get
554d3bc2367SChristoph Lameter 		 * rescheduled while executing here. However, the next
555d3bc2367SChristoph Lameter 		 * counter update will apply the threshold again and
556d3bc2367SChristoph Lameter 		 * therefore bring the counter under the threshold again.
557d3bc2367SChristoph Lameter 		 *
558d3bc2367SChristoph Lameter 		 * Most of the time the thresholds are the same anyways
559d3bc2367SChristoph Lameter 		 * for all cpus in a zone.
5607c839120SChristoph Lameter 		 */
5617c839120SChristoph Lameter 		t = this_cpu_read(pcp->stat_threshold);
5627c839120SChristoph Lameter 
5637c839120SChristoph Lameter 		o = this_cpu_read(*p);
5647c839120SChristoph Lameter 		n = delta + o;
5657c839120SChristoph Lameter 
56640610076SMiaohe Lin 		if (abs(n) > t) {
5677c839120SChristoph Lameter 			int os = overstep_mode * (t >> 1);
5687c839120SChristoph Lameter 
5697c839120SChristoph Lameter 			/* Overflow must be added to zone counters */
5707c839120SChristoph Lameter 			z = n + os;
5717c839120SChristoph Lameter 			n = -os;
5727c839120SChristoph Lameter 		}
5737c839120SChristoph Lameter 	} while (this_cpu_cmpxchg(*p, o, n) != o);
5747c839120SChristoph Lameter 
5757c839120SChristoph Lameter 	if (z)
5767c839120SChristoph Lameter 		zone_page_state_add(z, zone, item);
5777c839120SChristoph Lameter }
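/*
 * Worked example (illustrative): with stat_threshold == 32 and overstep
 * mode 1, an update that pushes the per-cpu delta beyond 32 folds the
 * accumulated value plus 16 into the zone counter and resets the per-cpu
 * counter to -16, so the next several updates are again absorbed per cpu
 * without touching the shared atomic.
 */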
5787c839120SChristoph Lameter 
5797c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
5806cdb18adSHeiko Carstens 			 long delta)
5817c839120SChristoph Lameter {
58275ef7184SMel Gorman 	mod_zone_state(zone, item, delta, 0);
5837c839120SChristoph Lameter }
5847c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state);
5857c839120SChristoph Lameter 
5867c839120SChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item)
5877c839120SChristoph Lameter {
58875ef7184SMel Gorman 	mod_zone_state(page_zone(page), item, 1, 1);
5897c839120SChristoph Lameter }
5907c839120SChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state);
5917c839120SChristoph Lameter 
5927c839120SChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item)
5937c839120SChristoph Lameter {
59475ef7184SMel Gorman 	mod_zone_state(page_zone(page), item, -1, -1);
5957c839120SChristoph Lameter }
5967c839120SChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state);
59775ef7184SMel Gorman 
59875ef7184SMel Gorman static inline void mod_node_state(struct pglist_data *pgdat,
59975ef7184SMel Gorman        enum node_stat_item item, int delta, int overstep_mode)
60075ef7184SMel Gorman {
60175ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
60275ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
60375ef7184SMel Gorman 	long o, n, t, z;
60475ef7184SMel Gorman 
605ea426c2aSRoman Gushchin 	if (vmstat_item_in_bytes(item)) {
606629484aeSJohannes Weiner 		/*
607629484aeSJohannes Weiner 		 * Only cgroups use subpage accounting right now; at
608629484aeSJohannes Weiner 		 * the global level, these items still change in
609629484aeSJohannes Weiner 		 * multiples of whole pages. Store them as pages
610629484aeSJohannes Weiner 		 * internally to keep the per-cpu counters compact.
611629484aeSJohannes Weiner 		 */
612ea426c2aSRoman Gushchin 		VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
613ea426c2aSRoman Gushchin 		delta >>= PAGE_SHIFT;
614ea426c2aSRoman Gushchin 	}
615ea426c2aSRoman Gushchin 
61675ef7184SMel Gorman 	do {
61775ef7184SMel Gorman 		z = 0;  /* overflow to node counters */
61875ef7184SMel Gorman 
61975ef7184SMel Gorman 		/*
62075ef7184SMel Gorman 		 * The fetching of the stat_threshold is racy. We may apply
62175ef7184SMel Gorman 		 * a counter threshold to the wrong cpu if we get
62275ef7184SMel Gorman 		 * rescheduled while executing here. However, the next
62375ef7184SMel Gorman 		 * counter update will apply the threshold again and
62475ef7184SMel Gorman 		 * therefore bring the counter under the threshold again.
62575ef7184SMel Gorman 		 *
62675ef7184SMel Gorman 		 * Most of the time the thresholds are the same anyways
62775ef7184SMel Gorman 		 * for all cpus in a node.
62875ef7184SMel Gorman 		 */
62975ef7184SMel Gorman 		t = this_cpu_read(pcp->stat_threshold);
63075ef7184SMel Gorman 
63175ef7184SMel Gorman 		o = this_cpu_read(*p);
63275ef7184SMel Gorman 		n = delta + o;
63375ef7184SMel Gorman 
63440610076SMiaohe Lin 		if (abs(n) > t) {
63575ef7184SMel Gorman 			int os = overstep_mode * (t >> 1);
63675ef7184SMel Gorman 
63775ef7184SMel Gorman 			/* Overflow must be added to node counters */
63875ef7184SMel Gorman 			z = n + os;
63975ef7184SMel Gorman 			n = -os;
64075ef7184SMel Gorman 		}
64175ef7184SMel Gorman 	} while (this_cpu_cmpxchg(*p, o, n) != o);
64275ef7184SMel Gorman 
64375ef7184SMel Gorman 	if (z)
64475ef7184SMel Gorman 		node_page_state_add(z, pgdat, item);
64575ef7184SMel Gorman }
64675ef7184SMel Gorman 
64775ef7184SMel Gorman void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
64875ef7184SMel Gorman 					long delta)
64975ef7184SMel Gorman {
65075ef7184SMel Gorman 	mod_node_state(pgdat, item, delta, 0);
65175ef7184SMel Gorman }
65275ef7184SMel Gorman EXPORT_SYMBOL(mod_node_page_state);
65375ef7184SMel Gorman 
65475ef7184SMel Gorman void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
65575ef7184SMel Gorman {
65675ef7184SMel Gorman 	mod_node_state(pgdat, item, 1, 1);
65775ef7184SMel Gorman }
65875ef7184SMel Gorman 
65975ef7184SMel Gorman void inc_node_page_state(struct page *page, enum node_stat_item item)
66075ef7184SMel Gorman {
66175ef7184SMel Gorman 	mod_node_state(page_pgdat(page), item, 1, 1);
66275ef7184SMel Gorman }
66375ef7184SMel Gorman EXPORT_SYMBOL(inc_node_page_state);
66475ef7184SMel Gorman 
66575ef7184SMel Gorman void dec_node_page_state(struct page *page, enum node_stat_item item)
66675ef7184SMel Gorman {
66775ef7184SMel Gorman 	mod_node_state(page_pgdat(page), item, -1, -1);
66875ef7184SMel Gorman }
66975ef7184SMel Gorman EXPORT_SYMBOL(dec_node_page_state);
6707c839120SChristoph Lameter #else
6717c839120SChristoph Lameter /*
6727c839120SChristoph Lameter  * Use interrupt disable to serialize counter updates
6737c839120SChristoph Lameter  */
6747c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
6756cdb18adSHeiko Carstens 			 long delta)
6767c839120SChristoph Lameter {
6777c839120SChristoph Lameter 	unsigned long flags;
6787c839120SChristoph Lameter 
6797c839120SChristoph Lameter 	local_irq_save(flags);
6807c839120SChristoph Lameter 	__mod_zone_page_state(zone, item, delta);
6817c839120SChristoph Lameter 	local_irq_restore(flags);
6827c839120SChristoph Lameter }
6837c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state);
6847c839120SChristoph Lameter 
6852244b95aSChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item)
6862244b95aSChristoph Lameter {
6872244b95aSChristoph Lameter 	unsigned long flags;
6882244b95aSChristoph Lameter 	struct zone *zone;
6892244b95aSChristoph Lameter 
6902244b95aSChristoph Lameter 	zone = page_zone(page);
6912244b95aSChristoph Lameter 	local_irq_save(flags);
692ca889e6cSChristoph Lameter 	__inc_zone_state(zone, item);
6932244b95aSChristoph Lameter 	local_irq_restore(flags);
6942244b95aSChristoph Lameter }
6952244b95aSChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state);
6962244b95aSChristoph Lameter 
6972244b95aSChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item)
6982244b95aSChristoph Lameter {
6992244b95aSChristoph Lameter 	unsigned long flags;
7002244b95aSChristoph Lameter 
7012244b95aSChristoph Lameter 	local_irq_save(flags);
702a302eb4eSChristoph Lameter 	__dec_zone_page_state(page, item);
7032244b95aSChristoph Lameter 	local_irq_restore(flags);
7042244b95aSChristoph Lameter }
7052244b95aSChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state);
7062244b95aSChristoph Lameter 
70775ef7184SMel Gorman void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
70875ef7184SMel Gorman {
70975ef7184SMel Gorman 	unsigned long flags;
71075ef7184SMel Gorman 
71175ef7184SMel Gorman 	local_irq_save(flags);
71275ef7184SMel Gorman 	__inc_node_state(pgdat, item);
71375ef7184SMel Gorman 	local_irq_restore(flags);
71475ef7184SMel Gorman }
71575ef7184SMel Gorman EXPORT_SYMBOL(inc_node_state);
71675ef7184SMel Gorman 
71775ef7184SMel Gorman void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
71875ef7184SMel Gorman 					long delta)
71975ef7184SMel Gorman {
72075ef7184SMel Gorman 	unsigned long flags;
72175ef7184SMel Gorman 
72275ef7184SMel Gorman 	local_irq_save(flags);
72375ef7184SMel Gorman 	__mod_node_page_state(pgdat, item, delta);
72475ef7184SMel Gorman 	local_irq_restore(flags);
72575ef7184SMel Gorman }
72675ef7184SMel Gorman EXPORT_SYMBOL(mod_node_page_state);
72775ef7184SMel Gorman 
72875ef7184SMel Gorman void inc_node_page_state(struct page *page, enum node_stat_item item)
72975ef7184SMel Gorman {
73075ef7184SMel Gorman 	unsigned long flags;
73175ef7184SMel Gorman 	struct pglist_data *pgdat;
73275ef7184SMel Gorman 
73375ef7184SMel Gorman 	pgdat = page_pgdat(page);
73475ef7184SMel Gorman 	local_irq_save(flags);
73575ef7184SMel Gorman 	__inc_node_state(pgdat, item);
73675ef7184SMel Gorman 	local_irq_restore(flags);
73775ef7184SMel Gorman }
73875ef7184SMel Gorman EXPORT_SYMBOL(inc_node_page_state);
73975ef7184SMel Gorman 
74075ef7184SMel Gorman void dec_node_page_state(struct page *page, enum node_stat_item item)
74175ef7184SMel Gorman {
74275ef7184SMel Gorman 	unsigned long flags;
74375ef7184SMel Gorman 
74475ef7184SMel Gorman 	local_irq_save(flags);
74575ef7184SMel Gorman 	__dec_node_page_state(page, item);
74675ef7184SMel Gorman 	local_irq_restore(flags);
74775ef7184SMel Gorman }
74875ef7184SMel Gorman EXPORT_SYMBOL(dec_node_page_state);
74975ef7184SMel Gorman #endif /* CONFIG_HAVE_CMPXCHG_LOCAL */
7507cc36bbdSChristoph Lameter 
7517cc36bbdSChristoph Lameter /*
7527cc36bbdSChristoph Lameter  * Fold a differential into the global counters.
7537cc36bbdSChristoph Lameter  * Returns the number of counters updated.
7547cc36bbdSChristoph Lameter  */
75575ef7184SMel Gorman static int fold_diff(int *zone_diff, int *node_diff)
7564edb0748SChristoph Lameter {
7574edb0748SChristoph Lameter 	int i;
7587cc36bbdSChristoph Lameter 	int changes = 0;
7594edb0748SChristoph Lameter 
7604edb0748SChristoph Lameter 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
76175ef7184SMel Gorman 		if (zone_diff[i]) {
76275ef7184SMel Gorman 			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
76375ef7184SMel Gorman 			changes++;
76475ef7184SMel Gorman 	}
76575ef7184SMel Gorman 
76675ef7184SMel Gorman 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
76775ef7184SMel Gorman 		if (node_diff[i]) {
76875ef7184SMel Gorman 			atomic_long_add(node_diff[i], &vm_node_stat[i]);
7697cc36bbdSChristoph Lameter 			changes++;
7707cc36bbdSChristoph Lameter 	}
7717cc36bbdSChristoph Lameter 	return changes;
7724edb0748SChristoph Lameter }
773f19298b9SMel Gorman 
774f19298b9SMel Gorman #ifdef CONFIG_NUMA
775f19298b9SMel Gorman static void fold_vm_zone_numa_events(struct zone *zone)
776f19298b9SMel Gorman {
777f19298b9SMel Gorman 	unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, };
778f19298b9SMel Gorman 	int cpu;
779f19298b9SMel Gorman 	enum numa_stat_item item;
780f19298b9SMel Gorman 
781f19298b9SMel Gorman 	for_each_online_cpu(cpu) {
782f19298b9SMel Gorman 		struct per_cpu_zonestat *pzstats;
783f19298b9SMel Gorman 
784f19298b9SMel Gorman 		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
785f19298b9SMel Gorman 		for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
786f19298b9SMel Gorman 			zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0);
787f19298b9SMel Gorman 	}
788f19298b9SMel Gorman 
789f19298b9SMel Gorman 	for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
790f19298b9SMel Gorman 		zone_numa_event_add(zone_numa_events[item], zone, item);
791f19298b9SMel Gorman }
792f19298b9SMel Gorman 
793f19298b9SMel Gorman void fold_vm_numa_events(void)
794f19298b9SMel Gorman {
795f19298b9SMel Gorman 	struct zone *zone;
796f19298b9SMel Gorman 
797f19298b9SMel Gorman 	for_each_populated_zone(zone)
798f19298b9SMel Gorman 		fold_vm_zone_numa_events(zone);
799f19298b9SMel Gorman }
800f19298b9SMel Gorman #endif
8014edb0748SChristoph Lameter 
8022244b95aSChristoph Lameter /*
8032bb921e5SChristoph Lameter  * Update the zone counters for the current cpu.
804a7f75e25SChristoph Lameter  *
8054037d452SChristoph Lameter  * Note that refresh_cpu_vm_stats strives to only access
8064037d452SChristoph Lameter  * node local memory. The per cpu pagesets on remote zones are placed
8074037d452SChristoph Lameter  * in the memory local to the processor using that pageset. So the
8084037d452SChristoph Lameter  * loop over all zones will access a series of cachelines local to
8094037d452SChristoph Lameter  * the processor.
8104037d452SChristoph Lameter  *
8114037d452SChristoph Lameter  * The call to zone_page_state_add updates the cachelines with the
8124037d452SChristoph Lameter  * statistics in the remote zone struct as well as the global cachelines
8134037d452SChristoph Lameter  * with the global counters. These could cause remote node cache line
8144037d452SChristoph Lameter  * bouncing and should therefore only be done when necessary.
8157cc36bbdSChristoph Lameter  *
8167cc36bbdSChristoph Lameter  * The function returns the number of global counters updated.
8172244b95aSChristoph Lameter  */
8180eb77e98SChristoph Lameter static int refresh_cpu_vm_stats(bool do_pagesets)
8192244b95aSChristoph Lameter {
82075ef7184SMel Gorman 	struct pglist_data *pgdat;
8212244b95aSChristoph Lameter 	struct zone *zone;
8222244b95aSChristoph Lameter 	int i;
82375ef7184SMel Gorman 	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
82475ef7184SMel Gorman 	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
8257cc36bbdSChristoph Lameter 	int changes = 0;
8262244b95aSChristoph Lameter 
827ee99c71cSKOSAKI Motohiro 	for_each_populated_zone(zone) {
82828f836b6SMel Gorman 		struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
82928f836b6SMel Gorman #ifdef CONFIG_NUMA
83028f836b6SMel Gorman 		struct per_cpu_pages __percpu *pcp = zone->per_cpu_pageset;
83128f836b6SMel Gorman #endif
8322244b95aSChristoph Lameter 
833fbc2edb0SChristoph Lameter 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
834a7f75e25SChristoph Lameter 			int v;
835a7f75e25SChristoph Lameter 
83628f836b6SMel Gorman 			v = this_cpu_xchg(pzstats->vm_stat_diff[i], 0);
837fbc2edb0SChristoph Lameter 			if (v) {
838fbc2edb0SChristoph Lameter 
839a7f75e25SChristoph Lameter 				atomic_long_add(v, &zone->vm_stat[i]);
84075ef7184SMel Gorman 				global_zone_diff[i] += v;
8414037d452SChristoph Lameter #ifdef CONFIG_NUMA
8424037d452SChristoph Lameter 				/* 3 seconds idle till flush */
84328f836b6SMel Gorman 				__this_cpu_write(pcp->expire, 3);
8444037d452SChristoph Lameter #endif
8452244b95aSChristoph Lameter 			}
846fbc2edb0SChristoph Lameter 		}
8474037d452SChristoph Lameter #ifdef CONFIG_NUMA
8483a321d2aSKemi Wang 
8490eb77e98SChristoph Lameter 		if (do_pagesets) {
8500eb77e98SChristoph Lameter 			cond_resched();
8514037d452SChristoph Lameter 			/*
8524037d452SChristoph Lameter 			 * Deal with draining the remote pageset of this
8534037d452SChristoph Lameter 			 * processor
8544037d452SChristoph Lameter 			 *
8554037d452SChristoph Lameter 			 * Check if there are pages remaining in this pageset
8564037d452SChristoph Lameter 			 * if not then there is nothing to expire.
8574037d452SChristoph Lameter 			 */
85828f836b6SMel Gorman 			if (!__this_cpu_read(pcp->expire) ||
85928f836b6SMel Gorman 			       !__this_cpu_read(pcp->count))
8604037d452SChristoph Lameter 				continue;
8614037d452SChristoph Lameter 
8624037d452SChristoph Lameter 			/*
8634037d452SChristoph Lameter 			 * We never drain zones local to this processor.
8644037d452SChristoph Lameter 			 */
8654037d452SChristoph Lameter 			if (zone_to_nid(zone) == numa_node_id()) {
86628f836b6SMel Gorman 				__this_cpu_write(pcp->expire, 0);
8674037d452SChristoph Lameter 				continue;
8684037d452SChristoph Lameter 			}
8694037d452SChristoph Lameter 
87028f836b6SMel Gorman 			if (__this_cpu_dec_return(pcp->expire))
8714037d452SChristoph Lameter 				continue;
8724037d452SChristoph Lameter 
87328f836b6SMel Gorman 			if (__this_cpu_read(pcp->count)) {
87428f836b6SMel Gorman 				drain_zone_pages(zone, this_cpu_ptr(pcp));
8757cc36bbdSChristoph Lameter 				changes++;
8767cc36bbdSChristoph Lameter 			}
8770eb77e98SChristoph Lameter 		}
8784037d452SChristoph Lameter #endif
8792244b95aSChristoph Lameter 	}
88075ef7184SMel Gorman 
88175ef7184SMel Gorman 	for_each_online_pgdat(pgdat) {
88275ef7184SMel Gorman 		struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;
88375ef7184SMel Gorman 
88475ef7184SMel Gorman 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
88575ef7184SMel Gorman 			int v;
88675ef7184SMel Gorman 
88775ef7184SMel Gorman 			v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
88875ef7184SMel Gorman 			if (v) {
88975ef7184SMel Gorman 				atomic_long_add(v, &pgdat->vm_stat[i]);
89075ef7184SMel Gorman 				global_node_diff[i] += v;
89175ef7184SMel Gorman 			}
89275ef7184SMel Gorman 		}
89375ef7184SMel Gorman 	}
89475ef7184SMel Gorman 
89575ef7184SMel Gorman 	changes += fold_diff(global_zone_diff, global_node_diff);
8967cc36bbdSChristoph Lameter 	return changes;
8972244b95aSChristoph Lameter }
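/*
 * Note (illustrative): the pcp->expire handling above implements the
 * "3 seconds idle till flush" policy. A pass that finds outstanding counter
 * updates for a remote zone rearms expire to 3; later passes (driven roughly
 * once a second by the vmstat worker, by default) decrement it, and only when
 * it reaches zero with pages still queued is the remote pageset drained.
 */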
8982244b95aSChristoph Lameter 
89940f4b1eaSCody P Schafer /*
9002bb921e5SChristoph Lameter  * Fold the data for an offline cpu into the global array.
9012bb921e5SChristoph Lameter  * There cannot be any access by the offline cpu and therefore
9022bb921e5SChristoph Lameter  * synchronization is simplified.
9032bb921e5SChristoph Lameter  */
9042bb921e5SChristoph Lameter void cpu_vm_stats_fold(int cpu)
9052bb921e5SChristoph Lameter {
90675ef7184SMel Gorman 	struct pglist_data *pgdat;
9072bb921e5SChristoph Lameter 	struct zone *zone;
9082bb921e5SChristoph Lameter 	int i;
90975ef7184SMel Gorman 	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
91075ef7184SMel Gorman 	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
9112bb921e5SChristoph Lameter 
9122bb921e5SChristoph Lameter 	for_each_populated_zone(zone) {
91328f836b6SMel Gorman 		struct per_cpu_zonestat *pzstats;
9142bb921e5SChristoph Lameter 
91528f836b6SMel Gorman 		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
9162bb921e5SChristoph Lameter 
917f19298b9SMel Gorman 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
91828f836b6SMel Gorman 			if (pzstats->vm_stat_diff[i]) {
9192bb921e5SChristoph Lameter 				int v;
9202bb921e5SChristoph Lameter 
92128f836b6SMel Gorman 				v = pzstats->vm_stat_diff[i];
92228f836b6SMel Gorman 				pzstats->vm_stat_diff[i] = 0;
9232bb921e5SChristoph Lameter 				atomic_long_add(v, &zone->vm_stat[i]);
92475ef7184SMel Gorman 				global_zone_diff[i] += v;
9252bb921e5SChristoph Lameter 			}
926f19298b9SMel Gorman 		}
9273a321d2aSKemi Wang #ifdef CONFIG_NUMA
928f19298b9SMel Gorman 		for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
929f19298b9SMel Gorman 			if (pzstats->vm_numa_event[i]) {
930f19298b9SMel Gorman 				unsigned long v;
9313a321d2aSKemi Wang 
932f19298b9SMel Gorman 				v = pzstats->vm_numa_event[i];
933f19298b9SMel Gorman 				pzstats->vm_numa_event[i] = 0;
934f19298b9SMel Gorman 				zone_numa_event_add(v, zone, i);
935f19298b9SMel Gorman 			}
9363a321d2aSKemi Wang 		}
9373a321d2aSKemi Wang #endif
9382bb921e5SChristoph Lameter 	}
9392bb921e5SChristoph Lameter 
94075ef7184SMel Gorman 	for_each_online_pgdat(pgdat) {
94175ef7184SMel Gorman 		struct per_cpu_nodestat *p;
94275ef7184SMel Gorman 
94375ef7184SMel Gorman 		p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
94475ef7184SMel Gorman 
94575ef7184SMel Gorman 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
94675ef7184SMel Gorman 			if (p->vm_node_stat_diff[i]) {
94775ef7184SMel Gorman 				int v;
94875ef7184SMel Gorman 
94975ef7184SMel Gorman 				v = p->vm_node_stat_diff[i];
95075ef7184SMel Gorman 				p->vm_node_stat_diff[i] = 0;
95175ef7184SMel Gorman 				atomic_long_add(v, &pgdat->vm_stat[i]);
95275ef7184SMel Gorman 				global_node_diff[i] += v;
95375ef7184SMel Gorman 			}
95475ef7184SMel Gorman 	}
95575ef7184SMel Gorman 
95675ef7184SMel Gorman 	fold_diff(global_zone_diff, global_node_diff);
9572bb921e5SChristoph Lameter }
9582bb921e5SChristoph Lameter 
9592bb921e5SChristoph Lameter /*
96040f4b1eaSCody P Schafer  * This is only called if !populated_zone(zone), which implies no other users of
961f0953a1bSIngo Molnar  * pzstats->vm_stat_diff[] exist.
96240f4b1eaSCody P Schafer  */
96328f836b6SMel Gorman void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats)
9645a883813SMinchan Kim {
965f19298b9SMel Gorman 	unsigned long v;
9665a883813SMinchan Kim 	int i;
9675a883813SMinchan Kim 
968f19298b9SMel Gorman 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
96928f836b6SMel Gorman 		if (pzstats->vm_stat_diff[i]) {
970f19298b9SMel Gorman 			v = pzstats->vm_stat_diff[i];
97128f836b6SMel Gorman 			pzstats->vm_stat_diff[i] = 0;
972f19298b9SMel Gorman 			zone_page_state_add(v, zone, i);
973f19298b9SMel Gorman 		}
9745a883813SMinchan Kim 	}
9753a321d2aSKemi Wang 
9763a321d2aSKemi Wang #ifdef CONFIG_NUMA
977f19298b9SMel Gorman 	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
978f19298b9SMel Gorman 		if (pzstats->vm_numa_event[i]) {
979f19298b9SMel Gorman 			v = pzstats->vm_numa_event[i];
980f19298b9SMel Gorman 			pzstats->vm_numa_event[i] = 0;
981f19298b9SMel Gorman 			zone_numa_event_add(v, zone, i);
982f19298b9SMel Gorman 		}
9833a321d2aSKemi Wang 	}
9843a321d2aSKemi Wang #endif
9855a883813SMinchan Kim }
9862244b95aSChristoph Lameter #endif /* CONFIG_SMP */
9872244b95aSChristoph Lameter 
988ca889e6cSChristoph Lameter #ifdef CONFIG_NUMA
989ca889e6cSChristoph Lameter /*
99075ef7184SMel Gorman  * Determine the per node value of a stat item. This function
99175ef7184SMel Gorman  * is called frequently in a NUMA machine, so try to be as
99275ef7184SMel Gorman  * frugal as possible.
993c2d42c16SAndrew Morton  */
99475ef7184SMel Gorman unsigned long sum_zone_node_page_state(int node,
99575ef7184SMel Gorman 				 enum zone_stat_item item)
996c2d42c16SAndrew Morton {
997c2d42c16SAndrew Morton 	struct zone *zones = NODE_DATA(node)->node_zones;
998e87d59f7SJoonsoo Kim 	int i;
999e87d59f7SJoonsoo Kim 	unsigned long count = 0;
1000c2d42c16SAndrew Morton 
1001e87d59f7SJoonsoo Kim 	for (i = 0; i < MAX_NR_ZONES; i++)
1002e87d59f7SJoonsoo Kim 		count += zone_page_state(zones + i, item);
1003e87d59f7SJoonsoo Kim 
1004e87d59f7SJoonsoo Kim 	return count;
1005c2d42c16SAndrew Morton }
1006c2d42c16SAndrew Morton 
1007f19298b9SMel Gorman /* Determine the per node value of a numa stat item. */
1008f19298b9SMel Gorman unsigned long sum_zone_numa_event_state(int node,
10093a321d2aSKemi Wang 				 enum numa_stat_item item)
10103a321d2aSKemi Wang {
10113a321d2aSKemi Wang 	struct zone *zones = NODE_DATA(node)->node_zones;
10123a321d2aSKemi Wang 	unsigned long count = 0;
1013f19298b9SMel Gorman 	int i;
10143a321d2aSKemi Wang 
10153a321d2aSKemi Wang 	for (i = 0; i < MAX_NR_ZONES; i++)
1016f19298b9SMel Gorman 		count += zone_numa_event_state(zones + i, item);
10173a321d2aSKemi Wang 
10183a321d2aSKemi Wang 	return count;
10193a321d2aSKemi Wang }
10203a321d2aSKemi Wang 
102175ef7184SMel Gorman /*
102275ef7184SMel Gorman  * Determine the per node value of a stat item.
102375ef7184SMel Gorman  */
1024ea426c2aSRoman Gushchin unsigned long node_page_state_pages(struct pglist_data *pgdat,
102575ef7184SMel Gorman 				    enum node_stat_item item)
102675ef7184SMel Gorman {
102775ef7184SMel Gorman 	long x = atomic_long_read(&pgdat->vm_stat[item]);
102875ef7184SMel Gorman #ifdef CONFIG_SMP
102975ef7184SMel Gorman 	if (x < 0)
103075ef7184SMel Gorman 		x = 0;
103175ef7184SMel Gorman #endif
103275ef7184SMel Gorman 	return x;
103375ef7184SMel Gorman }
1034ea426c2aSRoman Gushchin 
1035ea426c2aSRoman Gushchin unsigned long node_page_state(struct pglist_data *pgdat,
1036ea426c2aSRoman Gushchin 			      enum node_stat_item item)
1037ea426c2aSRoman Gushchin {
1038ea426c2aSRoman Gushchin 	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
1039ea426c2aSRoman Gushchin 
1040ea426c2aSRoman Gushchin 	return node_page_state_pages(pgdat, item);
1041ea426c2aSRoman Gushchin }
1042ca889e6cSChristoph Lameter #endif
1043ca889e6cSChristoph Lameter 
1044d7a5752cSMel Gorman #ifdef CONFIG_COMPACTION
104536deb0beSNamhyung Kim 
1046d7a5752cSMel Gorman struct contig_page_info {
1047d7a5752cSMel Gorman 	unsigned long free_pages;
1048d7a5752cSMel Gorman 	unsigned long free_blocks_total;
1049d7a5752cSMel Gorman 	unsigned long free_blocks_suitable;
1050d7a5752cSMel Gorman };
1051d7a5752cSMel Gorman 
1052d7a5752cSMel Gorman /*
1053d7a5752cSMel Gorman  * Calculate the number of free pages in a zone, how many contiguous
1054d7a5752cSMel Gorman  * pages are free and how many are large enough to satisfy an allocation of
1055d7a5752cSMel Gorman  * the target size. Note that this function makes no attempt to estimate
1056d7a5752cSMel Gorman  * how many suitable free blocks there *might* be if MOVABLE pages were
1057d7a5752cSMel Gorman  * migrated. Calculating that is possible, but expensive and can be
1058d7a5752cSMel Gorman  * figured out from userspace
1059d7a5752cSMel Gorman  */
1060d7a5752cSMel Gorman static void fill_contig_page_info(struct zone *zone,
1061d7a5752cSMel Gorman 				unsigned int suitable_order,
1062d7a5752cSMel Gorman 				struct contig_page_info *info)
1063d7a5752cSMel Gorman {
1064d7a5752cSMel Gorman 	unsigned int order;
1065d7a5752cSMel Gorman 
1066d7a5752cSMel Gorman 	info->free_pages = 0;
1067d7a5752cSMel Gorman 	info->free_blocks_total = 0;
1068d7a5752cSMel Gorman 	info->free_blocks_suitable = 0;
1069d7a5752cSMel Gorman 
1070d7a5752cSMel Gorman 	for (order = 0; order < MAX_ORDER; order++) {
1071d7a5752cSMel Gorman 		unsigned long blocks;
1072d7a5752cSMel Gorman 
1073d7a5752cSMel Gorman 		/* Count number of free blocks */
1074d7a5752cSMel Gorman 		blocks = zone->free_area[order].nr_free;
1075d7a5752cSMel Gorman 		info->free_blocks_total += blocks;
1076d7a5752cSMel Gorman 
1077d7a5752cSMel Gorman 		/* Count free base pages */
1078d7a5752cSMel Gorman 		info->free_pages += blocks << order;
1079d7a5752cSMel Gorman 
1080d7a5752cSMel Gorman 		/* Count the suitable free blocks */
1081d7a5752cSMel Gorman 		if (order >= suitable_order)
1082d7a5752cSMel Gorman 			info->free_blocks_suitable += blocks <<
1083d7a5752cSMel Gorman 						(order - suitable_order);
1084d7a5752cSMel Gorman 	}
1085d7a5752cSMel Gorman }
1086f1a5ab12SMel Gorman 
1087f1a5ab12SMel Gorman /*
1088f1a5ab12SMel Gorman  * A fragmentation index only makes sense if an allocation of a requested
1089f1a5ab12SMel Gorman  * size would fail. If that is true, the fragmentation index indicates
1090f1a5ab12SMel Gorman  * whether external fragmentation or a lack of memory was the problem.
1091f1a5ab12SMel Gorman  * The value can be used to determine if page reclaim or compaction
1092f1a5ab12SMel Gorman  * should be used
1093f1a5ab12SMel Gorman  */
109456de7263SMel Gorman static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
1095f1a5ab12SMel Gorman {
1096f1a5ab12SMel Gorman 	unsigned long requested = 1UL << order;
1097f1a5ab12SMel Gorman 
109888d6ac40SWen Yang 	if (WARN_ON_ONCE(order >= MAX_ORDER))
109988d6ac40SWen Yang 		return 0;
110088d6ac40SWen Yang 
1101f1a5ab12SMel Gorman 	if (!info->free_blocks_total)
1102f1a5ab12SMel Gorman 		return 0;
1103f1a5ab12SMel Gorman 
1104f1a5ab12SMel Gorman 	/* Fragmentation index only makes sense when a request would fail */
1105f1a5ab12SMel Gorman 	if (info->free_blocks_suitable)
1106f1a5ab12SMel Gorman 		return -1000;
1107f1a5ab12SMel Gorman 
1108f1a5ab12SMel Gorman 	/*
1109f1a5ab12SMel Gorman 	 * Index is between 0 and 1, so return the value to 3 decimal places
1110f1a5ab12SMel Gorman 	 *
1111f1a5ab12SMel Gorman 	 * 0 => allocation would fail due to lack of memory
1112f1a5ab12SMel Gorman 	 * 1 => allocation would fail due to fragmentation
1113f1a5ab12SMel Gorman 	 */
1114f1a5ab12SMel Gorman 	return 1000 - div_u64((1000 + div_u64(info->free_pages * 1000ULL, requested)), info->free_blocks_total);
1115f1a5ab12SMel Gorman }
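
/*
 * Illustrative example with made-up numbers: for an order-2 request
 * (requested = 4) with free_pages = 1000, free_blocks_total = 500 and
 * no suitable blocks, __fragmentation_index() returns
 *	1000 - (1000 + 1000 * 1000 / 4) / 500 = 1000 - 502 = 498
 * i.e. 0.498, roughly midway between "out of memory" and "fragmented".
 */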
111656de7263SMel Gorman 
1117facdaa91SNitin Gupta /*
1118facdaa91SNitin Gupta  * Calculates external fragmentation within a zone with respect to the given
1119facdaa91SNitin Gupta  * order. It is defined as the percentage of free pages found in blocks of
1120facdaa91SNitin Gupta  * size less than 1 << order. It returns values in the range [0, 100].
1121facdaa91SNitin Gupta  */
1122d34c0a75SNitin Gupta unsigned int extfrag_for_order(struct zone *zone, unsigned int order)
1123facdaa91SNitin Gupta {
1124facdaa91SNitin Gupta 	struct contig_page_info info;
1125facdaa91SNitin Gupta 
1126facdaa91SNitin Gupta 	fill_contig_page_info(zone, order, &info);
1127facdaa91SNitin Gupta 	if (info.free_pages == 0)
1128facdaa91SNitin Gupta 		return 0;
1129facdaa91SNitin Gupta 
1130facdaa91SNitin Gupta 	return div_u64((info.free_pages -
1131facdaa91SNitin Gupta 			(info.free_blocks_suitable << order)) * 100,
1132facdaa91SNitin Gupta 			info.free_pages);
1133facdaa91SNitin Gupta }
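
/*
 * Illustrative example with made-up numbers: with order = 3,
 * free_pages = 1000 and free_blocks_suitable = 50, the suitable blocks
 * account for 50 << 3 = 400 pages, so extfrag_for_order() returns
 *	(1000 - 400) * 100 / 1000 = 60
 * i.e. 60% of the free pages sit in blocks smaller than order 3.
 */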
1134facdaa91SNitin Gupta 
113556de7263SMel Gorman /* Same as __fragmentation index but allocs contig_page_info on stack */
113656de7263SMel Gorman int fragmentation_index(struct zone *zone, unsigned int order)
113756de7263SMel Gorman {
113856de7263SMel Gorman 	struct contig_page_info info;
113956de7263SMel Gorman 
114056de7263SMel Gorman 	fill_contig_page_info(zone, order, &info);
114156de7263SMel Gorman 	return __fragmentation_index(order, &info);
114256de7263SMel Gorman }
1143d7a5752cSMel Gorman #endif
1144d7a5752cSMel Gorman 
1145ebc5d83dSKonstantin Khlebnikov #if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \
1146ebc5d83dSKonstantin Khlebnikov     defined(CONFIG_NUMA) || defined(CONFIG_MEMCG)
1147fa25c503SKOSAKI Motohiro #ifdef CONFIG_ZONE_DMA
1148fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA(xx) xx "_dma",
1149fa25c503SKOSAKI Motohiro #else
1150fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA(xx)
1151fa25c503SKOSAKI Motohiro #endif
1152fa25c503SKOSAKI Motohiro 
1153fa25c503SKOSAKI Motohiro #ifdef CONFIG_ZONE_DMA32
1154fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA32(xx) xx "_dma32",
1155fa25c503SKOSAKI Motohiro #else
1156fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA32(xx)
1157fa25c503SKOSAKI Motohiro #endif
1158fa25c503SKOSAKI Motohiro 
1159fa25c503SKOSAKI Motohiro #ifdef CONFIG_HIGHMEM
1160fa25c503SKOSAKI Motohiro #define TEXT_FOR_HIGHMEM(xx) xx "_high",
1161fa25c503SKOSAKI Motohiro #else
1162fa25c503SKOSAKI Motohiro #define TEXT_FOR_HIGHMEM(xx)
1163fa25c503SKOSAKI Motohiro #endif
1164fa25c503SKOSAKI Motohiro 
1165fa25c503SKOSAKI Motohiro #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
1166fa25c503SKOSAKI Motohiro 					TEXT_FOR_HIGHMEM(xx) xx "_movable",
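
/*
 * For example, with all zone types configured in, TEXTS_FOR_ZONES("pgalloc")
 * expands to the strings "pgalloc_dma", "pgalloc_dma32", "pgalloc_normal",
 * "pgalloc_high" and "pgalloc_movable".
 */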
1167fa25c503SKOSAKI Motohiro 
1168fa25c503SKOSAKI Motohiro const char * const vmstat_text[] = {
11698d92890bSNeilBrown 	/* enum zone_stat_item counters */
1170fa25c503SKOSAKI Motohiro 	"nr_free_pages",
117171c799f4SMinchan Kim 	"nr_zone_inactive_anon",
117271c799f4SMinchan Kim 	"nr_zone_active_anon",
117371c799f4SMinchan Kim 	"nr_zone_inactive_file",
117471c799f4SMinchan Kim 	"nr_zone_active_file",
117571c799f4SMinchan Kim 	"nr_zone_unevictable",
11765a1c84b4SMel Gorman 	"nr_zone_write_pending",
1177fa25c503SKOSAKI Motohiro 	"nr_mlock",
1178fa25c503SKOSAKI Motohiro 	"nr_bounce",
117991537feeSMinchan Kim #if IS_ENABLED(CONFIG_ZSMALLOC)
118091537feeSMinchan Kim 	"nr_zspages",
118191537feeSMinchan Kim #endif
11823a321d2aSKemi Wang 	"nr_free_cma",
11833a321d2aSKemi Wang 
11843a321d2aSKemi Wang 	/* enum numa_stat_item counters */
1185fa25c503SKOSAKI Motohiro #ifdef CONFIG_NUMA
1186fa25c503SKOSAKI Motohiro 	"numa_hit",
1187fa25c503SKOSAKI Motohiro 	"numa_miss",
1188fa25c503SKOSAKI Motohiro 	"numa_foreign",
1189fa25c503SKOSAKI Motohiro 	"numa_interleave",
1190fa25c503SKOSAKI Motohiro 	"numa_local",
1191fa25c503SKOSAKI Motohiro 	"numa_other",
1192fa25c503SKOSAKI Motohiro #endif
119309316c09SKonstantin Khlebnikov 
11949d7ea9a2SKonstantin Khlebnikov 	/* enum node_stat_item counters */
1195599d0c95SMel Gorman 	"nr_inactive_anon",
1196599d0c95SMel Gorman 	"nr_active_anon",
1197599d0c95SMel Gorman 	"nr_inactive_file",
1198599d0c95SMel Gorman 	"nr_active_file",
1199599d0c95SMel Gorman 	"nr_unevictable",
1200385386cfSJohannes Weiner 	"nr_slab_reclaimable",
1201385386cfSJohannes Weiner 	"nr_slab_unreclaimable",
1202599d0c95SMel Gorman 	"nr_isolated_anon",
1203599d0c95SMel Gorman 	"nr_isolated_file",
120468d48e6aSJohannes Weiner 	"workingset_nodes",
1205170b04b7SJoonsoo Kim 	"workingset_refault_anon",
1206170b04b7SJoonsoo Kim 	"workingset_refault_file",
1207170b04b7SJoonsoo Kim 	"workingset_activate_anon",
1208170b04b7SJoonsoo Kim 	"workingset_activate_file",
1209170b04b7SJoonsoo Kim 	"workingset_restore_anon",
1210170b04b7SJoonsoo Kim 	"workingset_restore_file",
12111e6b1085SMel Gorman 	"workingset_nodereclaim",
121250658e2eSMel Gorman 	"nr_anon_pages",
121350658e2eSMel Gorman 	"nr_mapped",
121411fb9989SMel Gorman 	"nr_file_pages",
121511fb9989SMel Gorman 	"nr_dirty",
121611fb9989SMel Gorman 	"nr_writeback",
121711fb9989SMel Gorman 	"nr_writeback_temp",
121811fb9989SMel Gorman 	"nr_shmem",
121911fb9989SMel Gorman 	"nr_shmem_hugepages",
122011fb9989SMel Gorman 	"nr_shmem_pmdmapped",
122160fbf0abSSong Liu 	"nr_file_hugepages",
122260fbf0abSSong Liu 	"nr_file_pmdmapped",
122311fb9989SMel Gorman 	"nr_anon_transparent_hugepages",
1224c4a25635SMel Gorman 	"nr_vmscan_write",
1225c4a25635SMel Gorman 	"nr_vmscan_immediate_reclaim",
1226c4a25635SMel Gorman 	"nr_dirtied",
1227c4a25635SMel Gorman 	"nr_written",
1228b29940c1SVlastimil Babka 	"nr_kernel_misc_reclaimable",
12291970dc6fSJohn Hubbard 	"nr_foll_pin_acquired",
12301970dc6fSJohn Hubbard 	"nr_foll_pin_released",
1231991e7673SShakeel Butt 	"nr_kernel_stack",
1232991e7673SShakeel Butt #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
1233991e7673SShakeel Butt 	"nr_shadow_call_stack",
1234991e7673SShakeel Butt #endif
1235f0c0c115SShakeel Butt 	"nr_page_table_pages",
1236b6038942SShakeel Butt #ifdef CONFIG_SWAP
1237b6038942SShakeel Butt 	"nr_swapcached",
1238b6038942SShakeel Butt #endif
1239599d0c95SMel Gorman 
124009316c09SKonstantin Khlebnikov 	/* enum writeback_stat_item counters */
1241fa25c503SKOSAKI Motohiro 	"nr_dirty_threshold",
1242fa25c503SKOSAKI Motohiro 	"nr_dirty_background_threshold",
1243fa25c503SKOSAKI Motohiro 
1244ebc5d83dSKonstantin Khlebnikov #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
124509316c09SKonstantin Khlebnikov 	/* enum vm_event_item counters */
1246fa25c503SKOSAKI Motohiro 	"pgpgin",
1247fa25c503SKOSAKI Motohiro 	"pgpgout",
1248fa25c503SKOSAKI Motohiro 	"pswpin",
1249fa25c503SKOSAKI Motohiro 	"pswpout",
1250fa25c503SKOSAKI Motohiro 
1251fa25c503SKOSAKI Motohiro 	TEXTS_FOR_ZONES("pgalloc")
12527cc30fcfSMel Gorman 	TEXTS_FOR_ZONES("allocstall")
12537cc30fcfSMel Gorman 	TEXTS_FOR_ZONES("pgskip")
1254fa25c503SKOSAKI Motohiro 
1255fa25c503SKOSAKI Motohiro 	"pgfree",
1256fa25c503SKOSAKI Motohiro 	"pgactivate",
1257fa25c503SKOSAKI Motohiro 	"pgdeactivate",
1258f7ad2a6cSShaohua Li 	"pglazyfree",
1259fa25c503SKOSAKI Motohiro 
1260fa25c503SKOSAKI Motohiro 	"pgfault",
1261fa25c503SKOSAKI Motohiro 	"pgmajfault",
1262854e9ed0SMinchan Kim 	"pglazyfreed",
1263fa25c503SKOSAKI Motohiro 
1264599d0c95SMel Gorman 	"pgrefill",
1265798a6b87SPeter Xu 	"pgreuse",
1266599d0c95SMel Gorman 	"pgsteal_kswapd",
1267599d0c95SMel Gorman 	"pgsteal_direct",
1268668e4147SYang Shi 	"pgdemote_kswapd",
1269668e4147SYang Shi 	"pgdemote_direct",
1270599d0c95SMel Gorman 	"pgscan_kswapd",
1271599d0c95SMel Gorman 	"pgscan_direct",
127268243e76SMel Gorman 	"pgscan_direct_throttle",
1273497a6c1bSJohannes Weiner 	"pgscan_anon",
1274497a6c1bSJohannes Weiner 	"pgscan_file",
1275497a6c1bSJohannes Weiner 	"pgsteal_anon",
1276497a6c1bSJohannes Weiner 	"pgsteal_file",
1277fa25c503SKOSAKI Motohiro 
1278fa25c503SKOSAKI Motohiro #ifdef CONFIG_NUMA
1279fa25c503SKOSAKI Motohiro 	"zone_reclaim_failed",
1280fa25c503SKOSAKI Motohiro #endif
1281fa25c503SKOSAKI Motohiro 	"pginodesteal",
1282fa25c503SKOSAKI Motohiro 	"slabs_scanned",
1283fa25c503SKOSAKI Motohiro 	"kswapd_inodesteal",
1284fa25c503SKOSAKI Motohiro 	"kswapd_low_wmark_hit_quickly",
1285fa25c503SKOSAKI Motohiro 	"kswapd_high_wmark_hit_quickly",
1286fa25c503SKOSAKI Motohiro 	"pageoutrun",
1287fa25c503SKOSAKI Motohiro 
1288fa25c503SKOSAKI Motohiro 	"pgrotated",
1289fa25c503SKOSAKI Motohiro 
12905509a5d2SDave Hansen 	"drop_pagecache",
12915509a5d2SDave Hansen 	"drop_slab",
12928e675f7aSKonstantin Khlebnikov 	"oom_kill",
12935509a5d2SDave Hansen 
129403c5a6e1SMel Gorman #ifdef CONFIG_NUMA_BALANCING
129503c5a6e1SMel Gorman 	"numa_pte_updates",
129672403b4aSMel Gorman 	"numa_huge_pte_updates",
129703c5a6e1SMel Gorman 	"numa_hint_faults",
129803c5a6e1SMel Gorman 	"numa_hint_faults_local",
129903c5a6e1SMel Gorman 	"numa_pages_migrated",
130003c5a6e1SMel Gorman #endif
13015647bc29SMel Gorman #ifdef CONFIG_MIGRATION
13025647bc29SMel Gorman 	"pgmigrate_success",
13035647bc29SMel Gorman 	"pgmigrate_fail",
13041a5bae25SAnshuman Khandual 	"thp_migration_success",
13051a5bae25SAnshuman Khandual 	"thp_migration_fail",
13061a5bae25SAnshuman Khandual 	"thp_migration_split",
13075647bc29SMel Gorman #endif
1308fa25c503SKOSAKI Motohiro #ifdef CONFIG_COMPACTION
1309397487dbSMel Gorman 	"compact_migrate_scanned",
1310397487dbSMel Gorman 	"compact_free_scanned",
1311397487dbSMel Gorman 	"compact_isolated",
1312fa25c503SKOSAKI Motohiro 	"compact_stall",
1313fa25c503SKOSAKI Motohiro 	"compact_fail",
1314fa25c503SKOSAKI Motohiro 	"compact_success",
1315698b1b30SVlastimil Babka 	"compact_daemon_wake",
13167f354a54SDavid Rientjes 	"compact_daemon_migrate_scanned",
13177f354a54SDavid Rientjes 	"compact_daemon_free_scanned",
1318fa25c503SKOSAKI Motohiro #endif
1319fa25c503SKOSAKI Motohiro 
1320fa25c503SKOSAKI Motohiro #ifdef CONFIG_HUGETLB_PAGE
1321fa25c503SKOSAKI Motohiro 	"htlb_buddy_alloc_success",
1322fa25c503SKOSAKI Motohiro 	"htlb_buddy_alloc_fail",
1323fa25c503SKOSAKI Motohiro #endif
1324bbb26920SMinchan Kim #ifdef CONFIG_CMA
1325bbb26920SMinchan Kim 	"cma_alloc_success",
1326bbb26920SMinchan Kim 	"cma_alloc_fail",
1327bbb26920SMinchan Kim #endif
1328fa25c503SKOSAKI Motohiro 	"unevictable_pgs_culled",
1329fa25c503SKOSAKI Motohiro 	"unevictable_pgs_scanned",
1330fa25c503SKOSAKI Motohiro 	"unevictable_pgs_rescued",
1331fa25c503SKOSAKI Motohiro 	"unevictable_pgs_mlocked",
1332fa25c503SKOSAKI Motohiro 	"unevictable_pgs_munlocked",
1333fa25c503SKOSAKI Motohiro 	"unevictable_pgs_cleared",
1334fa25c503SKOSAKI Motohiro 	"unevictable_pgs_stranded",
1335fa25c503SKOSAKI Motohiro 
1336fa25c503SKOSAKI Motohiro #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1337fa25c503SKOSAKI Motohiro 	"thp_fault_alloc",
1338fa25c503SKOSAKI Motohiro 	"thp_fault_fallback",
133985b9f46eSDavid Rientjes 	"thp_fault_fallback_charge",
1340fa25c503SKOSAKI Motohiro 	"thp_collapse_alloc",
1341fa25c503SKOSAKI Motohiro 	"thp_collapse_alloc_failed",
134295ecedcdSKirill A. Shutemov 	"thp_file_alloc",
1343dcdf11eeSDavid Rientjes 	"thp_file_fallback",
134485b9f46eSDavid Rientjes 	"thp_file_fallback_charge",
134595ecedcdSKirill A. Shutemov 	"thp_file_mapped",
1346122afea9SKirill A. Shutemov 	"thp_split_page",
1347122afea9SKirill A. Shutemov 	"thp_split_page_failed",
1348f9719a03SKirill A. Shutemov 	"thp_deferred_split_page",
1349122afea9SKirill A. Shutemov 	"thp_split_pmd",
1350ce9311cfSYisheng Xie #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
1351ce9311cfSYisheng Xie 	"thp_split_pud",
1352ce9311cfSYisheng Xie #endif
1353d8a8e1f0SKirill A. Shutemov 	"thp_zero_page_alloc",
1354d8a8e1f0SKirill A. Shutemov 	"thp_zero_page_alloc_failed",
1355225311a4SHuang Ying 	"thp_swpout",
1356fe490cc0SHuang Ying 	"thp_swpout_fallback",
1357fa25c503SKOSAKI Motohiro #endif
135809316c09SKonstantin Khlebnikov #ifdef CONFIG_MEMORY_BALLOON
135909316c09SKonstantin Khlebnikov 	"balloon_inflate",
136009316c09SKonstantin Khlebnikov 	"balloon_deflate",
136109316c09SKonstantin Khlebnikov #ifdef CONFIG_BALLOON_COMPACTION
136209316c09SKonstantin Khlebnikov 	"balloon_migrate",
136309316c09SKonstantin Khlebnikov #endif
136409316c09SKonstantin Khlebnikov #endif /* CONFIG_MEMORY_BALLOON */
1365ec659934SMel Gorman #ifdef CONFIG_DEBUG_TLBFLUSH
13669824cf97SDave Hansen 	"nr_tlb_remote_flush",
13679824cf97SDave Hansen 	"nr_tlb_remote_flush_received",
13689824cf97SDave Hansen 	"nr_tlb_local_flush_all",
13699824cf97SDave Hansen 	"nr_tlb_local_flush_one",
1370ec659934SMel Gorman #endif /* CONFIG_DEBUG_TLBFLUSH */
1371fa25c503SKOSAKI Motohiro 
13724f115147SDavidlohr Bueso #ifdef CONFIG_DEBUG_VM_VMACACHE
13734f115147SDavidlohr Bueso 	"vmacache_find_calls",
13744f115147SDavidlohr Bueso 	"vmacache_find_hits",
13754f115147SDavidlohr Bueso #endif
1376cbc65df2SHuang Ying #ifdef CONFIG_SWAP
1377cbc65df2SHuang Ying 	"swap_ra",
1378cbc65df2SHuang Ying 	"swap_ra_hit",
1379cbc65df2SHuang Ying #endif
1380575299eaSSaravanan D #ifdef CONFIG_X86
1381575299eaSSaravanan D 	"direct_map_level2_splits",
1382575299eaSSaravanan D 	"direct_map_level3_splits",
1383575299eaSSaravanan D #endif
1384ebc5d83dSKonstantin Khlebnikov #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
1385fa25c503SKOSAKI Motohiro };
1386ebc5d83dSKonstantin Khlebnikov #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
1387fa25c503SKOSAKI Motohiro 
13883c486871SAndrew Morton #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
13893c486871SAndrew Morton      defined(CONFIG_PROC_FS)
13903c486871SAndrew Morton static void *frag_start(struct seq_file *m, loff_t *pos)
13913c486871SAndrew Morton {
13923c486871SAndrew Morton 	pg_data_t *pgdat;
13933c486871SAndrew Morton 	loff_t node = *pos;
13943c486871SAndrew Morton 
13953c486871SAndrew Morton 	for (pgdat = first_online_pgdat();
13963c486871SAndrew Morton 	     pgdat && node;
13973c486871SAndrew Morton 	     pgdat = next_online_pgdat(pgdat))
13983c486871SAndrew Morton 		--node;
13993c486871SAndrew Morton 
14003c486871SAndrew Morton 	return pgdat;
14013c486871SAndrew Morton }
14023c486871SAndrew Morton 
14033c486871SAndrew Morton static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
14043c486871SAndrew Morton {
14053c486871SAndrew Morton 	pg_data_t *pgdat = (pg_data_t *)arg;
14063c486871SAndrew Morton 
14073c486871SAndrew Morton 	(*pos)++;
14083c486871SAndrew Morton 	return next_online_pgdat(pgdat);
14093c486871SAndrew Morton }
14103c486871SAndrew Morton 
14113c486871SAndrew Morton static void frag_stop(struct seq_file *m, void *arg)
14123c486871SAndrew Morton {
14133c486871SAndrew Morton }
14143c486871SAndrew Morton 
1415b2bd8598SDavid Rientjes /*
1416b2bd8598SDavid Rientjes  * Walk zones in a node and print using a callback.
1417b2bd8598SDavid Rientjes  * If @assert_populated is true, only use callback for zones that are populated.
1418b2bd8598SDavid Rientjes  */
14193c486871SAndrew Morton static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
1420727c080fSVinayak Menon 		bool assert_populated, bool nolock,
14213c486871SAndrew Morton 		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
14223c486871SAndrew Morton {
14233c486871SAndrew Morton 	struct zone *zone;
14243c486871SAndrew Morton 	struct zone *node_zones = pgdat->node_zones;
14253c486871SAndrew Morton 	unsigned long flags;
14263c486871SAndrew Morton 
14273c486871SAndrew Morton 	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
1428b2bd8598SDavid Rientjes 		if (assert_populated && !populated_zone(zone))
14293c486871SAndrew Morton 			continue;
14303c486871SAndrew Morton 
1431727c080fSVinayak Menon 		if (!nolock)
14323c486871SAndrew Morton 			spin_lock_irqsave(&zone->lock, flags);
14333c486871SAndrew Morton 		print(m, pgdat, zone);
1434727c080fSVinayak Menon 		if (!nolock)
14353c486871SAndrew Morton 			spin_unlock_irqrestore(&zone->lock, flags);
14363c486871SAndrew Morton 	}
14373c486871SAndrew Morton }
14383c486871SAndrew Morton #endif
14393c486871SAndrew Morton 
1440d7a5752cSMel Gorman #ifdef CONFIG_PROC_FS
1441467c996cSMel Gorman static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
1442467c996cSMel Gorman 						struct zone *zone)
1443467c996cSMel Gorman {
1444467c996cSMel Gorman 	int order;
1445467c996cSMel Gorman 
1446f6ac2354SChristoph Lameter 	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1447f6ac2354SChristoph Lameter 	for (order = 0; order < MAX_ORDER; ++order)
1448f6ac2354SChristoph Lameter 		seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
1449f6ac2354SChristoph Lameter 	seq_putc(m, '\n');
1450f6ac2354SChristoph Lameter }
1451467c996cSMel Gorman 
1452467c996cSMel Gorman /*
1453467c996cSMel Gorman  * This walks the free areas for each zone.
1454467c996cSMel Gorman  */
1455467c996cSMel Gorman static int frag_show(struct seq_file *m, void *arg)
1456467c996cSMel Gorman {
1457467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1458727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, frag_show_print);
1459467c996cSMel Gorman 	return 0;
1460467c996cSMel Gorman }
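
/*
 * Example of one /proc/buddyinfo line produced by frag_show_print()
 * (values are illustrative only):
 *	Node 0, zone   Normal    210     93     41     17      6      2      1
 * i.e. one nr_free count per order, from order 0 up to MAX_ORDER - 1.
 */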
1461467c996cSMel Gorman 
1462467c996cSMel Gorman static void pagetypeinfo_showfree_print(struct seq_file *m,
1463467c996cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1464467c996cSMel Gorman {
1465467c996cSMel Gorman 	int order, mtype;
1466467c996cSMel Gorman 
1467467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
1468467c996cSMel Gorman 		seq_printf(m, "Node %4d, zone %8s, type %12s ",
1469467c996cSMel Gorman 					pgdat->node_id,
1470467c996cSMel Gorman 					zone->name,
1471467c996cSMel Gorman 					migratetype_names[mtype]);
1472467c996cSMel Gorman 		for (order = 0; order < MAX_ORDER; ++order) {
1473467c996cSMel Gorman 			unsigned long freecount = 0;
1474467c996cSMel Gorman 			struct free_area *area;
1475467c996cSMel Gorman 			struct list_head *curr;
147693b3a674SMichal Hocko 			bool overflow = false;
1477467c996cSMel Gorman 
1478467c996cSMel Gorman 			area = &(zone->free_area[order]);
1479467c996cSMel Gorman 
148093b3a674SMichal Hocko 			list_for_each(curr, &area->free_list[mtype]) {
148193b3a674SMichal Hocko 				/*
148293b3a674SMichal Hocko 				 * Cap the free_list iteration because it might
148393b3a674SMichal Hocko 				 * be really large and we are under a spinlock,
148493b3a674SMichal Hocko 				 * so a long time spent here could trigger a
148593b3a674SMichal Hocko 				 * hard lockup detector. Anyway, this is a
148693b3a674SMichal Hocko 				 * debugging tool, so knowing there is a handful
148793b3a674SMichal Hocko 				 * of pages of this order should be more than
148893b3a674SMichal Hocko 				 * sufficient.
148993b3a674SMichal Hocko 				 */
149093b3a674SMichal Hocko 				if (++freecount >= 100000) {
149193b3a674SMichal Hocko 					overflow = true;
149293b3a674SMichal Hocko 					break;
149393b3a674SMichal Hocko 				}
149493b3a674SMichal Hocko 			}
149593b3a674SMichal Hocko 			seq_printf(m, "%s%6lu ", overflow ? ">" : "", freecount);
149693b3a674SMichal Hocko 			spin_unlock_irq(&zone->lock);
149793b3a674SMichal Hocko 			cond_resched();
149893b3a674SMichal Hocko 			spin_lock_irq(&zone->lock);
1499467c996cSMel Gorman 		}
1500467c996cSMel Gorman 		seq_putc(m, '\n');
1501467c996cSMel Gorman 	}
1502467c996cSMel Gorman }
1503467c996cSMel Gorman 
1504467c996cSMel Gorman /* Print out the free pages at each order for each migratetype */
150533090af9SMiaohe Lin static void pagetypeinfo_showfree(struct seq_file *m, void *arg)
1506467c996cSMel Gorman {
1507467c996cSMel Gorman 	int order;
1508467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1509467c996cSMel Gorman 
1510467c996cSMel Gorman 	/* Print header */
1511467c996cSMel Gorman 	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
1512467c996cSMel Gorman 	for (order = 0; order < MAX_ORDER; ++order)
1513467c996cSMel Gorman 		seq_printf(m, "%6d ", order);
1514467c996cSMel Gorman 	seq_putc(m, '\n');
1515467c996cSMel Gorman 
1516727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
1517467c996cSMel Gorman }
1518467c996cSMel Gorman 
1519467c996cSMel Gorman static void pagetypeinfo_showblockcount_print(struct seq_file *m,
1520467c996cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1521467c996cSMel Gorman {
1522467c996cSMel Gorman 	int mtype;
1523467c996cSMel Gorman 	unsigned long pfn;
1524467c996cSMel Gorman 	unsigned long start_pfn = zone->zone_start_pfn;
1525108bcc96SCody P Schafer 	unsigned long end_pfn = zone_end_pfn(zone);
1526467c996cSMel Gorman 	unsigned long count[MIGRATE_TYPES] = { 0, };
1527467c996cSMel Gorman 
1528467c996cSMel Gorman 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
1529467c996cSMel Gorman 		struct page *page;
1530467c996cSMel Gorman 
1531d336e94eSMichal Hocko 		page = pfn_to_online_page(pfn);
1532d336e94eSMichal Hocko 		if (!page)
1533467c996cSMel Gorman 			continue;
1534467c996cSMel Gorman 
1535a91c43c7SJoonsoo Kim 		if (page_zone(page) != zone)
1536a91c43c7SJoonsoo Kim 			continue;
1537a91c43c7SJoonsoo Kim 
1538467c996cSMel Gorman 		mtype = get_pageblock_migratetype(page);
1539467c996cSMel Gorman 
1540e80d6a24SMel Gorman 		if (mtype < MIGRATE_TYPES)
1541467c996cSMel Gorman 			count[mtype]++;
1542467c996cSMel Gorman 	}
1543467c996cSMel Gorman 
1544467c996cSMel Gorman 	/* Print counts */
1545467c996cSMel Gorman 	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1546467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1547467c996cSMel Gorman 		seq_printf(m, "%12lu ", count[mtype]);
1548467c996cSMel Gorman 	seq_putc(m, '\n');
1549467c996cSMel Gorman }
1550467c996cSMel Gorman 
1551f113e641SSeongJae Park /* Print out the number of pageblocks for each migratetype */
155233090af9SMiaohe Lin static void pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
1553467c996cSMel Gorman {
1554467c996cSMel Gorman 	int mtype;
1555467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1556467c996cSMel Gorman 
1557467c996cSMel Gorman 	seq_printf(m, "\n%-23s", "Number of blocks type ");
1558467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1559467c996cSMel Gorman 		seq_printf(m, "%12s ", migratetype_names[mtype]);
1560467c996cSMel Gorman 	seq_putc(m, '\n');
1561727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false,
1562727c080fSVinayak Menon 		pagetypeinfo_showblockcount_print);
1563467c996cSMel Gorman }
1564467c996cSMel Gorman 
156548c96a36SJoonsoo Kim /*
156648c96a36SJoonsoo Kim  * Print out the number of pageblocks for each migratetype that contain pages
156748c96a36SJoonsoo Kim  * of other types. This gives an indication of how well fallbacks are being
156848c96a36SJoonsoo Kim  * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
156948c96a36SJoonsoo Kim  * to determine what is going on.
157048c96a36SJoonsoo Kim  */
157148c96a36SJoonsoo Kim static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
157248c96a36SJoonsoo Kim {
157348c96a36SJoonsoo Kim #ifdef CONFIG_PAGE_OWNER
157448c96a36SJoonsoo Kim 	int mtype;
157548c96a36SJoonsoo Kim 
15767dd80b8aSVlastimil Babka 	if (!static_branch_unlikely(&page_owner_inited))
157748c96a36SJoonsoo Kim 		return;
157848c96a36SJoonsoo Kim 
157948c96a36SJoonsoo Kim 	drain_all_pages(NULL);
158048c96a36SJoonsoo Kim 
158148c96a36SJoonsoo Kim 	seq_printf(m, "\n%-23s", "Number of mixed blocks ");
158248c96a36SJoonsoo Kim 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
158348c96a36SJoonsoo Kim 		seq_printf(m, "%12s ", migratetype_names[mtype]);
158448c96a36SJoonsoo Kim 	seq_putc(m, '\n');
158548c96a36SJoonsoo Kim 
1586727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, true,
1587727c080fSVinayak Menon 		pagetypeinfo_showmixedcount_print);
158848c96a36SJoonsoo Kim #endif /* CONFIG_PAGE_OWNER */
158948c96a36SJoonsoo Kim }
159048c96a36SJoonsoo Kim 
1591467c996cSMel Gorman /*
1592467c996cSMel Gorman  * This prints out statistics in relation to grouping pages by mobility.
1593467c996cSMel Gorman  * It is expensive to collect so do not constantly read the file.
1594467c996cSMel Gorman  */
1595467c996cSMel Gorman static int pagetypeinfo_show(struct seq_file *m, void *arg)
1596467c996cSMel Gorman {
1597467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1598467c996cSMel Gorman 
159941b25a37SKOSAKI Motohiro 	/* check memoryless node */
1600a47b53c5SLai Jiangshan 	if (!node_state(pgdat->node_id, N_MEMORY))
160141b25a37SKOSAKI Motohiro 		return 0;
160241b25a37SKOSAKI Motohiro 
1603467c996cSMel Gorman 	seq_printf(m, "Page block order: %d\n", pageblock_order);
1604467c996cSMel Gorman 	seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
1605467c996cSMel Gorman 	seq_putc(m, '\n');
1606467c996cSMel Gorman 	pagetypeinfo_showfree(m, pgdat);
1607467c996cSMel Gorman 	pagetypeinfo_showblockcount(m, pgdat);
160848c96a36SJoonsoo Kim 	pagetypeinfo_showmixedcount(m, pgdat);
1609467c996cSMel Gorman 
1610f6ac2354SChristoph Lameter 	return 0;
1611f6ac2354SChristoph Lameter }
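
/*
 * Example usage (illustrative; the file is created below with mode 0400,
 * so only root can read it):
 *	cat /proc/pagetypeinfo
 */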
1612f6ac2354SChristoph Lameter 
16138f32f7e5SAlexey Dobriyan static const struct seq_operations fragmentation_op = {
1614f6ac2354SChristoph Lameter 	.start	= frag_start,
1615f6ac2354SChristoph Lameter 	.next	= frag_next,
1616f6ac2354SChristoph Lameter 	.stop	= frag_stop,
1617f6ac2354SChristoph Lameter 	.show	= frag_show,
1618f6ac2354SChristoph Lameter };
1619f6ac2354SChristoph Lameter 
162074e2e8e8SAlexey Dobriyan static const struct seq_operations pagetypeinfo_op = {
1621467c996cSMel Gorman 	.start	= frag_start,
1622467c996cSMel Gorman 	.next	= frag_next,
1623467c996cSMel Gorman 	.stop	= frag_stop,
1624467c996cSMel Gorman 	.show	= pagetypeinfo_show,
1625467c996cSMel Gorman };
1626467c996cSMel Gorman 
1627e2ecc8a7SMel Gorman static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
1628e2ecc8a7SMel Gorman {
1629e2ecc8a7SMel Gorman 	int zid;
1630e2ecc8a7SMel Gorman 
1631e2ecc8a7SMel Gorman 	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
1632e2ecc8a7SMel Gorman 		struct zone *compare = &pgdat->node_zones[zid];
1633e2ecc8a7SMel Gorman 
1634e2ecc8a7SMel Gorman 		if (populated_zone(compare))
1635e2ecc8a7SMel Gorman 			return zone == compare;
1636e2ecc8a7SMel Gorman 	}
1637e2ecc8a7SMel Gorman 
1638e2ecc8a7SMel Gorman 	return false;
1639e2ecc8a7SMel Gorman }
1640e2ecc8a7SMel Gorman 
1641467c996cSMel Gorman static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1642467c996cSMel Gorman 							struct zone *zone)
1643f6ac2354SChristoph Lameter {
1644f6ac2354SChristoph Lameter 	int i;
1645f6ac2354SChristoph Lameter 	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
1646e2ecc8a7SMel Gorman 	if (is_zone_first_populated(pgdat, zone)) {
1647e2ecc8a7SMel Gorman 		seq_printf(m, "\n  per-node stats");
1648e2ecc8a7SMel Gorman 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
164969473e5dSMuchun Song 			unsigned long pages = node_page_state_pages(pgdat, i);
165069473e5dSMuchun Song 
165169473e5dSMuchun Song 			if (vmstat_item_print_in_thp(i))
165269473e5dSMuchun Song 				pages /= HPAGE_PMD_NR;
16539d7ea9a2SKonstantin Khlebnikov 			seq_printf(m, "\n      %-12s %lu", node_stat_name(i),
165469473e5dSMuchun Song 				   pages);
1655e2ecc8a7SMel Gorman 		}
1656e2ecc8a7SMel Gorman 	}
1657f6ac2354SChristoph Lameter 	seq_printf(m,
1658f6ac2354SChristoph Lameter 		   "\n  pages free     %lu"
1659f6ac2354SChristoph Lameter 		   "\n        min      %lu"
1660f6ac2354SChristoph Lameter 		   "\n        low      %lu"
1661f6ac2354SChristoph Lameter 		   "\n        high     %lu"
1662f6ac2354SChristoph Lameter 		   "\n        spanned  %lu"
16639feedc9dSJiang Liu 		   "\n        present  %lu"
16643c381db1SDavid Hildenbrand 		   "\n        managed  %lu"
16653c381db1SDavid Hildenbrand 		   "\n        cma      %lu",
166688f5acf8SMel Gorman 		   zone_page_state(zone, NR_FREE_PAGES),
166741858966SMel Gorman 		   min_wmark_pages(zone),
166841858966SMel Gorman 		   low_wmark_pages(zone),
166941858966SMel Gorman 		   high_wmark_pages(zone),
1670f6ac2354SChristoph Lameter 		   zone->spanned_pages,
16719feedc9dSJiang Liu 		   zone->present_pages,
16723c381db1SDavid Hildenbrand 		   zone_managed_pages(zone),
16733c381db1SDavid Hildenbrand 		   zone_cma_pages(zone));
16742244b95aSChristoph Lameter 
1675f6ac2354SChristoph Lameter 	seq_printf(m,
16763484b2deSMel Gorman 		   "\n        protection: (%ld",
1677f6ac2354SChristoph Lameter 		   zone->lowmem_reserve[0]);
1678f6ac2354SChristoph Lameter 	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
16793484b2deSMel Gorman 		seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
16807dfb8bf3SDavid Rientjes 	seq_putc(m, ')');
16817dfb8bf3SDavid Rientjes 
1682a8a4b7aeSBaoquan He 	/* If unpopulated, no other information is useful */
1683a8a4b7aeSBaoquan He 	if (!populated_zone(zone)) {
1684a8a4b7aeSBaoquan He 		seq_putc(m, '\n');
1685a8a4b7aeSBaoquan He 		return;
1686a8a4b7aeSBaoquan He 	}
1687a8a4b7aeSBaoquan He 
16887dfb8bf3SDavid Rientjes 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
16899d7ea9a2SKonstantin Khlebnikov 		seq_printf(m, "\n      %-12s %lu", zone_stat_name(i),
16907dfb8bf3SDavid Rientjes 			   zone_page_state(zone, i));
16917dfb8bf3SDavid Rientjes 
16923a321d2aSKemi Wang #ifdef CONFIG_NUMA
1693f19298b9SMel Gorman 	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
16949d7ea9a2SKonstantin Khlebnikov 		seq_printf(m, "\n      %-12s %lu", numa_stat_name(i),
1695f19298b9SMel Gorman 			   zone_numa_event_state(zone, i));
16963a321d2aSKemi Wang #endif
16973a321d2aSKemi Wang 
16987dfb8bf3SDavid Rientjes 	seq_printf(m, "\n  pagesets");
1699f6ac2354SChristoph Lameter 	for_each_online_cpu(i) {
170028f836b6SMel Gorman 		struct per_cpu_pages *pcp;
170128f836b6SMel Gorman 		struct per_cpu_zonestat __maybe_unused *pzstats;
1702f6ac2354SChristoph Lameter 
170328f836b6SMel Gorman 		pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
1704f6ac2354SChristoph Lameter 		seq_printf(m,
17053dfa5721SChristoph Lameter 			   "\n    cpu: %i"
1706f6ac2354SChristoph Lameter 			   "\n              count: %i"
1707f6ac2354SChristoph Lameter 			   "\n              high:  %i"
1708f6ac2354SChristoph Lameter 			   "\n              batch: %i",
17093dfa5721SChristoph Lameter 			   i,
171028f836b6SMel Gorman 			   pcp->count,
171128f836b6SMel Gorman 			   pcp->high,
171228f836b6SMel Gorman 			   pcp->batch);
1713df9ecabaSChristoph Lameter #ifdef CONFIG_SMP
171428f836b6SMel Gorman 		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
1715df9ecabaSChristoph Lameter 		seq_printf(m, "\n  vm stats threshold: %d",
171628f836b6SMel Gorman 				pzstats->stat_threshold);
1717df9ecabaSChristoph Lameter #endif
1718f6ac2354SChristoph Lameter 	}
1719f6ac2354SChristoph Lameter 	seq_printf(m,
1720599d0c95SMel Gorman 		   "\n  node_unreclaimable:  %u"
17213a50d14dSAndrey Ryabinin 		   "\n  start_pfn:           %lu",
1722c73322d0SJohannes Weiner 		   pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
17233a50d14dSAndrey Ryabinin 		   zone->zone_start_pfn);
1724f6ac2354SChristoph Lameter 	seq_putc(m, '\n');
1725f6ac2354SChristoph Lameter }
1726467c996cSMel Gorman 
1727467c996cSMel Gorman /*
1728b2bd8598SDavid Rientjes  * Output information about zones in @pgdat.  All zones are printed regardless
1729b2bd8598SDavid Rientjes  * of whether they are populated or not: lowmem_reserve_ratio operates on the
1730b2bd8598SDavid Rientjes  * set of all zones and userspace would not be aware of such zones if they are
1731b2bd8598SDavid Rientjes  * suppressed here (zoneinfo displays the effect of lowmem_reserve_ratio).
1732467c996cSMel Gorman  */
1733467c996cSMel Gorman static int zoneinfo_show(struct seq_file *m, void *arg)
1734467c996cSMel Gorman {
1735467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1736727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
1737f6ac2354SChristoph Lameter 	return 0;
1738f6ac2354SChristoph Lameter }
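
/*
 * Example excerpt of the /proc/zoneinfo output produced above
 * (values are illustrative only):
 *	Node 0, zone   Normal
 *	  pages free     12345
 *	        min      1024
 *	        low      1280
 *	        high     1536
 */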
1739f6ac2354SChristoph Lameter 
17405c9fe628SAlexey Dobriyan static const struct seq_operations zoneinfo_op = {
1741f6ac2354SChristoph Lameter 	.start	= frag_start, /* iterate over all zones. The same as in
1742f6ac2354SChristoph Lameter 			       * fragmentation. */
1743f6ac2354SChristoph Lameter 	.next	= frag_next,
1744f6ac2354SChristoph Lameter 	.stop	= frag_stop,
1745f6ac2354SChristoph Lameter 	.show	= zoneinfo_show,
1746f6ac2354SChristoph Lameter };
1747f6ac2354SChristoph Lameter 
17489d7ea9a2SKonstantin Khlebnikov #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
1749f19298b9SMel Gorman 			 NR_VM_NUMA_EVENT_ITEMS + \
17509d7ea9a2SKonstantin Khlebnikov 			 NR_VM_NODE_STAT_ITEMS + \
17519d7ea9a2SKonstantin Khlebnikov 			 NR_VM_WRITEBACK_STAT_ITEMS + \
17529d7ea9a2SKonstantin Khlebnikov 			 (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
17539d7ea9a2SKonstantin Khlebnikov 			  NR_VM_EVENT_ITEMS : 0))
175479da826aSMichael Rubin 
1755f6ac2354SChristoph Lameter static void *vmstat_start(struct seq_file *m, loff_t *pos)
1756f6ac2354SChristoph Lameter {
17572244b95aSChristoph Lameter 	unsigned long *v;
17589d7ea9a2SKonstantin Khlebnikov 	int i;
1759f6ac2354SChristoph Lameter 
17609d7ea9a2SKonstantin Khlebnikov 	if (*pos >= NR_VMSTAT_ITEMS)
1761f6ac2354SChristoph Lameter 		return NULL;
1762f6ac2354SChristoph Lameter 
17639d7ea9a2SKonstantin Khlebnikov 	BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
1764f19298b9SMel Gorman 	fold_vm_numa_events();
17659d7ea9a2SKonstantin Khlebnikov 	v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
17662244b95aSChristoph Lameter 	m->private = v;
17672244b95aSChristoph Lameter 	if (!v)
1768f6ac2354SChristoph Lameter 		return ERR_PTR(-ENOMEM);
17692244b95aSChristoph Lameter 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1770c41f012aSMichal Hocko 		v[i] = global_zone_page_state(i);
177179da826aSMichael Rubin 	v += NR_VM_ZONE_STAT_ITEMS;
177279da826aSMichael Rubin 
17733a321d2aSKemi Wang #ifdef CONFIG_NUMA
1774f19298b9SMel Gorman 	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
1775f19298b9SMel Gorman 		v[i] = global_numa_event_state(i);
1776f19298b9SMel Gorman 	v += NR_VM_NUMA_EVENT_ITEMS;
17773a321d2aSKemi Wang #endif
17783a321d2aSKemi Wang 
177969473e5dSMuchun Song 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
1780ea426c2aSRoman Gushchin 		v[i] = global_node_page_state_pages(i);
178169473e5dSMuchun Song 		if (vmstat_item_print_in_thp(i))
178269473e5dSMuchun Song 			v[i] /= HPAGE_PMD_NR;
178369473e5dSMuchun Song 	}
178475ef7184SMel Gorman 	v += NR_VM_NODE_STAT_ITEMS;
178575ef7184SMel Gorman 
178679da826aSMichael Rubin 	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
178779da826aSMichael Rubin 			    v + NR_DIRTY_THRESHOLD);
178879da826aSMichael Rubin 	v += NR_VM_WRITEBACK_STAT_ITEMS;
178979da826aSMichael Rubin 
1790f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
179179da826aSMichael Rubin 	all_vm_events(v);
179279da826aSMichael Rubin 	v[PGPGIN] /= 2;		/* sectors -> kbytes */
179379da826aSMichael Rubin 	v[PGPGOUT] /= 2;
1794f8891e5eSChristoph Lameter #endif
1795ff8b16d7SWu Fengguang 	return (unsigned long *)m->private + *pos;
1796f6ac2354SChristoph Lameter }
1797f6ac2354SChristoph Lameter 
1798f6ac2354SChristoph Lameter static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
1799f6ac2354SChristoph Lameter {
1800f6ac2354SChristoph Lameter 	(*pos)++;
18019d7ea9a2SKonstantin Khlebnikov 	if (*pos >= NR_VMSTAT_ITEMS)
1802f6ac2354SChristoph Lameter 		return NULL;
1803f6ac2354SChristoph Lameter 	return (unsigned long *)m->private + *pos;
1804f6ac2354SChristoph Lameter }
1805f6ac2354SChristoph Lameter 
1806f6ac2354SChristoph Lameter static int vmstat_show(struct seq_file *m, void *arg)
1807f6ac2354SChristoph Lameter {
1808f6ac2354SChristoph Lameter 	unsigned long *l = arg;
1809f6ac2354SChristoph Lameter 	unsigned long off = l - (unsigned long *)m->private;
181068ba0326SAlexey Dobriyan 
181168ba0326SAlexey Dobriyan 	seq_puts(m, vmstat_text[off]);
181275ba1d07SJoe Perches 	seq_put_decimal_ull(m, " ", *l);
181368ba0326SAlexey Dobriyan 	seq_putc(m, '\n');
18148d92890bSNeilBrown 
18158d92890bSNeilBrown 	if (off == NR_VMSTAT_ITEMS - 1) {
18168d92890bSNeilBrown 		/*
18178d92890bSNeilBrown 		 * We've come to the end - add any deprecated counters to avoid
18188d92890bSNeilBrown 		 * breaking userspace which might depend on them being present.
18198d92890bSNeilBrown 		 */
18208d92890bSNeilBrown 		seq_puts(m, "nr_unstable 0\n");
18218d92890bSNeilBrown 	}
1822f6ac2354SChristoph Lameter 	return 0;
1823f6ac2354SChristoph Lameter }
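
/*
 * Example /proc/vmstat lines produced by vmstat_show()
 * (values are illustrative only):
 *	nr_free_pages 123456
 *	nr_zone_inactive_anon 7890
 *	...
 *	nr_unstable 0
 */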
1824f6ac2354SChristoph Lameter 
1825f6ac2354SChristoph Lameter static void vmstat_stop(struct seq_file *m, void *arg)
1826f6ac2354SChristoph Lameter {
1827f6ac2354SChristoph Lameter 	kfree(m->private);
1828f6ac2354SChristoph Lameter 	m->private = NULL;
1829f6ac2354SChristoph Lameter }
1830f6ac2354SChristoph Lameter 
1831b6aa44abSAlexey Dobriyan static const struct seq_operations vmstat_op = {
1832f6ac2354SChristoph Lameter 	.start	= vmstat_start,
1833f6ac2354SChristoph Lameter 	.next	= vmstat_next,
1834f6ac2354SChristoph Lameter 	.stop	= vmstat_stop,
1835f6ac2354SChristoph Lameter 	.show	= vmstat_show,
1836f6ac2354SChristoph Lameter };
1837f6ac2354SChristoph Lameter #endif /* CONFIG_PROC_FS */
1838f6ac2354SChristoph Lameter 
1839df9ecabaSChristoph Lameter #ifdef CONFIG_SMP
1840d1187ed2SChristoph Lameter static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
184177461ab3SChristoph Lameter int sysctl_stat_interval __read_mostly = HZ;
1842d1187ed2SChristoph Lameter 
184352b6f46bSHugh Dickins #ifdef CONFIG_PROC_FS
184452b6f46bSHugh Dickins static void refresh_vm_stats(struct work_struct *work)
184552b6f46bSHugh Dickins {
184652b6f46bSHugh Dickins 	refresh_cpu_vm_stats(true);
184752b6f46bSHugh Dickins }
184852b6f46bSHugh Dickins 
184952b6f46bSHugh Dickins int vmstat_refresh(struct ctl_table *table, int write,
185032927393SChristoph Hellwig 		   void *buffer, size_t *lenp, loff_t *ppos)
185152b6f46bSHugh Dickins {
185252b6f46bSHugh Dickins 	long val;
185352b6f46bSHugh Dickins 	int err;
185452b6f46bSHugh Dickins 	int i;
185552b6f46bSHugh Dickins 
185652b6f46bSHugh Dickins 	/*
185752b6f46bSHugh Dickins 	 * The regular update, every sysctl_stat_interval, may come later
185852b6f46bSHugh Dickins 	 * than expected: leaving a significant amount in per_cpu buckets.
185952b6f46bSHugh Dickins 	 * This is particularly misleading when checking a quantity of HUGE
186052b6f46bSHugh Dickins 	 * pages, immediately after running a test.  /proc/sys/vm/stat_refresh,
186152b6f46bSHugh Dickins 	 * which can equally be echo'ed to or cat'ted from (by root),
186252b6f46bSHugh Dickins 	 * can be used to update the stats just before reading them.
186352b6f46bSHugh Dickins 	 *
1864c41f012aSMichal Hocko 	 * Oh, and since global_zone_page_state() etc. are so careful to hide
186552b6f46bSHugh Dickins 	 * transiently negative values, report an error here if any of
186652b6f46bSHugh Dickins 	 * the stats is negative, so we know to go looking for imbalance.
186752b6f46bSHugh Dickins 	 */
186852b6f46bSHugh Dickins 	err = schedule_on_each_cpu(refresh_vm_stats);
186952b6f46bSHugh Dickins 	if (err)
187052b6f46bSHugh Dickins 		return err;
187152b6f46bSHugh Dickins 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
187275083aaeSHugh Dickins 		/*
187375083aaeSHugh Dickins 		 * Skip checking stats known to go negative occasionally.
187475083aaeSHugh Dickins 		 */
187575083aaeSHugh Dickins 		switch (i) {
187675083aaeSHugh Dickins 		case NR_ZONE_WRITE_PENDING:
187775083aaeSHugh Dickins 		case NR_FREE_CMA_PAGES:
187875083aaeSHugh Dickins 			continue;
187975083aaeSHugh Dickins 		}
188075ef7184SMel Gorman 		val = atomic_long_read(&vm_zone_stat[i]);
188152b6f46bSHugh Dickins 		if (val < 0) {
188252b6f46bSHugh Dickins 			pr_warn("%s: %s %ld\n",
18839d7ea9a2SKonstantin Khlebnikov 				__func__, zone_stat_name(i), val);
188452b6f46bSHugh Dickins 		}
188552b6f46bSHugh Dickins 	}
188676d8cc3cSHugh Dickins 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
188775083aaeSHugh Dickins 		/*
188875083aaeSHugh Dickins 		 * Skip checking stats known to go negative occasionally.
188975083aaeSHugh Dickins 		 */
189075083aaeSHugh Dickins 		switch (i) {
189175083aaeSHugh Dickins 		case NR_WRITEBACK:
189275083aaeSHugh Dickins 			continue;
189375083aaeSHugh Dickins 		}
189476d8cc3cSHugh Dickins 		val = atomic_long_read(&vm_node_stat[i]);
189576d8cc3cSHugh Dickins 		if (val < 0) {
189676d8cc3cSHugh Dickins 			pr_warn("%s: %s %ld\n",
189776d8cc3cSHugh Dickins 				__func__, node_stat_name(i), val);
189876d8cc3cSHugh Dickins 		}
189976d8cc3cSHugh Dickins 	}
190052b6f46bSHugh Dickins 	if (write)
190152b6f46bSHugh Dickins 		*ppos += *lenp;
190252b6f46bSHugh Dickins 	else
190352b6f46bSHugh Dickins 		*lenp = 0;
190452b6f46bSHugh Dickins 	return 0;
190552b6f46bSHugh Dickins }
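
/*
 * Example usage from a root shell, as described above:
 *	echo 1 > /proc/sys/vm/stat_refresh	# fold per-cpu diffs now
 *	cat /proc/vmstat			# then read up-to-date counters
 */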
190652b6f46bSHugh Dickins #endif /* CONFIG_PROC_FS */
190752b6f46bSHugh Dickins 
1908d1187ed2SChristoph Lameter static void vmstat_update(struct work_struct *w)
1909d1187ed2SChristoph Lameter {
19100eb77e98SChristoph Lameter 	if (refresh_cpu_vm_stats(true)) {
19117cc36bbdSChristoph Lameter 		/*
19127cc36bbdSChristoph Lameter 		 * Counters were updated so we expect more updates
19137cc36bbdSChristoph Lameter 		 * to occur in the future. Keep on running the
19147cc36bbdSChristoph Lameter 		 * update worker thread.
19157cc36bbdSChristoph Lameter 		 */
1916ce612879SMichal Hocko 		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
1917176bed1dSLinus Torvalds 				this_cpu_ptr(&vmstat_work),
191898f4ebb2SAnton Blanchard 				round_jiffies_relative(sysctl_stat_interval));
1919f01f17d3SMichal Hocko 	}
1920d1187ed2SChristoph Lameter }
1921d1187ed2SChristoph Lameter 
19227cc36bbdSChristoph Lameter /*
19237cc36bbdSChristoph Lameter  * Check if the diffs for a certain cpu indicate that
19247cc36bbdSChristoph Lameter  * an update is needed.
19257cc36bbdSChristoph Lameter  */
19267cc36bbdSChristoph Lameter static bool need_update(int cpu)
1927d1187ed2SChristoph Lameter {
19282bbd00aeSJohannes Weiner 	pg_data_t *last_pgdat = NULL;
19297cc36bbdSChristoph Lameter 	struct zone *zone;
1930d1187ed2SChristoph Lameter 
19317cc36bbdSChristoph Lameter 	for_each_populated_zone(zone) {
193228f836b6SMel Gorman 		struct per_cpu_zonestat *pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
19332bbd00aeSJohannes Weiner 		struct per_cpu_nodestat *n;
193428f836b6SMel Gorman 
19357cc36bbdSChristoph Lameter 		/*
19367cc36bbdSChristoph Lameter 		 * The fast way of checking if there are any vmstat diffs.
19377cc36bbdSChristoph Lameter 		 */
193864632fd3SMiaohe Lin 		if (memchr_inv(pzstats->vm_stat_diff, 0, sizeof(pzstats->vm_stat_diff)))
19397cc36bbdSChristoph Lameter 			return true;
1940f19298b9SMel Gorman 
19412bbd00aeSJohannes Weiner 		if (last_pgdat == zone->zone_pgdat)
19422bbd00aeSJohannes Weiner 			continue;
19432bbd00aeSJohannes Weiner 		last_pgdat = zone->zone_pgdat;
19442bbd00aeSJohannes Weiner 		n = per_cpu_ptr(zone->zone_pgdat->per_cpu_nodestats, cpu);
194564632fd3SMiaohe Lin 		if (memchr_inv(n->vm_node_stat_diff, 0, sizeof(n->vm_node_stat_diff)))
19462bbd00aeSJohannes Weiner 			return true;
19477cc36bbdSChristoph Lameter 	}
19487cc36bbdSChristoph Lameter 	return false;
19497cc36bbdSChristoph Lameter }
19507cc36bbdSChristoph Lameter 
19517b8da4c7SChristoph Lameter /*
19527b8da4c7SChristoph Lameter  * Switch off vmstat processing and then fold all the remaining differentials
19537b8da4c7SChristoph Lameter  * until the diffs stay at zero. The function is used by NOHZ and can only be
19547b8da4c7SChristoph Lameter  * invoked when tick processing is not active.
19557b8da4c7SChristoph Lameter  */
1956f01f17d3SMichal Hocko void quiet_vmstat(void)
1957f01f17d3SMichal Hocko {
1958f01f17d3SMichal Hocko 	if (system_state != SYSTEM_RUNNING)
1959f01f17d3SMichal Hocko 		return;
1960f01f17d3SMichal Hocko 
19617b8da4c7SChristoph Lameter 	if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
1962f01f17d3SMichal Hocko 		return;
1963f01f17d3SMichal Hocko 
1964f01f17d3SMichal Hocko 	if (!need_update(smp_processor_id()))
1965f01f17d3SMichal Hocko 		return;
1966f01f17d3SMichal Hocko 
1967f01f17d3SMichal Hocko 	/*
1968f01f17d3SMichal Hocko 	 * Just refresh counters and do not care about the pending delayed
1969f01f17d3SMichal Hocko 	 * vmstat_update. It doesn't fire that often to matter and canceling
1970f01f17d3SMichal Hocko 	 * it would be too expensive from this path.
1971f01f17d3SMichal Hocko 	 * vmstat_shepherd will take care about that for us.
1972f01f17d3SMichal Hocko 	 */
1973f01f17d3SMichal Hocko 	refresh_cpu_vm_stats(false);
1974f01f17d3SMichal Hocko }
1975f01f17d3SMichal Hocko 
19767cc36bbdSChristoph Lameter /*
19777cc36bbdSChristoph Lameter  * Shepherd worker thread that checks the
19787cc36bbdSChristoph Lameter  * differentials of processors that have had their
19797cc36bbdSChristoph Lameter  * vmstat update worker threads disabled because of
19807cc36bbdSChristoph Lameter  * inactivity.
19817cc36bbdSChristoph Lameter  */
19827cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w);
19837cc36bbdSChristoph Lameter 
19840eb77e98SChristoph Lameter static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);
19857cc36bbdSChristoph Lameter 
19867cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w)
19877cc36bbdSChristoph Lameter {
19887cc36bbdSChristoph Lameter 	int cpu;
19897cc36bbdSChristoph Lameter 
19907625eccdSSebastian Andrzej Siewior 	cpus_read_lock();
19917cc36bbdSChristoph Lameter 	/* Check processors whose vmstat worker threads have been disabled */
19927b8da4c7SChristoph Lameter 	for_each_online_cpu(cpu) {
1993f01f17d3SMichal Hocko 		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
19947cc36bbdSChristoph Lameter 
19957b8da4c7SChristoph Lameter 		if (!delayed_work_pending(dw) && need_update(cpu))
1996ce612879SMichal Hocko 			queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
1997fbcc8183SJiang Biao 
1998fbcc8183SJiang Biao 		cond_resched();
1999f01f17d3SMichal Hocko 	}
20007625eccdSSebastian Andrzej Siewior 	cpus_read_unlock();
20017cc36bbdSChristoph Lameter 
20027cc36bbdSChristoph Lameter 	schedule_delayed_work(&shepherd,
20037cc36bbdSChristoph Lameter 		round_jiffies_relative(sysctl_stat_interval));
20047cc36bbdSChristoph Lameter }
20057cc36bbdSChristoph Lameter 
20067cc36bbdSChristoph Lameter static void __init start_shepherd_timer(void)
20077cc36bbdSChristoph Lameter {
20087cc36bbdSChristoph Lameter 	int cpu;
20097cc36bbdSChristoph Lameter 
20107cc36bbdSChristoph Lameter 	for_each_possible_cpu(cpu)
2011ccde8bd4SMichal Hocko 		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
20127cc36bbdSChristoph Lameter 			vmstat_update);
20137cc36bbdSChristoph Lameter 
20147cc36bbdSChristoph Lameter 	schedule_delayed_work(&shepherd,
20157cc36bbdSChristoph Lameter 		round_jiffies_relative(sysctl_stat_interval));
2016d1187ed2SChristoph Lameter }
2017d1187ed2SChristoph Lameter 
201803e86dbaSTim Chen static void __init init_cpu_node_state(void)
201903e86dbaSTim Chen {
20204c501327SSebastian Andrzej Siewior 	int node;
202103e86dbaSTim Chen 
20224c501327SSebastian Andrzej Siewior 	for_each_online_node(node) {
20234c501327SSebastian Andrzej Siewior 		if (cpumask_weight(cpumask_of_node(node)) > 0)
20244c501327SSebastian Andrzej Siewior 			node_set_state(node, N_CPU);
20254c501327SSebastian Andrzej Siewior 	}
202603e86dbaSTim Chen }
202703e86dbaSTim Chen 
20285438da97SSebastian Andrzej Siewior static int vmstat_cpu_online(unsigned int cpu)
2029807a1bd2SToshi Kani {
20305ee28a44SKAMEZAWA Hiroyuki 	refresh_zone_stat_thresholds();
2031ad596925SChristoph Lameter 	node_set_state(cpu_to_node(cpu), N_CPU);
20325438da97SSebastian Andrzej Siewior 	return 0;
2033df9ecabaSChristoph Lameter }
2034df9ecabaSChristoph Lameter 
20355438da97SSebastian Andrzej Siewior static int vmstat_cpu_down_prep(unsigned int cpu)
20365438da97SSebastian Andrzej Siewior {
20375438da97SSebastian Andrzej Siewior 	cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
20385438da97SSebastian Andrzej Siewior 	return 0;
20395438da97SSebastian Andrzej Siewior }
20405438da97SSebastian Andrzej Siewior 
20415438da97SSebastian Andrzej Siewior static int vmstat_cpu_dead(unsigned int cpu)
20425438da97SSebastian Andrzej Siewior {
20435438da97SSebastian Andrzej Siewior 	const struct cpumask *node_cpus;
20445438da97SSebastian Andrzej Siewior 	int node;
20455438da97SSebastian Andrzej Siewior 
20465438da97SSebastian Andrzej Siewior 	node = cpu_to_node(cpu);
20475438da97SSebastian Andrzej Siewior 
20485438da97SSebastian Andrzej Siewior 	refresh_zone_stat_thresholds();
20495438da97SSebastian Andrzej Siewior 	node_cpus = cpumask_of_node(node);
20505438da97SSebastian Andrzej Siewior 	if (cpumask_weight(node_cpus) > 0)
20515438da97SSebastian Andrzej Siewior 		return 0;
20525438da97SSebastian Andrzej Siewior 
20535438da97SSebastian Andrzej Siewior 	node_clear_state(node, N_CPU);
20545438da97SSebastian Andrzej Siewior 	return 0;
20555438da97SSebastian Andrzej Siewior }
20565438da97SSebastian Andrzej Siewior 
20578f32f7e5SAlexey Dobriyan #endif
2058df9ecabaSChristoph Lameter 
2059ce612879SMichal Hocko struct workqueue_struct *mm_percpu_wq;
2060ce612879SMichal Hocko 
2061597b7305SMichal Hocko void __init init_mm_internals(void)
2062df9ecabaSChristoph Lameter {
2063ce612879SMichal Hocko 	int ret __maybe_unused;
20645438da97SSebastian Andrzej Siewior 
206580d136e1SMichal Hocko 	mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
2066ce612879SMichal Hocko 
2067ce612879SMichal Hocko #ifdef CONFIG_SMP
20685438da97SSebastian Andrzej Siewior 	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
20695438da97SSebastian Andrzej Siewior 					NULL, vmstat_cpu_dead);
20705438da97SSebastian Andrzej Siewior 	if (ret < 0)
20715438da97SSebastian Andrzej Siewior 		pr_err("vmstat: failed to register 'dead' hotplug state\n");
20725438da97SSebastian Andrzej Siewior 
20735438da97SSebastian Andrzej Siewior 	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
20745438da97SSebastian Andrzej Siewior 					vmstat_cpu_online,
20755438da97SSebastian Andrzej Siewior 					vmstat_cpu_down_prep);
20765438da97SSebastian Andrzej Siewior 	if (ret < 0)
20775438da97SSebastian Andrzej Siewior 		pr_err("vmstat: failed to register 'online' hotplug state\n");
20785438da97SSebastian Andrzej Siewior 
20797625eccdSSebastian Andrzej Siewior 	cpus_read_lock();
208003e86dbaSTim Chen 	init_cpu_node_state();
20817625eccdSSebastian Andrzej Siewior 	cpus_read_unlock();
2082d1187ed2SChristoph Lameter 
20837cc36bbdSChristoph Lameter 	start_shepherd_timer();
20848f32f7e5SAlexey Dobriyan #endif
20858f32f7e5SAlexey Dobriyan #ifdef CONFIG_PROC_FS
2086fddda2b7SChristoph Hellwig 	proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
2087abaed011SMichal Hocko 	proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
2088fddda2b7SChristoph Hellwig 	proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
2089fddda2b7SChristoph Hellwig 	proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
20908f32f7e5SAlexey Dobriyan #endif
2091df9ecabaSChristoph Lameter }
2092d7a5752cSMel Gorman 
2093d7a5752cSMel Gorman #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
2094d7a5752cSMel Gorman 
2095d7a5752cSMel Gorman /*
2096d7a5752cSMel Gorman  * Return an index indicating how much of the available free memory is
2097d7a5752cSMel Gorman  * unusable for an allocation of the requested size.
2098d7a5752cSMel Gorman  */
2099d7a5752cSMel Gorman static int unusable_free_index(unsigned int order,
2100d7a5752cSMel Gorman 				struct contig_page_info *info)
2101d7a5752cSMel Gorman {
2102d7a5752cSMel Gorman 	/* No free memory is interpreted as all free memory is unusable */
2103d7a5752cSMel Gorman 	if (info->free_pages == 0)
2104d7a5752cSMel Gorman 		return 1000;
2105d7a5752cSMel Gorman 
2106d7a5752cSMel Gorman 	/*
2107d7a5752cSMel Gorman 	 * Index should be a value between 0 and 1. Return a value to 3
2108d7a5752cSMel Gorman 	 * decimal places.
2109d7a5752cSMel Gorman 	 *
2110d7a5752cSMel Gorman 	 * 0 => no fragmentation
2111d7a5752cSMel Gorman 	 * 1 => high fragmentation
2112d7a5752cSMel Gorman 	 */
2113d7a5752cSMel Gorman 	return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
2114d7a5752cSMel Gorman 
2115d7a5752cSMel Gorman }
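
/*
 * Illustrative example with made-up numbers: for an order-4 request with
 * free_pages = 2000 and free_blocks_suitable = 25, the suitable blocks
 * account for 25 << 4 = 400 pages, so unusable_free_index() returns
 *	(2000 - 400) * 1000 / 2000 = 800
 * i.e. 0.800: 80% of the free memory cannot back an order-4 allocation.
 */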
2116d7a5752cSMel Gorman 
2117d7a5752cSMel Gorman static void unusable_show_print(struct seq_file *m,
2118d7a5752cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
2119d7a5752cSMel Gorman {
2120d7a5752cSMel Gorman 	unsigned int order;
2121d7a5752cSMel Gorman 	int index;
2122d7a5752cSMel Gorman 	struct contig_page_info info;
2123d7a5752cSMel Gorman 
2124d7a5752cSMel Gorman 	seq_printf(m, "Node %d, zone %8s ",
2125d7a5752cSMel Gorman 				pgdat->node_id,
2126d7a5752cSMel Gorman 				zone->name);
2127d7a5752cSMel Gorman 	for (order = 0; order < MAX_ORDER; ++order) {
2128d7a5752cSMel Gorman 		fill_contig_page_info(zone, order, &info);
2129d7a5752cSMel Gorman 		index = unusable_free_index(order, &info);
2130d7a5752cSMel Gorman 		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
2131d7a5752cSMel Gorman 	}
2132d7a5752cSMel Gorman 
2133d7a5752cSMel Gorman 	seq_putc(m, '\n');
2134d7a5752cSMel Gorman }
2135d7a5752cSMel Gorman 
2136d7a5752cSMel Gorman /*
2137d7a5752cSMel Gorman  * Display unusable free space index
2138d7a5752cSMel Gorman  *
2139d7a5752cSMel Gorman  * The unusable free space index measures how much of the available free
2140d7a5752cSMel Gorman  * memory cannot be used to satisfy an allocation of a given size and is a
2141d7a5752cSMel Gorman  * value between 0 and 1. The higher the value, the more of free memory is
2142d7a5752cSMel Gorman  * value between 0 and 1. The higher the value, the more of the free memory
2143d7a5752cSMel Gorman  * is unusable and, by implication, the worse the external fragmentation. This
2144d7a5752cSMel Gorman  */
2145d7a5752cSMel Gorman static int unusable_show(struct seq_file *m, void *arg)
2146d7a5752cSMel Gorman {
2147d7a5752cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
2148d7a5752cSMel Gorman 
2149d7a5752cSMel Gorman 	/* check memoryless node */
2150a47b53c5SLai Jiangshan 	if (!node_state(pgdat->node_id, N_MEMORY))
2151d7a5752cSMel Gorman 		return 0;
2152d7a5752cSMel Gorman 
2153727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, unusable_show_print);
2154d7a5752cSMel Gorman 
2155d7a5752cSMel Gorman 	return 0;
2156d7a5752cSMel Gorman }
2157d7a5752cSMel Gorman 
215801a99560SKefeng Wang static const struct seq_operations unusable_sops = {
2159d7a5752cSMel Gorman 	.start	= frag_start,
2160d7a5752cSMel Gorman 	.next	= frag_next,
2161d7a5752cSMel Gorman 	.stop	= frag_stop,
2162d7a5752cSMel Gorman 	.show	= unusable_show,
2163d7a5752cSMel Gorman };
2164d7a5752cSMel Gorman 
216501a99560SKefeng Wang DEFINE_SEQ_ATTRIBUTE(unusable);
2166d7a5752cSMel Gorman 
2167f1a5ab12SMel Gorman static void extfrag_show_print(struct seq_file *m,
2168f1a5ab12SMel Gorman 					pg_data_t *pgdat, struct zone *zone)
2169f1a5ab12SMel Gorman {
2170f1a5ab12SMel Gorman 	unsigned int order;
2171f1a5ab12SMel Gorman 	int index;
2172f1a5ab12SMel Gorman 
2173f1a5ab12SMel Gorman 	/* Alloc on stack as interrupts are disabled for zone walk */
2174f1a5ab12SMel Gorman 	struct contig_page_info info;
2175f1a5ab12SMel Gorman 
2176f1a5ab12SMel Gorman 	seq_printf(m, "Node %d, zone %8s ",
2177f1a5ab12SMel Gorman 				pgdat->node_id,
2178f1a5ab12SMel Gorman 				zone->name);
2179f1a5ab12SMel Gorman 	for (order = 0; order < MAX_ORDER; ++order) {
2180f1a5ab12SMel Gorman 		fill_contig_page_info(zone, order, &info);
218156de7263SMel Gorman 		index = __fragmentation_index(order, &info);
2182f1a5ab12SMel Gorman 		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
2183f1a5ab12SMel Gorman 	}
2184f1a5ab12SMel Gorman 
2185f1a5ab12SMel Gorman 	seq_putc(m, '\n');
2186f1a5ab12SMel Gorman }
2187f1a5ab12SMel Gorman 
2188f1a5ab12SMel Gorman /*
2189f1a5ab12SMel Gorman  * Display fragmentation index for orders that allocations would fail for
2190f1a5ab12SMel Gorman  */
2191f1a5ab12SMel Gorman static int extfrag_show(struct seq_file *m, void *arg)
2192f1a5ab12SMel Gorman {
2193f1a5ab12SMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
2194f1a5ab12SMel Gorman 
2195727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);
2196f1a5ab12SMel Gorman 
2197f1a5ab12SMel Gorman 	return 0;
2198f1a5ab12SMel Gorman }
2199f1a5ab12SMel Gorman 
220001a99560SKefeng Wang static const struct seq_operations extfrag_sops = {
2201f1a5ab12SMel Gorman 	.start	= frag_start,
2202f1a5ab12SMel Gorman 	.next	= frag_next,
2203f1a5ab12SMel Gorman 	.stop	= frag_stop,
2204f1a5ab12SMel Gorman 	.show	= extfrag_show,
2205f1a5ab12SMel Gorman };
2206f1a5ab12SMel Gorman 
220701a99560SKefeng Wang DEFINE_SEQ_ATTRIBUTE(extfrag);
2208f1a5ab12SMel Gorman 
2209d7a5752cSMel Gorman static int __init extfrag_debug_init(void)
2210d7a5752cSMel Gorman {
2211bde8bd8aSSasikantha babu 	struct dentry *extfrag_debug_root;
2212bde8bd8aSSasikantha babu 
2213d7a5752cSMel Gorman 	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
2214d7a5752cSMel Gorman 
2215d9f7979cSGreg Kroah-Hartman 	debugfs_create_file("unusable_index", 0444, extfrag_debug_root, NULL,
221601a99560SKefeng Wang 			    &unusable_fops);
2217d7a5752cSMel Gorman 
2218d9f7979cSGreg Kroah-Hartman 	debugfs_create_file("extfrag_index", 0444, extfrag_debug_root, NULL,
221901a99560SKefeng Wang 			    &extfrag_fops);
2220f1a5ab12SMel Gorman 
2221d7a5752cSMel Gorman 	return 0;
2222d7a5752cSMel Gorman }
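
/*
 * Example usage (illustrative; assumes debugfs is mounted at its usual
 * /sys/kernel/debug location):
 *	cat /sys/kernel/debug/extfrag/unusable_index
 *	cat /sys/kernel/debug/extfrag/extfrag_index
 */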
2223d7a5752cSMel Gorman 
2224d7a5752cSMel Gorman module_init(extfrag_debug_init);
2225d7a5752cSMel Gorman #endif
2226