xref: /linux/mm/vmstat.c (revision 3c381db1fac80373f2cc0d8c1d0bcfbf8bd4fb57)
1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2f6ac2354SChristoph Lameter /*
3f6ac2354SChristoph Lameter  *  linux/mm/vmstat.c
4f6ac2354SChristoph Lameter  *
5f6ac2354SChristoph Lameter  *  Manages VM statistics
6f6ac2354SChristoph Lameter  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
72244b95aSChristoph Lameter  *
82244b95aSChristoph Lameter  *  zoned VM statistics
92244b95aSChristoph Lameter  *  Copyright (C) 2006 Silicon Graphics, Inc.,
102244b95aSChristoph Lameter  *		Christoph Lameter <christoph@lameter.com>
117cc36bbdSChristoph Lameter  *  Copyright (C) 2008-2014 Christoph Lameter
12f6ac2354SChristoph Lameter  */
138f32f7e5SAlexey Dobriyan #include <linux/fs.h>
14f6ac2354SChristoph Lameter #include <linux/mm.h>
154e950f6fSAlexey Dobriyan #include <linux/err.h>
162244b95aSChristoph Lameter #include <linux/module.h>
175a0e3ad6STejun Heo #include <linux/slab.h>
18df9ecabaSChristoph Lameter #include <linux/cpu.h>
197cc36bbdSChristoph Lameter #include <linux/cpumask.h>
20c748e134SAdrian Bunk #include <linux/vmstat.h>
213c486871SAndrew Morton #include <linux/proc_fs.h>
223c486871SAndrew Morton #include <linux/seq_file.h>
233c486871SAndrew Morton #include <linux/debugfs.h>
24e8edc6e0SAlexey Dobriyan #include <linux/sched.h>
25f1a5ab12SMel Gorman #include <linux/math64.h>
2679da826aSMichael Rubin #include <linux/writeback.h>
2736deb0beSNamhyung Kim #include <linux/compaction.h>
286e543d57SLisa Du #include <linux/mm_inline.h>
2948c96a36SJoonsoo Kim #include <linux/page_ext.h>
3048c96a36SJoonsoo Kim #include <linux/page_owner.h>
316e543d57SLisa Du 
326e543d57SLisa Du #include "internal.h"
33f6ac2354SChristoph Lameter 
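/*
 * Per-cpu NUMA event diffs (vm_numa_stat_diff[]) are u16 counters that are
 * only ever incremented (see __inc_numa_state()); fold them into the zone
 * counters just before they could wrap, hence a threshold just under U16_MAX.
 */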
341d90ca89SKemi Wang #define NUMA_STATS_THRESHOLD (U16_MAX - 2)
351d90ca89SKemi Wang 
364518085eSKemi Wang #ifdef CONFIG_NUMA
374518085eSKemi Wang int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
384518085eSKemi Wang 
394518085eSKemi Wang /* zero numa counters within a zone */
404518085eSKemi Wang static void zero_zone_numa_counters(struct zone *zone)
414518085eSKemi Wang {
424518085eSKemi Wang 	int item, cpu;
434518085eSKemi Wang 
444518085eSKemi Wang 	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) {
454518085eSKemi Wang 		atomic_long_set(&zone->vm_numa_stat[item], 0);
464518085eSKemi Wang 		for_each_online_cpu(cpu)
474518085eSKemi Wang 			per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item]
484518085eSKemi Wang 						= 0;
494518085eSKemi Wang 	}
504518085eSKemi Wang }
514518085eSKemi Wang 
524518085eSKemi Wang /* zero numa counters of all the populated zones */
534518085eSKemi Wang static void zero_zones_numa_counters(void)
544518085eSKemi Wang {
554518085eSKemi Wang 	struct zone *zone;
564518085eSKemi Wang 
574518085eSKemi Wang 	for_each_populated_zone(zone)
584518085eSKemi Wang 		zero_zone_numa_counters(zone);
594518085eSKemi Wang }
604518085eSKemi Wang 
614518085eSKemi Wang /* zero global numa counters */
624518085eSKemi Wang static void zero_global_numa_counters(void)
634518085eSKemi Wang {
644518085eSKemi Wang 	int item;
654518085eSKemi Wang 
664518085eSKemi Wang 	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++)
674518085eSKemi Wang 		atomic_long_set(&vm_numa_stat[item], 0);
684518085eSKemi Wang }
694518085eSKemi Wang 
704518085eSKemi Wang static void invalid_numa_statistics(void)
714518085eSKemi Wang {
724518085eSKemi Wang 	zero_zones_numa_counters();
734518085eSKemi Wang 	zero_global_numa_counters();
744518085eSKemi Wang }
754518085eSKemi Wang 
764518085eSKemi Wang static DEFINE_MUTEX(vm_numa_stat_lock);
774518085eSKemi Wang 
784518085eSKemi Wang int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
7932927393SChristoph Hellwig 		void *buffer, size_t *length, loff_t *ppos)
804518085eSKemi Wang {
814518085eSKemi Wang 	int ret, oldval;
824518085eSKemi Wang 
834518085eSKemi Wang 	mutex_lock(&vm_numa_stat_lock);
844518085eSKemi Wang 	if (write)
854518085eSKemi Wang 		oldval = sysctl_vm_numa_stat;
864518085eSKemi Wang 	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
874518085eSKemi Wang 	if (ret || !write)
884518085eSKemi Wang 		goto out;
894518085eSKemi Wang 
904518085eSKemi Wang 	if (oldval == sysctl_vm_numa_stat)
914518085eSKemi Wang 		goto out;
924518085eSKemi Wang 	else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) {
934518085eSKemi Wang 		static_branch_enable(&vm_numa_stat_key);
944518085eSKemi Wang 		pr_info("enable numa statistics\n");
954518085eSKemi Wang 	} else {
964518085eSKemi Wang 		static_branch_disable(&vm_numa_stat_key);
974518085eSKemi Wang 		invalid_numa_statistics();
984518085eSKemi Wang 		pr_info("disable numa statistics, and clear numa counters\n");
994518085eSKemi Wang 	}
1004518085eSKemi Wang 
1014518085eSKemi Wang out:
1024518085eSKemi Wang 	mutex_unlock(&vm_numa_stat_lock);
1034518085eSKemi Wang 	return ret;
1044518085eSKemi Wang }
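/*
 * Usage sketch (assuming the handler is wired up as the "numa_stat" sysctl
 * in kernel/sysctl.c, i.e. /proc/sys/vm/numa_stat):
 *
 *	echo 0 > /proc/sys/vm/numa_stat		# stop collection, zero counters
 *	echo 1 > /proc/sys/vm/numa_stat		# re-enable collection
 */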
1054518085eSKemi Wang #endif
1064518085eSKemi Wang 
107f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
108f8891e5eSChristoph Lameter DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
109f8891e5eSChristoph Lameter EXPORT_PER_CPU_SYMBOL(vm_event_states);
110f8891e5eSChristoph Lameter 
11131f961a8SMinchan Kim static void sum_vm_events(unsigned long *ret)
112f8891e5eSChristoph Lameter {
1139eccf2a8SChristoph Lameter 	int cpu;
114f8891e5eSChristoph Lameter 	int i;
115f8891e5eSChristoph Lameter 
116f8891e5eSChristoph Lameter 	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
117f8891e5eSChristoph Lameter 
11831f961a8SMinchan Kim 	for_each_online_cpu(cpu) {
119f8891e5eSChristoph Lameter 		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
120f8891e5eSChristoph Lameter 
121f8891e5eSChristoph Lameter 		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
122f8891e5eSChristoph Lameter 			ret[i] += this->event[i];
123f8891e5eSChristoph Lameter 	}
124f8891e5eSChristoph Lameter }
125f8891e5eSChristoph Lameter 
126f8891e5eSChristoph Lameter /*
127f8891e5eSChristoph Lameter  * Accumulate the vm event counters across all CPUs.
128f8891e5eSChristoph Lameter  * The result is unavoidably approximate - it can change
129f8891e5eSChristoph Lameter  * during and after execution of this function.
130f8891e5eSChristoph Lameter  */
131f8891e5eSChristoph Lameter void all_vm_events(unsigned long *ret)
132f8891e5eSChristoph Lameter {
133b5be1132SKOSAKI Motohiro 	get_online_cpus();
13431f961a8SMinchan Kim 	sum_vm_events(ret);
135b5be1132SKOSAKI Motohiro 	put_online_cpus();
136f8891e5eSChristoph Lameter }
13732dd66fcSHeiko Carstens EXPORT_SYMBOL_GPL(all_vm_events);
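/*
 * Example (sketch): a caller snapshots every event counter with
 * all_vm_events() and then picks out the items it cares about, e.g.
 * PGFAULT/PGMAJFAULT from enum vm_event_item:
 *
 *	unsigned long events[NR_VM_EVENT_ITEMS];
 *
 *	all_vm_events(events);
 *	pr_info("pgfault %lu pgmajfault %lu\n",
 *		events[PGFAULT], events[PGMAJFAULT]);
 */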
138f8891e5eSChristoph Lameter 
139f8891e5eSChristoph Lameter /*
140f8891e5eSChristoph Lameter  * Fold the foreign cpu events into our own.
141f8891e5eSChristoph Lameter  *
142f8891e5eSChristoph Lameter  * This is adding to the events on one processor
143f8891e5eSChristoph Lameter  * but keeps the global counts constant.
144f8891e5eSChristoph Lameter  */
145f8891e5eSChristoph Lameter void vm_events_fold_cpu(int cpu)
146f8891e5eSChristoph Lameter {
147f8891e5eSChristoph Lameter 	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
148f8891e5eSChristoph Lameter 	int i;
149f8891e5eSChristoph Lameter 
150f8891e5eSChristoph Lameter 	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
151f8891e5eSChristoph Lameter 		count_vm_events(i, fold_state->event[i]);
152f8891e5eSChristoph Lameter 		fold_state->event[i] = 0;
153f8891e5eSChristoph Lameter 	}
154f8891e5eSChristoph Lameter }
155f8891e5eSChristoph Lameter 
156f8891e5eSChristoph Lameter #endif /* CONFIG_VM_EVENT_COUNTERS */
157f8891e5eSChristoph Lameter 
1582244b95aSChristoph Lameter /*
1592244b95aSChristoph Lameter  * Manage combined zone based / global counters
1602244b95aSChristoph Lameter  *
1612244b95aSChristoph Lameter  * vm_stat contains the global counters
1622244b95aSChristoph Lameter  */
16375ef7184SMel Gorman atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
1643a321d2aSKemi Wang atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
16575ef7184SMel Gorman atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
16675ef7184SMel Gorman EXPORT_SYMBOL(vm_zone_stat);
1673a321d2aSKemi Wang EXPORT_SYMBOL(vm_numa_stat);
16875ef7184SMel Gorman EXPORT_SYMBOL(vm_node_stat);
1692244b95aSChristoph Lameter 
1702244b95aSChristoph Lameter #ifdef CONFIG_SMP
1712244b95aSChristoph Lameter 
172b44129b3SMel Gorman int calculate_pressure_threshold(struct zone *zone)
17388f5acf8SMel Gorman {
17488f5acf8SMel Gorman 	int threshold;
17588f5acf8SMel Gorman 	int watermark_distance;
17688f5acf8SMel Gorman 
17788f5acf8SMel Gorman 	/*
17888f5acf8SMel Gorman 	 * As vmstats are not up to date, there is drift between the estimated
17988f5acf8SMel Gorman 	 * and real values. For high thresholds and a high number of CPUs, it
18088f5acf8SMel Gorman 	 * is possible for the min watermark to be breached while the estimated
18188f5acf8SMel Gorman 	 * value looks fine. The pressure threshold is a reduced value such
18288f5acf8SMel Gorman 	 * that even the maximum amount of drift will not accidentally breach
18388f5acf8SMel Gorman 	 * the min watermark
18488f5acf8SMel Gorman 	 */
18588f5acf8SMel Gorman 	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
18688f5acf8SMel Gorman 	threshold = max(1, (int)(watermark_distance / num_online_cpus()));
18788f5acf8SMel Gorman 
18888f5acf8SMel Gorman 	/*
18988f5acf8SMel Gorman 	 * Maximum threshold is 125
19088f5acf8SMel Gorman 	 */
19188f5acf8SMel Gorman 	threshold = min(125, threshold);
19288f5acf8SMel Gorman 
19388f5acf8SMel Gorman 	return threshold;
19488f5acf8SMel Gorman }
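/*
 * Worked example: with a low-min watermark gap of 1000 pages and 16 online
 * CPUs, the pressure threshold is max(1, 1000 / 16) = 62, well below the
 * 125 cap.
 */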
19588f5acf8SMel Gorman 
196b44129b3SMel Gorman int calculate_normal_threshold(struct zone *zone)
197df9ecabaSChristoph Lameter {
198df9ecabaSChristoph Lameter 	int threshold;
199df9ecabaSChristoph Lameter 	int mem;	/* memory in 128 MB units */
2002244b95aSChristoph Lameter 
2012244b95aSChristoph Lameter 	/*
202df9ecabaSChristoph Lameter 	 * The threshold scales with the number of processors and the amount
203df9ecabaSChristoph Lameter 	 * of memory per zone. More memory means that we can defer updates for
204df9ecabaSChristoph Lameter 	 * longer, while more processors could lead to more contention.
205df9ecabaSChristoph Lameter 	 * fls() is used to have a cheap way of logarithmic scaling.
2062244b95aSChristoph Lameter 	 *
207df9ecabaSChristoph Lameter 	 * Some sample thresholds:
208df9ecabaSChristoph Lameter 	 *
209df9ecabaSChristoph Lameter 	 * Threshold	Processors	(fls)	Zonesize	fls(mem+1)
210df9ecabaSChristoph Lameter 	 * ------------------------------------------------------------------
211df9ecabaSChristoph Lameter 	 * 8		1		1	0.9-1 GB	4
212df9ecabaSChristoph Lameter 	 * 16		2		2	0.9-1 GB	4
213df9ecabaSChristoph Lameter 	 * 20 		2		2	1-2 GB		5
214df9ecabaSChristoph Lameter 	 * 24		2		2	2-4 GB		6
215df9ecabaSChristoph Lameter 	 * 28		2		2	4-8 GB		7
216df9ecabaSChristoph Lameter 	 * 32		2		2	8-16 GB		8
217df9ecabaSChristoph Lameter 	 * 4		2		2	<128M		1
218df9ecabaSChristoph Lameter 	 * 30		4		3	2-4 GB		5
219df9ecabaSChristoph Lameter 	 * 48		4		3	8-16 GB		8
220df9ecabaSChristoph Lameter 	 * 32		8		4	1-2 GB		4
221df9ecabaSChristoph Lameter 	 * 32		8		4	0.9-1GB		4
222df9ecabaSChristoph Lameter 	 * 10		16		5	<128M		1
223df9ecabaSChristoph Lameter 	 * 40		16		5	900M		4
224df9ecabaSChristoph Lameter 	 * 70		64		7	2-4 GB		5
225df9ecabaSChristoph Lameter 	 * 84		64		7	4-8 GB		6
226df9ecabaSChristoph Lameter 	 * 108		512		9	4-8 GB		6
227df9ecabaSChristoph Lameter 	 * 125		1024		10	8-16 GB		8
228df9ecabaSChristoph Lameter 	 * 125		1024		10	16-32 GB	9
2292244b95aSChristoph Lameter 	 */
230df9ecabaSChristoph Lameter 
2319705bea5SArun KS 	mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);
232df9ecabaSChristoph Lameter 
233df9ecabaSChristoph Lameter 	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
234df9ecabaSChristoph Lameter 
235df9ecabaSChristoph Lameter 	/*
236df9ecabaSChristoph Lameter 	 * Maximum threshold is 125
237df9ecabaSChristoph Lameter 	 */
238df9ecabaSChristoph Lameter 	threshold = min(125, threshold);
239df9ecabaSChristoph Lameter 
240df9ecabaSChristoph Lameter 	return threshold;
241df9ecabaSChristoph Lameter }
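/*
 * Worked example: a zone managing 4GB is 32 units of 128MB, so on a
 * 2-CPU machine threshold = 2 * fls(2) * (1 + fls(32)) = 2 * 2 * 7 = 28,
 * matching the 2 processor / 4-8 GB row in the table above.
 */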
242df9ecabaSChristoph Lameter 
243df9ecabaSChristoph Lameter /*
244df9ecabaSChristoph Lameter  * Refresh the thresholds for each zone.
245df9ecabaSChristoph Lameter  */
246a6cccdc3SKOSAKI Motohiro void refresh_zone_stat_thresholds(void)
2472244b95aSChristoph Lameter {
24875ef7184SMel Gorman 	struct pglist_data *pgdat;
249df9ecabaSChristoph Lameter 	struct zone *zone;
250df9ecabaSChristoph Lameter 	int cpu;
251df9ecabaSChristoph Lameter 	int threshold;
252df9ecabaSChristoph Lameter 
25375ef7184SMel Gorman 	/* Zero current pgdat thresholds */
25475ef7184SMel Gorman 	for_each_online_pgdat(pgdat) {
25575ef7184SMel Gorman 		for_each_online_cpu(cpu) {
25675ef7184SMel Gorman 			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
25775ef7184SMel Gorman 		}
25875ef7184SMel Gorman 	}
25975ef7184SMel Gorman 
260ee99c71cSKOSAKI Motohiro 	for_each_populated_zone(zone) {
26175ef7184SMel Gorman 		struct pglist_data *pgdat = zone->zone_pgdat;
262aa454840SChristoph Lameter 		unsigned long max_drift, tolerate_drift;
263aa454840SChristoph Lameter 
264b44129b3SMel Gorman 		threshold = calculate_normal_threshold(zone);
265df9ecabaSChristoph Lameter 
26675ef7184SMel Gorman 		for_each_online_cpu(cpu) {
26775ef7184SMel Gorman 			int pgdat_threshold;
26875ef7184SMel Gorman 
26999dcc3e5SChristoph Lameter 			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
27099dcc3e5SChristoph Lameter 							= threshold;
2711d90ca89SKemi Wang 
27275ef7184SMel Gorman 			/* Base nodestat threshold on the largest populated zone. */
27375ef7184SMel Gorman 			pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
27475ef7184SMel Gorman 			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
27575ef7184SMel Gorman 				= max(threshold, pgdat_threshold);
27675ef7184SMel Gorman 		}
27775ef7184SMel Gorman 
278aa454840SChristoph Lameter 		/*
279aa454840SChristoph Lameter 		 * Only set percpu_drift_mark if there is a danger that
280aa454840SChristoph Lameter 		 * NR_FREE_PAGES reports the low watermark is ok when in fact
281aa454840SChristoph Lameter 		 * the min watermark could be breached by an allocation
282aa454840SChristoph Lameter 		 */
283aa454840SChristoph Lameter 		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
284aa454840SChristoph Lameter 		max_drift = num_online_cpus() * threshold;
285aa454840SChristoph Lameter 		if (max_drift > tolerate_drift)
286aa454840SChristoph Lameter 			zone->percpu_drift_mark = high_wmark_pages(zone) +
287aa454840SChristoph Lameter 					max_drift;
288df9ecabaSChristoph Lameter 	}
2892244b95aSChristoph Lameter }
2902244b95aSChristoph Lameter 
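/*
 * Switch every zone of a node that has a percpu_drift_mark to the threshold
 * computed by @calculate_pressure. This is used, for example, by kswapd
 * (mm/vmscan.c) to run with the tighter pressure thresholds while it is
 * awake and to restore the normal thresholds before it goes back to sleep.
 */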
291b44129b3SMel Gorman void set_pgdat_percpu_threshold(pg_data_t *pgdat,
292b44129b3SMel Gorman 				int (*calculate_pressure)(struct zone *))
29388f5acf8SMel Gorman {
29488f5acf8SMel Gorman 	struct zone *zone;
29588f5acf8SMel Gorman 	int cpu;
29688f5acf8SMel Gorman 	int threshold;
29788f5acf8SMel Gorman 	int i;
29888f5acf8SMel Gorman 
29988f5acf8SMel Gorman 	for (i = 0; i < pgdat->nr_zones; i++) {
30088f5acf8SMel Gorman 		zone = &pgdat->node_zones[i];
30188f5acf8SMel Gorman 		if (!zone->percpu_drift_mark)
30288f5acf8SMel Gorman 			continue;
30388f5acf8SMel Gorman 
304b44129b3SMel Gorman 		threshold = (*calculate_pressure)(zone);
3051d90ca89SKemi Wang 		for_each_online_cpu(cpu)
30688f5acf8SMel Gorman 			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
30788f5acf8SMel Gorman 							= threshold;
30888f5acf8SMel Gorman 	}
30988f5acf8SMel Gorman }
31088f5acf8SMel Gorman 
3112244b95aSChristoph Lameter /*
312bea04b07SJianyu Zhan  * For use when we know that interrupts are disabled,
313bea04b07SJianyu Zhan  * or when we know that preemption is disabled and that
314bea04b07SJianyu Zhan  * particular counter cannot be updated from interrupt context.
3152244b95aSChristoph Lameter  */
3162244b95aSChristoph Lameter void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
3176cdb18adSHeiko Carstens 			   long delta)
3182244b95aSChristoph Lameter {
31912938a92SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
32012938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
3212244b95aSChristoph Lameter 	long x;
32212938a92SChristoph Lameter 	long t;
3232244b95aSChristoph Lameter 
32412938a92SChristoph Lameter 	x = delta + __this_cpu_read(*p);
3252244b95aSChristoph Lameter 
32612938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
32712938a92SChristoph Lameter 
32840610076SMiaohe Lin 	if (unlikely(abs(x) > t)) {
3292244b95aSChristoph Lameter 		zone_page_state_add(x, zone, item);
3302244b95aSChristoph Lameter 		x = 0;
3312244b95aSChristoph Lameter 	}
33212938a92SChristoph Lameter 	__this_cpu_write(*p, x);
3332244b95aSChristoph Lameter }
3342244b95aSChristoph Lameter EXPORT_SYMBOL(__mod_zone_page_state);
3352244b95aSChristoph Lameter 
33675ef7184SMel Gorman void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
33775ef7184SMel Gorman 				long delta)
33875ef7184SMel Gorman {
33975ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
34075ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
34175ef7184SMel Gorman 	long x;
34275ef7184SMel Gorman 	long t;
34375ef7184SMel Gorman 
344ea426c2aSRoman Gushchin 	if (vmstat_item_in_bytes(item)) {
345ea426c2aSRoman Gushchin 		VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
346ea426c2aSRoman Gushchin 		delta >>= PAGE_SHIFT;
347ea426c2aSRoman Gushchin 	}
348ea426c2aSRoman Gushchin 
34975ef7184SMel Gorman 	x = delta + __this_cpu_read(*p);
35075ef7184SMel Gorman 
35175ef7184SMel Gorman 	t = __this_cpu_read(pcp->stat_threshold);
35275ef7184SMel Gorman 
35340610076SMiaohe Lin 	if (unlikely(abs(x) > t)) {
35475ef7184SMel Gorman 		node_page_state_add(x, pgdat, item);
35575ef7184SMel Gorman 		x = 0;
35675ef7184SMel Gorman 	}
35775ef7184SMel Gorman 	__this_cpu_write(*p, x);
35875ef7184SMel Gorman }
35975ef7184SMel Gorman EXPORT_SYMBOL(__mod_node_page_state);
36075ef7184SMel Gorman 
3612244b95aSChristoph Lameter /*
3622244b95aSChristoph Lameter  * Optimized increment and decrement functions.
3632244b95aSChristoph Lameter  *
3642244b95aSChristoph Lameter  * These are only for a single page and therefore can take a struct page *
3652244b95aSChristoph Lameter  * argument instead of struct zone *. This allows the inclusion of the code
3662244b95aSChristoph Lameter  * generated for page_zone(page) into the optimized functions.
3672244b95aSChristoph Lameter  *
3682244b95aSChristoph Lameter  * No overflow check is necessary and therefore the differential can be
3692244b95aSChristoph Lameter  * incremented or decremented in place which may allow the compilers to
3702244b95aSChristoph Lameter  * generate better code.
3712244b95aSChristoph Lameter  * The increment or decrement is known and therefore one boundary check can
3722244b95aSChristoph Lameter  * be omitted.
3732244b95aSChristoph Lameter  *
374df9ecabaSChristoph Lameter  * NOTE: These functions are very performance sensitive. Change only
375df9ecabaSChristoph Lameter  * with care.
376df9ecabaSChristoph Lameter  *
3772244b95aSChristoph Lameter  * Some processors have inc/dec instructions that are atomic vs an interrupt.
3782244b95aSChristoph Lameter  * However, the code must first determine the differential location in a zone
3792244b95aSChristoph Lameter  * based on the processor number and then inc/dec the counter. There is no
3802244b95aSChristoph Lameter  * guarantee without disabling preemption that the processor will not change
3812244b95aSChristoph Lameter  * in between and therefore the atomicity vs. interrupt cannot be exploited
3822244b95aSChristoph Lameter  * in a useful way here.
3832244b95aSChristoph Lameter  */
384c8785385SChristoph Lameter void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
3852244b95aSChristoph Lameter {
38612938a92SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
38712938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
38812938a92SChristoph Lameter 	s8 v, t;
3892244b95aSChristoph Lameter 
390908ee0f1SChristoph Lameter 	v = __this_cpu_inc_return(*p);
39112938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
39212938a92SChristoph Lameter 	if (unlikely(v > t)) {
39312938a92SChristoph Lameter 		s8 overstep = t >> 1;
3942244b95aSChristoph Lameter 
39512938a92SChristoph Lameter 		zone_page_state_add(v + overstep, zone, item);
39612938a92SChristoph Lameter 		__this_cpu_write(*p, -overstep);
3972244b95aSChristoph Lameter 	}
3982244b95aSChristoph Lameter }
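/*
 * Overstep example: with a stat_threshold of 32, the increment that takes
 * the per-cpu diff to 33 folds 33 + 16 = 49 into the zone counter and
 * resets the diff to -16, so a stream of further increments needs about a
 * threshold and a half before the next fold.
 */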
399ca889e6cSChristoph Lameter 
40075ef7184SMel Gorman void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
40175ef7184SMel Gorman {
40275ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
40375ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
40475ef7184SMel Gorman 	s8 v, t;
40575ef7184SMel Gorman 
406ea426c2aSRoman Gushchin 	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
407ea426c2aSRoman Gushchin 
40875ef7184SMel Gorman 	v = __this_cpu_inc_return(*p);
40975ef7184SMel Gorman 	t = __this_cpu_read(pcp->stat_threshold);
41075ef7184SMel Gorman 	if (unlikely(v > t)) {
41175ef7184SMel Gorman 		s8 overstep = t >> 1;
41275ef7184SMel Gorman 
41375ef7184SMel Gorman 		node_page_state_add(v + overstep, pgdat, item);
41475ef7184SMel Gorman 		__this_cpu_write(*p, -overstep);
41575ef7184SMel Gorman 	}
41675ef7184SMel Gorman }
41775ef7184SMel Gorman 
418ca889e6cSChristoph Lameter void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
419ca889e6cSChristoph Lameter {
420ca889e6cSChristoph Lameter 	__inc_zone_state(page_zone(page), item);
421ca889e6cSChristoph Lameter }
4222244b95aSChristoph Lameter EXPORT_SYMBOL(__inc_zone_page_state);
4232244b95aSChristoph Lameter 
42475ef7184SMel Gorman void __inc_node_page_state(struct page *page, enum node_stat_item item)
42575ef7184SMel Gorman {
42675ef7184SMel Gorman 	__inc_node_state(page_pgdat(page), item);
42775ef7184SMel Gorman }
42875ef7184SMel Gorman EXPORT_SYMBOL(__inc_node_page_state);
42975ef7184SMel Gorman 
430c8785385SChristoph Lameter void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
4312244b95aSChristoph Lameter {
43212938a92SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
43312938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
43412938a92SChristoph Lameter 	s8 v, t;
4352244b95aSChristoph Lameter 
436908ee0f1SChristoph Lameter 	v = __this_cpu_dec_return(*p);
43712938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
43812938a92SChristoph Lameter 	if (unlikely(v < -t)) {
43912938a92SChristoph Lameter 		s8 overstep = t >> 1;
4402244b95aSChristoph Lameter 
44112938a92SChristoph Lameter 		zone_page_state_add(v - overstep, zone, item);
44212938a92SChristoph Lameter 		__this_cpu_write(*p, overstep);
4432244b95aSChristoph Lameter 	}
4442244b95aSChristoph Lameter }
445c8785385SChristoph Lameter 
44675ef7184SMel Gorman void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
44775ef7184SMel Gorman {
44875ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
44975ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
45075ef7184SMel Gorman 	s8 v, t;
45175ef7184SMel Gorman 
452ea426c2aSRoman Gushchin 	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
453ea426c2aSRoman Gushchin 
45475ef7184SMel Gorman 	v = __this_cpu_dec_return(*p);
45575ef7184SMel Gorman 	t = __this_cpu_read(pcp->stat_threshold);
45675ef7184SMel Gorman 	if (unlikely(v < -t)) {
45775ef7184SMel Gorman 		s8 overstep = t >> 1;
45875ef7184SMel Gorman 
45975ef7184SMel Gorman 		node_page_state_add(v - overstep, pgdat, item);
46075ef7184SMel Gorman 		__this_cpu_write(*p, overstep);
46175ef7184SMel Gorman 	}
46275ef7184SMel Gorman }
46375ef7184SMel Gorman 
464c8785385SChristoph Lameter void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
465c8785385SChristoph Lameter {
466c8785385SChristoph Lameter 	__dec_zone_state(page_zone(page), item);
467c8785385SChristoph Lameter }
4682244b95aSChristoph Lameter EXPORT_SYMBOL(__dec_zone_page_state);
4692244b95aSChristoph Lameter 
47075ef7184SMel Gorman void __dec_node_page_state(struct page *page, enum node_stat_item item)
47175ef7184SMel Gorman {
47275ef7184SMel Gorman 	__dec_node_state(page_pgdat(page), item);
47375ef7184SMel Gorman }
47475ef7184SMel Gorman EXPORT_SYMBOL(__dec_node_page_state);
47575ef7184SMel Gorman 
4764156153cSHeiko Carstens #ifdef CONFIG_HAVE_CMPXCHG_LOCAL
4777c839120SChristoph Lameter /*
4787c839120SChristoph Lameter  * If we have cmpxchg_local support then we do not need to incur the overhead
4797c839120SChristoph Lameter  * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
4807c839120SChristoph Lameter  *
4817c839120SChristoph Lameter  * mod_zone_state() and mod_node_state() modify the zone and node counter
4827c839120SChristoph Lameter  * state through atomic per-cpu operations.
4837c839120SChristoph Lameter  *
4847c839120SChristoph Lameter  * Overstep mode specifies how overstep should be handled:
4857c839120SChristoph Lameter  *     0       No overstepping
4867c839120SChristoph Lameter  *     1       Overstepping half of threshold
4877c839120SChristoph Lameter  *     -1      Overstepping minus half of threshold
4887c839120SChristoph Lameter  */
48975ef7184SMel Gorman static inline void mod_zone_state(struct zone *zone,
49075ef7184SMel Gorman        enum zone_stat_item item, long delta, int overstep_mode)
4917c839120SChristoph Lameter {
4927c839120SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
4937c839120SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
4947c839120SChristoph Lameter 	long o, n, t, z;
4957c839120SChristoph Lameter 
4967c839120SChristoph Lameter 	do {
4977c839120SChristoph Lameter 		z = 0;  /* overflow to zone counters */
4987c839120SChristoph Lameter 
4997c839120SChristoph Lameter 		/*
5007c839120SChristoph Lameter 		 * The fetching of the stat_threshold is racy. We may apply
5017c839120SChristoph Lameter 		 * a counter threshold to the wrong cpu if we get
502d3bc2367SChristoph Lameter 		 * rescheduled while executing here. However, the next
503d3bc2367SChristoph Lameter 		 * counter update will apply the threshold again and
504d3bc2367SChristoph Lameter 		 * therefore bring the counter under the threshold again.
505d3bc2367SChristoph Lameter 		 *
506d3bc2367SChristoph Lameter 		 * Most of the time the thresholds are the same anyway
507d3bc2367SChristoph Lameter 		 * for all cpus in a zone.
5087c839120SChristoph Lameter 		 */
5097c839120SChristoph Lameter 		t = this_cpu_read(pcp->stat_threshold);
5107c839120SChristoph Lameter 
5117c839120SChristoph Lameter 		o = this_cpu_read(*p);
5127c839120SChristoph Lameter 		n = delta + o;
5137c839120SChristoph Lameter 
51440610076SMiaohe Lin 		if (abs(n) > t) {
5157c839120SChristoph Lameter 			int os = overstep_mode * (t >> 1);
5167c839120SChristoph Lameter 
5177c839120SChristoph Lameter 			/* Overflow must be added to zone counters */
5187c839120SChristoph Lameter 			z = n + os;
5197c839120SChristoph Lameter 			n = -os;
5207c839120SChristoph Lameter 		}
5217c839120SChristoph Lameter 	} while (this_cpu_cmpxchg(*p, o, n) != o);
5227c839120SChristoph Lameter 
5237c839120SChristoph Lameter 	if (z)
5247c839120SChristoph Lameter 		zone_page_state_add(z, zone, item);
5257c839120SChristoph Lameter }
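/*
 * The cmpxchg loop above simply retries if the per-cpu slot it targets no
 * longer holds the value that was read, e.g. because the task migrated to
 * another CPU or an interrupt updated the same counter in between, which
 * is why no local_irq_save/restore is needed here.
 */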
5267c839120SChristoph Lameter 
5277c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
5286cdb18adSHeiko Carstens 			 long delta)
5297c839120SChristoph Lameter {
53075ef7184SMel Gorman 	mod_zone_state(zone, item, delta, 0);
5317c839120SChristoph Lameter }
5327c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state);
5337c839120SChristoph Lameter 
5347c839120SChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item)
5357c839120SChristoph Lameter {
53675ef7184SMel Gorman 	mod_zone_state(page_zone(page), item, 1, 1);
5377c839120SChristoph Lameter }
5387c839120SChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state);
5397c839120SChristoph Lameter 
5407c839120SChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item)
5417c839120SChristoph Lameter {
54275ef7184SMel Gorman 	mod_zone_state(page_zone(page), item, -1, -1);
5437c839120SChristoph Lameter }
5447c839120SChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state);
54575ef7184SMel Gorman 
54675ef7184SMel Gorman static inline void mod_node_state(struct pglist_data *pgdat,
54775ef7184SMel Gorman        enum node_stat_item item, int delta, int overstep_mode)
54875ef7184SMel Gorman {
54975ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
55075ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
55175ef7184SMel Gorman 	long o, n, t, z;
55275ef7184SMel Gorman 
553ea426c2aSRoman Gushchin 	if (vmstat_item_in_bytes(item)) {
554ea426c2aSRoman Gushchin 		VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
555ea426c2aSRoman Gushchin 		delta >>= PAGE_SHIFT;
556ea426c2aSRoman Gushchin 	}
557ea426c2aSRoman Gushchin 
55875ef7184SMel Gorman 	do {
55975ef7184SMel Gorman 		z = 0;  /* overflow to node counters */
56075ef7184SMel Gorman 
56175ef7184SMel Gorman 		/*
56275ef7184SMel Gorman 		 * The fetching of the stat_threshold is racy. We may apply
56375ef7184SMel Gorman 		 * a counter threshold to the wrong cpu if we get
56475ef7184SMel Gorman 		 * rescheduled while executing here. However, the next
56575ef7184SMel Gorman 		 * counter update will apply the threshold again and
56675ef7184SMel Gorman 		 * therefore bring the counter under the threshold again.
56775ef7184SMel Gorman 		 *
56875ef7184SMel Gorman 		 * Most of the time the thresholds are the same anyway
56975ef7184SMel Gorman 		 * for all cpus in a node.
57075ef7184SMel Gorman 		 */
57175ef7184SMel Gorman 		t = this_cpu_read(pcp->stat_threshold);
57275ef7184SMel Gorman 
57375ef7184SMel Gorman 		o = this_cpu_read(*p);
57475ef7184SMel Gorman 		n = delta + o;
57575ef7184SMel Gorman 
57640610076SMiaohe Lin 		if (abs(n) > t) {
57775ef7184SMel Gorman 			int os = overstep_mode * (t >> 1);
57875ef7184SMel Gorman 
57975ef7184SMel Gorman 			/* Overflow must be added to node counters */
58075ef7184SMel Gorman 			z = n + os;
58175ef7184SMel Gorman 			n = -os;
58275ef7184SMel Gorman 		}
58375ef7184SMel Gorman 	} while (this_cpu_cmpxchg(*p, o, n) != o);
58475ef7184SMel Gorman 
58575ef7184SMel Gorman 	if (z)
58675ef7184SMel Gorman 		node_page_state_add(z, pgdat, item);
58775ef7184SMel Gorman }
58875ef7184SMel Gorman 
58975ef7184SMel Gorman void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
59075ef7184SMel Gorman 					long delta)
59175ef7184SMel Gorman {
59275ef7184SMel Gorman 	mod_node_state(pgdat, item, delta, 0);
59375ef7184SMel Gorman }
59475ef7184SMel Gorman EXPORT_SYMBOL(mod_node_page_state);
59575ef7184SMel Gorman 
59675ef7184SMel Gorman void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
59775ef7184SMel Gorman {
59875ef7184SMel Gorman 	mod_node_state(pgdat, item, 1, 1);
59975ef7184SMel Gorman }
60075ef7184SMel Gorman 
60175ef7184SMel Gorman void inc_node_page_state(struct page *page, enum node_stat_item item)
60275ef7184SMel Gorman {
60375ef7184SMel Gorman 	mod_node_state(page_pgdat(page), item, 1, 1);
60475ef7184SMel Gorman }
60575ef7184SMel Gorman EXPORT_SYMBOL(inc_node_page_state);
60675ef7184SMel Gorman 
60775ef7184SMel Gorman void dec_node_page_state(struct page *page, enum node_stat_item item)
60875ef7184SMel Gorman {
60975ef7184SMel Gorman 	mod_node_state(page_pgdat(page), item, -1, -1);
61075ef7184SMel Gorman }
61175ef7184SMel Gorman EXPORT_SYMBOL(dec_node_page_state);
6127c839120SChristoph Lameter #else
6137c839120SChristoph Lameter /*
6147c839120SChristoph Lameter  * Use interrupt disable to serialize counter updates
6157c839120SChristoph Lameter  */
6167c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
6176cdb18adSHeiko Carstens 			 long delta)
6187c839120SChristoph Lameter {
6197c839120SChristoph Lameter 	unsigned long flags;
6207c839120SChristoph Lameter 
6217c839120SChristoph Lameter 	local_irq_save(flags);
6227c839120SChristoph Lameter 	__mod_zone_page_state(zone, item, delta);
6237c839120SChristoph Lameter 	local_irq_restore(flags);
6247c839120SChristoph Lameter }
6257c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state);
6267c839120SChristoph Lameter 
6272244b95aSChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item)
6282244b95aSChristoph Lameter {
6292244b95aSChristoph Lameter 	unsigned long flags;
6302244b95aSChristoph Lameter 	struct zone *zone;
6312244b95aSChristoph Lameter 
6322244b95aSChristoph Lameter 	zone = page_zone(page);
6332244b95aSChristoph Lameter 	local_irq_save(flags);
634ca889e6cSChristoph Lameter 	__inc_zone_state(zone, item);
6352244b95aSChristoph Lameter 	local_irq_restore(flags);
6362244b95aSChristoph Lameter }
6372244b95aSChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state);
6382244b95aSChristoph Lameter 
6392244b95aSChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item)
6402244b95aSChristoph Lameter {
6412244b95aSChristoph Lameter 	unsigned long flags;
6422244b95aSChristoph Lameter 
6432244b95aSChristoph Lameter 	local_irq_save(flags);
644a302eb4eSChristoph Lameter 	__dec_zone_page_state(page, item);
6452244b95aSChristoph Lameter 	local_irq_restore(flags);
6462244b95aSChristoph Lameter }
6472244b95aSChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state);
6482244b95aSChristoph Lameter 
64975ef7184SMel Gorman void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
65075ef7184SMel Gorman {
65175ef7184SMel Gorman 	unsigned long flags;
65275ef7184SMel Gorman 
65375ef7184SMel Gorman 	local_irq_save(flags);
65475ef7184SMel Gorman 	__inc_node_state(pgdat, item);
65575ef7184SMel Gorman 	local_irq_restore(flags);
65675ef7184SMel Gorman }
65775ef7184SMel Gorman EXPORT_SYMBOL(inc_node_state);
65875ef7184SMel Gorman 
65975ef7184SMel Gorman void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
66075ef7184SMel Gorman 					long delta)
66175ef7184SMel Gorman {
66275ef7184SMel Gorman 	unsigned long flags;
66375ef7184SMel Gorman 
66475ef7184SMel Gorman 	local_irq_save(flags);
66575ef7184SMel Gorman 	__mod_node_page_state(pgdat, item, delta);
66675ef7184SMel Gorman 	local_irq_restore(flags);
66775ef7184SMel Gorman }
66875ef7184SMel Gorman EXPORT_SYMBOL(mod_node_page_state);
66975ef7184SMel Gorman 
67075ef7184SMel Gorman void inc_node_page_state(struct page *page, enum node_stat_item item)
67175ef7184SMel Gorman {
67275ef7184SMel Gorman 	unsigned long flags;
67375ef7184SMel Gorman 	struct pglist_data *pgdat;
67475ef7184SMel Gorman 
67575ef7184SMel Gorman 	pgdat = page_pgdat(page);
67675ef7184SMel Gorman 	local_irq_save(flags);
67775ef7184SMel Gorman 	__inc_node_state(pgdat, item);
67875ef7184SMel Gorman 	local_irq_restore(flags);
67975ef7184SMel Gorman }
68075ef7184SMel Gorman EXPORT_SYMBOL(inc_node_page_state);
68175ef7184SMel Gorman 
68275ef7184SMel Gorman void dec_node_page_state(struct page *page, enum node_stat_item item)
68375ef7184SMel Gorman {
68475ef7184SMel Gorman 	unsigned long flags;
68575ef7184SMel Gorman 
68675ef7184SMel Gorman 	local_irq_save(flags);
68775ef7184SMel Gorman 	__dec_node_page_state(page, item);
68875ef7184SMel Gorman 	local_irq_restore(flags);
68975ef7184SMel Gorman }
69075ef7184SMel Gorman EXPORT_SYMBOL(dec_node_page_state);
69175ef7184SMel Gorman #endif
6927cc36bbdSChristoph Lameter 
6937cc36bbdSChristoph Lameter /*
6947cc36bbdSChristoph Lameter  * Fold a differential into the global counters.
6957cc36bbdSChristoph Lameter  * Returns the number of counters updated.
6967cc36bbdSChristoph Lameter  */
6973a321d2aSKemi Wang #ifdef CONFIG_NUMA
6983a321d2aSKemi Wang static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
6993a321d2aSKemi Wang {
7003a321d2aSKemi Wang 	int i;
7013a321d2aSKemi Wang 	int changes = 0;
7023a321d2aSKemi Wang 
7033a321d2aSKemi Wang 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
7043a321d2aSKemi Wang 		if (zone_diff[i]) {
7053a321d2aSKemi Wang 			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
7063a321d2aSKemi Wang 			changes++;
7073a321d2aSKemi Wang 	}
7083a321d2aSKemi Wang 
7093a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
7103a321d2aSKemi Wang 		if (numa_diff[i]) {
7113a321d2aSKemi Wang 			atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
7123a321d2aSKemi Wang 			changes++;
7133a321d2aSKemi Wang 	}
7143a321d2aSKemi Wang 
7153a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
7163a321d2aSKemi Wang 		if (node_diff[i]) {
7173a321d2aSKemi Wang 			atomic_long_add(node_diff[i], &vm_node_stat[i]);
7183a321d2aSKemi Wang 			changes++;
7193a321d2aSKemi Wang 	}
7203a321d2aSKemi Wang 	return changes;
7213a321d2aSKemi Wang }
7223a321d2aSKemi Wang #else
72375ef7184SMel Gorman static int fold_diff(int *zone_diff, int *node_diff)
7244edb0748SChristoph Lameter {
7254edb0748SChristoph Lameter 	int i;
7267cc36bbdSChristoph Lameter 	int changes = 0;
7274edb0748SChristoph Lameter 
7284edb0748SChristoph Lameter 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
72975ef7184SMel Gorman 		if (zone_diff[i]) {
73075ef7184SMel Gorman 			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
73175ef7184SMel Gorman 			changes++;
73275ef7184SMel Gorman 	}
73375ef7184SMel Gorman 
73475ef7184SMel Gorman 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
73575ef7184SMel Gorman 		if (node_diff[i]) {
73675ef7184SMel Gorman 			atomic_long_add(node_diff[i], &vm_node_stat[i]);
7377cc36bbdSChristoph Lameter 			changes++;
7387cc36bbdSChristoph Lameter 	}
7397cc36bbdSChristoph Lameter 	return changes;
7404edb0748SChristoph Lameter }
7413a321d2aSKemi Wang #endif /* CONFIG_NUMA */
7424edb0748SChristoph Lameter 
7432244b95aSChristoph Lameter /*
7442bb921e5SChristoph Lameter  * Update the zone and node counters for the current cpu.
745a7f75e25SChristoph Lameter  *
7464037d452SChristoph Lameter  * Note that refresh_cpu_vm_stats strives to only access
7474037d452SChristoph Lameter  * node local memory. The per cpu pagesets on remote zones are placed
7484037d452SChristoph Lameter  * in the memory local to the processor using that pageset. So the
7494037d452SChristoph Lameter  * loop over all zones will access a series of cachelines local to
7504037d452SChristoph Lameter  * the processor.
7514037d452SChristoph Lameter  *
7524037d452SChristoph Lameter  * The call to zone_page_state_add updates the cachelines with the
7534037d452SChristoph Lameter  * statistics in the remote zone struct as well as the global cachelines
7544037d452SChristoph Lameter  * with the global counters. These could cause remote node cache line
7554037d452SChristoph Lameter  * bouncing and so should only be done when necessary.
7567cc36bbdSChristoph Lameter  *
7577cc36bbdSChristoph Lameter  * The function returns the number of global counters updated.
7582244b95aSChristoph Lameter  */
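/*
 * When do_pagesets is true this also drains this CPU's per-cpu pagelists
 * for remote zones once they have sat idle for a few rounds (tracked via
 * pageset->expire below), so pages do not stay stranded on the wrong node.
 */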
7590eb77e98SChristoph Lameter static int refresh_cpu_vm_stats(bool do_pagesets)
7602244b95aSChristoph Lameter {
76175ef7184SMel Gorman 	struct pglist_data *pgdat;
7622244b95aSChristoph Lameter 	struct zone *zone;
7632244b95aSChristoph Lameter 	int i;
76475ef7184SMel Gorman 	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
7653a321d2aSKemi Wang #ifdef CONFIG_NUMA
7663a321d2aSKemi Wang 	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
7673a321d2aSKemi Wang #endif
76875ef7184SMel Gorman 	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
7697cc36bbdSChristoph Lameter 	int changes = 0;
7702244b95aSChristoph Lameter 
771ee99c71cSKOSAKI Motohiro 	for_each_populated_zone(zone) {
772fbc2edb0SChristoph Lameter 		struct per_cpu_pageset __percpu *p = zone->pageset;
7732244b95aSChristoph Lameter 
774fbc2edb0SChristoph Lameter 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
775a7f75e25SChristoph Lameter 			int v;
776a7f75e25SChristoph Lameter 
777fbc2edb0SChristoph Lameter 			v = this_cpu_xchg(p->vm_stat_diff[i], 0);
778fbc2edb0SChristoph Lameter 			if (v) {
779fbc2edb0SChristoph Lameter 
780a7f75e25SChristoph Lameter 				atomic_long_add(v, &zone->vm_stat[i]);
78175ef7184SMel Gorman 				global_zone_diff[i] += v;
7824037d452SChristoph Lameter #ifdef CONFIG_NUMA
7834037d452SChristoph Lameter 				/* 3 seconds idle till flush */
784fbc2edb0SChristoph Lameter 				__this_cpu_write(p->expire, 3);
7854037d452SChristoph Lameter #endif
7862244b95aSChristoph Lameter 			}
787fbc2edb0SChristoph Lameter 		}
7884037d452SChristoph Lameter #ifdef CONFIG_NUMA
7893a321d2aSKemi Wang 		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
7903a321d2aSKemi Wang 			int v;
7913a321d2aSKemi Wang 
7923a321d2aSKemi Wang 			v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0);
7933a321d2aSKemi Wang 			if (v) {
7943a321d2aSKemi Wang 
7953a321d2aSKemi Wang 				atomic_long_add(v, &zone->vm_numa_stat[i]);
7963a321d2aSKemi Wang 				global_numa_diff[i] += v;
7973a321d2aSKemi Wang 				__this_cpu_write(p->expire, 3);
7983a321d2aSKemi Wang 			}
7993a321d2aSKemi Wang 		}
8003a321d2aSKemi Wang 
8010eb77e98SChristoph Lameter 		if (do_pagesets) {
8020eb77e98SChristoph Lameter 			cond_resched();
8034037d452SChristoph Lameter 			/*
8044037d452SChristoph Lameter 			 * Deal with draining the remote pageset of this
8054037d452SChristoph Lameter 			 * processor
8064037d452SChristoph Lameter 			 *
8074037d452SChristoph Lameter 			 * Check if there are pages remaining in this pageset;
8084037d452SChristoph Lameter 			 * if not then there is nothing to expire.
8094037d452SChristoph Lameter 			 */
810fbc2edb0SChristoph Lameter 			if (!__this_cpu_read(p->expire) ||
811fbc2edb0SChristoph Lameter 			       !__this_cpu_read(p->pcp.count))
8124037d452SChristoph Lameter 				continue;
8134037d452SChristoph Lameter 
8144037d452SChristoph Lameter 			/*
8154037d452SChristoph Lameter 			 * We never drain zones local to this processor.
8164037d452SChristoph Lameter 			 */
8174037d452SChristoph Lameter 			if (zone_to_nid(zone) == numa_node_id()) {
818fbc2edb0SChristoph Lameter 				__this_cpu_write(p->expire, 0);
8194037d452SChristoph Lameter 				continue;
8204037d452SChristoph Lameter 			}
8214037d452SChristoph Lameter 
822fbc2edb0SChristoph Lameter 			if (__this_cpu_dec_return(p->expire))
8234037d452SChristoph Lameter 				continue;
8244037d452SChristoph Lameter 
8257cc36bbdSChristoph Lameter 			if (__this_cpu_read(p->pcp.count)) {
8267c8e0181SChristoph Lameter 				drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
8277cc36bbdSChristoph Lameter 				changes++;
8287cc36bbdSChristoph Lameter 			}
8290eb77e98SChristoph Lameter 		}
8304037d452SChristoph Lameter #endif
8312244b95aSChristoph Lameter 	}
83275ef7184SMel Gorman 
83375ef7184SMel Gorman 	for_each_online_pgdat(pgdat) {
83475ef7184SMel Gorman 		struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;
83575ef7184SMel Gorman 
83675ef7184SMel Gorman 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
83775ef7184SMel Gorman 			int v;
83875ef7184SMel Gorman 
83975ef7184SMel Gorman 			v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
84075ef7184SMel Gorman 			if (v) {
84175ef7184SMel Gorman 				atomic_long_add(v, &pgdat->vm_stat[i]);
84275ef7184SMel Gorman 				global_node_diff[i] += v;
84375ef7184SMel Gorman 			}
84475ef7184SMel Gorman 		}
84575ef7184SMel Gorman 	}
84675ef7184SMel Gorman 
8473a321d2aSKemi Wang #ifdef CONFIG_NUMA
8483a321d2aSKemi Wang 	changes += fold_diff(global_zone_diff, global_numa_diff,
8493a321d2aSKemi Wang 			     global_node_diff);
8503a321d2aSKemi Wang #else
85175ef7184SMel Gorman 	changes += fold_diff(global_zone_diff, global_node_diff);
8523a321d2aSKemi Wang #endif
8537cc36bbdSChristoph Lameter 	return changes;
8542244b95aSChristoph Lameter }
8552244b95aSChristoph Lameter 
85640f4b1eaSCody P Schafer /*
8572bb921e5SChristoph Lameter  * Fold the data for an offline cpu into the global array.
8582bb921e5SChristoph Lameter  * There cannot be any access by the offline cpu and therefore
8592bb921e5SChristoph Lameter  * synchronization is simplified.
8602bb921e5SChristoph Lameter  */
8612bb921e5SChristoph Lameter void cpu_vm_stats_fold(int cpu)
8622bb921e5SChristoph Lameter {
86375ef7184SMel Gorman 	struct pglist_data *pgdat;
8642bb921e5SChristoph Lameter 	struct zone *zone;
8652bb921e5SChristoph Lameter 	int i;
86675ef7184SMel Gorman 	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
8673a321d2aSKemi Wang #ifdef CONFIG_NUMA
8683a321d2aSKemi Wang 	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
8693a321d2aSKemi Wang #endif
87075ef7184SMel Gorman 	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
8712bb921e5SChristoph Lameter 
8722bb921e5SChristoph Lameter 	for_each_populated_zone(zone) {
8732bb921e5SChristoph Lameter 		struct per_cpu_pageset *p;
8742bb921e5SChristoph Lameter 
8752bb921e5SChristoph Lameter 		p = per_cpu_ptr(zone->pageset, cpu);
8762bb921e5SChristoph Lameter 
8772bb921e5SChristoph Lameter 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
8782bb921e5SChristoph Lameter 			if (p->vm_stat_diff[i]) {
8792bb921e5SChristoph Lameter 				int v;
8802bb921e5SChristoph Lameter 
8812bb921e5SChristoph Lameter 				v = p->vm_stat_diff[i];
8822bb921e5SChristoph Lameter 				p->vm_stat_diff[i] = 0;
8832bb921e5SChristoph Lameter 				atomic_long_add(v, &zone->vm_stat[i]);
88475ef7184SMel Gorman 				global_zone_diff[i] += v;
8852bb921e5SChristoph Lameter 			}
8863a321d2aSKemi Wang 
8873a321d2aSKemi Wang #ifdef CONFIG_NUMA
8883a321d2aSKemi Wang 		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
8893a321d2aSKemi Wang 			if (p->vm_numa_stat_diff[i]) {
8903a321d2aSKemi Wang 				int v;
8913a321d2aSKemi Wang 
8923a321d2aSKemi Wang 				v = p->vm_numa_stat_diff[i];
8933a321d2aSKemi Wang 				p->vm_numa_stat_diff[i] = 0;
8943a321d2aSKemi Wang 				atomic_long_add(v, &zone->vm_numa_stat[i]);
8953a321d2aSKemi Wang 				global_numa_diff[i] += v;
8963a321d2aSKemi Wang 			}
8973a321d2aSKemi Wang #endif
8982bb921e5SChristoph Lameter 	}
8992bb921e5SChristoph Lameter 
90075ef7184SMel Gorman 	for_each_online_pgdat(pgdat) {
90175ef7184SMel Gorman 		struct per_cpu_nodestat *p;
90275ef7184SMel Gorman 
90375ef7184SMel Gorman 		p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
90475ef7184SMel Gorman 
90575ef7184SMel Gorman 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
90675ef7184SMel Gorman 			if (p->vm_node_stat_diff[i]) {
90775ef7184SMel Gorman 				int v;
90875ef7184SMel Gorman 
90975ef7184SMel Gorman 				v = p->vm_node_stat_diff[i];
91075ef7184SMel Gorman 				p->vm_node_stat_diff[i] = 0;
91175ef7184SMel Gorman 				atomic_long_add(v, &pgdat->vm_stat[i]);
91275ef7184SMel Gorman 				global_node_diff[i] += v;
91375ef7184SMel Gorman 			}
91475ef7184SMel Gorman 	}
91575ef7184SMel Gorman 
9163a321d2aSKemi Wang #ifdef CONFIG_NUMA
9173a321d2aSKemi Wang 	fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
9183a321d2aSKemi Wang #else
91975ef7184SMel Gorman 	fold_diff(global_zone_diff, global_node_diff);
9203a321d2aSKemi Wang #endif
9212bb921e5SChristoph Lameter }
9222bb921e5SChristoph Lameter 
9232bb921e5SChristoph Lameter /*
92440f4b1eaSCody P Schafer  * This is only called if !populated_zone(zone), which implies no other users of
92540f4b1eaSCody P Schafer  * pset->vm_stat_diff[] exist.
92640f4b1eaSCody P Schafer  */
9275a883813SMinchan Kim void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
9285a883813SMinchan Kim {
9295a883813SMinchan Kim 	int i;
9305a883813SMinchan Kim 
9315a883813SMinchan Kim 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
9325a883813SMinchan Kim 		if (pset->vm_stat_diff[i]) {
9335a883813SMinchan Kim 			int v = pset->vm_stat_diff[i];
9345a883813SMinchan Kim 			pset->vm_stat_diff[i] = 0;
9355a883813SMinchan Kim 			atomic_long_add(v, &zone->vm_stat[i]);
93675ef7184SMel Gorman 			atomic_long_add(v, &vm_zone_stat[i]);
9375a883813SMinchan Kim 		}
9383a321d2aSKemi Wang 
9393a321d2aSKemi Wang #ifdef CONFIG_NUMA
9403a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
9413a321d2aSKemi Wang 		if (pset->vm_numa_stat_diff[i]) {
9423a321d2aSKemi Wang 			int v = pset->vm_numa_stat_diff[i];
9433a321d2aSKemi Wang 
9443a321d2aSKemi Wang 			pset->vm_numa_stat_diff[i] = 0;
9453a321d2aSKemi Wang 			atomic_long_add(v, &zone->vm_numa_stat[i]);
9463a321d2aSKemi Wang 			atomic_long_add(v, &vm_numa_stat[i]);
9473a321d2aSKemi Wang 		}
9483a321d2aSKemi Wang #endif
9495a883813SMinchan Kim }
9502244b95aSChristoph Lameter #endif
9512244b95aSChristoph Lameter 
952ca889e6cSChristoph Lameter #ifdef CONFIG_NUMA
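/*
 * Increment a per-cpu NUMA event counter. These diffs are unsigned 16-bit
 * and only ever grow, so they are folded into the zone counter once they
 * reach NUMA_STATS_THRESHOLD instead of using the zone stat_threshold.
 */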
9533a321d2aSKemi Wang void __inc_numa_state(struct zone *zone,
9543a321d2aSKemi Wang 				 enum numa_stat_item item)
9553a321d2aSKemi Wang {
9563a321d2aSKemi Wang 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
9571d90ca89SKemi Wang 	u16 __percpu *p = pcp->vm_numa_stat_diff + item;
9581d90ca89SKemi Wang 	u16 v;
9593a321d2aSKemi Wang 
9603a321d2aSKemi Wang 	v = __this_cpu_inc_return(*p);
9613a321d2aSKemi Wang 
9621d90ca89SKemi Wang 	if (unlikely(v > NUMA_STATS_THRESHOLD)) {
9631d90ca89SKemi Wang 		zone_numa_state_add(v, zone, item);
9641d90ca89SKemi Wang 		__this_cpu_write(*p, 0);
9653a321d2aSKemi Wang 	}
9663a321d2aSKemi Wang }
9673a321d2aSKemi Wang 
968ca889e6cSChristoph Lameter /*
96975ef7184SMel Gorman  * Determine the per node value of a zone stat item. This function
97075ef7184SMel Gorman  * is called frequently in a NUMA machine, so try to be as
97175ef7184SMel Gorman  * frugal as possible.
972c2d42c16SAndrew Morton  */
97375ef7184SMel Gorman unsigned long sum_zone_node_page_state(int node,
97475ef7184SMel Gorman 				 enum zone_stat_item item)
975c2d42c16SAndrew Morton {
976c2d42c16SAndrew Morton 	struct zone *zones = NODE_DATA(node)->node_zones;
977e87d59f7SJoonsoo Kim 	int i;
978e87d59f7SJoonsoo Kim 	unsigned long count = 0;
979c2d42c16SAndrew Morton 
980e87d59f7SJoonsoo Kim 	for (i = 0; i < MAX_NR_ZONES; i++)
981e87d59f7SJoonsoo Kim 		count += zone_page_state(zones + i, item);
982e87d59f7SJoonsoo Kim 
983e87d59f7SJoonsoo Kim 	return count;
984c2d42c16SAndrew Morton }
985c2d42c16SAndrew Morton 
98663803222SKemi Wang /*
98763803222SKemi Wang  * Determine the per node value of a numa stat item. To avoid deviation,
98863803222SKemi Wang  * the per cpu stat number in vm_numa_stat_diff[] is also included.
98963803222SKemi Wang  */
9903a321d2aSKemi Wang unsigned long sum_zone_numa_state(int node,
9913a321d2aSKemi Wang 				 enum numa_stat_item item)
9923a321d2aSKemi Wang {
9933a321d2aSKemi Wang 	struct zone *zones = NODE_DATA(node)->node_zones;
9943a321d2aSKemi Wang 	int i;
9953a321d2aSKemi Wang 	unsigned long count = 0;
9963a321d2aSKemi Wang 
9973a321d2aSKemi Wang 	for (i = 0; i < MAX_NR_ZONES; i++)
99863803222SKemi Wang 		count += zone_numa_state_snapshot(zones + i, item);
9993a321d2aSKemi Wang 
10003a321d2aSKemi Wang 	return count;
10013a321d2aSKemi Wang }
10023a321d2aSKemi Wang 
100375ef7184SMel Gorman /*
100475ef7184SMel Gorman  * Determine the per node value of a stat item.
100575ef7184SMel Gorman  */
1006ea426c2aSRoman Gushchin unsigned long node_page_state_pages(struct pglist_data *pgdat,
100775ef7184SMel Gorman 				    enum node_stat_item item)
100875ef7184SMel Gorman {
100975ef7184SMel Gorman 	long x = atomic_long_read(&pgdat->vm_stat[item]);
101075ef7184SMel Gorman #ifdef CONFIG_SMP
101175ef7184SMel Gorman 	if (x < 0)
101275ef7184SMel Gorman 		x = 0;
101375ef7184SMel Gorman #endif
101475ef7184SMel Gorman 	return x;
101575ef7184SMel Gorman }
1016ea426c2aSRoman Gushchin 
1017ea426c2aSRoman Gushchin unsigned long node_page_state(struct pglist_data *pgdat,
1018ea426c2aSRoman Gushchin 			      enum node_stat_item item)
1019ea426c2aSRoman Gushchin {
1020ea426c2aSRoman Gushchin 	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));
1021ea426c2aSRoman Gushchin 
1022ea426c2aSRoman Gushchin 	return node_page_state_pages(pgdat, item);
1023ea426c2aSRoman Gushchin }
1024ca889e6cSChristoph Lameter #endif
1025ca889e6cSChristoph Lameter 
1026d7a5752cSMel Gorman #ifdef CONFIG_COMPACTION
102736deb0beSNamhyung Kim 
1028d7a5752cSMel Gorman struct contig_page_info {
1029d7a5752cSMel Gorman 	unsigned long free_pages;
1030d7a5752cSMel Gorman 	unsigned long free_blocks_total;
1031d7a5752cSMel Gorman 	unsigned long free_blocks_suitable;
1032d7a5752cSMel Gorman };
1033d7a5752cSMel Gorman 
1034d7a5752cSMel Gorman /*
1035d7a5752cSMel Gorman  * Calculate the number of free pages in a zone, how many contiguous
1036d7a5752cSMel Gorman  * pages are free and how many are large enough to satisfy an allocation of
1037d7a5752cSMel Gorman  * the target size. Note that this function makes no attempt to estimate
1038d7a5752cSMel Gorman  * how many suitable free blocks there *might* be if MOVABLE pages were
1039d7a5752cSMel Gorman  * migrated. Calculating that is possible, but expensive and can be
1040d7a5752cSMel Gorman  * figured out from userspace.
1041d7a5752cSMel Gorman  */
1042d7a5752cSMel Gorman static void fill_contig_page_info(struct zone *zone,
1043d7a5752cSMel Gorman 				unsigned int suitable_order,
1044d7a5752cSMel Gorman 				struct contig_page_info *info)
1045d7a5752cSMel Gorman {
1046d7a5752cSMel Gorman 	unsigned int order;
1047d7a5752cSMel Gorman 
1048d7a5752cSMel Gorman 	info->free_pages = 0;
1049d7a5752cSMel Gorman 	info->free_blocks_total = 0;
1050d7a5752cSMel Gorman 	info->free_blocks_suitable = 0;
1051d7a5752cSMel Gorman 
1052d7a5752cSMel Gorman 	for (order = 0; order < MAX_ORDER; order++) {
1053d7a5752cSMel Gorman 		unsigned long blocks;
1054d7a5752cSMel Gorman 
1055d7a5752cSMel Gorman 		/* Count number of free blocks */
1056d7a5752cSMel Gorman 		blocks = zone->free_area[order].nr_free;
1057d7a5752cSMel Gorman 		info->free_blocks_total += blocks;
1058d7a5752cSMel Gorman 
1059d7a5752cSMel Gorman 		/* Count free base pages */
1060d7a5752cSMel Gorman 		info->free_pages += blocks << order;
1061d7a5752cSMel Gorman 
1062d7a5752cSMel Gorman 		/* Count the suitable free blocks */
1063d7a5752cSMel Gorman 		if (order >= suitable_order)
1064d7a5752cSMel Gorman 			info->free_blocks_suitable += blocks <<
1065d7a5752cSMel Gorman 						(order - suitable_order);
1066d7a5752cSMel Gorman 	}
1067d7a5752cSMel Gorman }
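/*
 * Example: with free_area nr_free = {4, 2, 1} for orders 0..2 and
 * suitable_order == 2, this yields free_pages = 4 + 4 + 4 = 12,
 * free_blocks_total = 7 and free_blocks_suitable = 1.
 */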
1068f1a5ab12SMel Gorman 
1069f1a5ab12SMel Gorman /*
1070f1a5ab12SMel Gorman  * A fragmentation index only makes sense if an allocation of a requested
1071f1a5ab12SMel Gorman  * size would fail. If that is true, the fragmentation index indicates
1072f1a5ab12SMel Gorman  * whether external fragmentation or a lack of memory was the problem.
1073f1a5ab12SMel Gorman  * The value can be used to determine if page reclaim or compaction
1074f1a5ab12SMel Gorman  * should be used
1075f1a5ab12SMel Gorman  */
107656de7263SMel Gorman static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
1077f1a5ab12SMel Gorman {
1078f1a5ab12SMel Gorman 	unsigned long requested = 1UL << order;
1079f1a5ab12SMel Gorman 
108088d6ac40SWen Yang 	if (WARN_ON_ONCE(order >= MAX_ORDER))
108188d6ac40SWen Yang 		return 0;
108288d6ac40SWen Yang 
1083f1a5ab12SMel Gorman 	if (!info->free_blocks_total)
1084f1a5ab12SMel Gorman 		return 0;
1085f1a5ab12SMel Gorman 
1086f1a5ab12SMel Gorman 	/* Fragmentation index only makes sense when a request would fail */
1087f1a5ab12SMel Gorman 	if (info->free_blocks_suitable)
1088f1a5ab12SMel Gorman 		return -1000;
1089f1a5ab12SMel Gorman 
1090f1a5ab12SMel Gorman 	/*
1091f1a5ab12SMel Gorman 	 * The index is between 0 and 1, so return it scaled by 1000 (3 decimal places)
1092f1a5ab12SMel Gorman 	 *
1093f1a5ab12SMel Gorman 	 * 0 => allocation would fail due to lack of memory
1094f1a5ab12SMel Gorman 	 * 1 => allocation would fail due to fragmentation
1095f1a5ab12SMel Gorman 	 */
1096f1a5ab12SMel Gorman 	return 1000 - div_u64(1000 + div_u64(info->free_pages * 1000ULL, requested), info->free_blocks_total);
1097f1a5ab12SMel Gorman }
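/*
 * Worked example: an order-4 request (16 pages) against 1600 free pages
 * that are all in order-0 blocks gives
 * 1000 - (1000 + 1600 * 1000 / 16) / 1600 = 1000 - 63 = 937, i.e. the
 * failure is almost entirely due to external fragmentation.
 */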
109856de7263SMel Gorman 
1099facdaa91SNitin Gupta /*
1100facdaa91SNitin Gupta  * Calculates external fragmentation within a zone wrt the given order.
1101facdaa91SNitin Gupta  * It is defined as the percentage of pages found in blocks of size
1102facdaa91SNitin Gupta  * less than 1 << order. It returns values in range [0, 100].
1103facdaa91SNitin Gupta  */
1104d34c0a75SNitin Gupta unsigned int extfrag_for_order(struct zone *zone, unsigned int order)
1105facdaa91SNitin Gupta {
1106facdaa91SNitin Gupta 	struct contig_page_info info;
1107facdaa91SNitin Gupta 
1108facdaa91SNitin Gupta 	fill_contig_page_info(zone, order, &info);
1109facdaa91SNitin Gupta 	if (info.free_pages == 0)
1110facdaa91SNitin Gupta 		return 0;
1111facdaa91SNitin Gupta 
1112facdaa91SNitin Gupta 	return div_u64((info.free_pages -
1113facdaa91SNitin Gupta 			(info.free_blocks_suitable << order)) * 100,
1114facdaa91SNitin Gupta 			info.free_pages);
1115facdaa91SNitin Gupta }
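
/*
 * Worked example (hypothetical numbers, added for illustration): with
 * order = 4, free_pages = 1000 and free_blocks_suitable = 50 (i.e. 800
 * free pages sit in blocks of at least order 4), the result is
 *
 *	(1000 - (50 << 4)) * 100 / 1000 = 20
 *
 * so 20% of the free pages are in blocks too small for an order-4 request.
 */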
1116facdaa91SNitin Gupta 
111756de7263SMel Gorman /* Same as __fragmentation index but allocs contig_page_info on stack */
111856de7263SMel Gorman int fragmentation_index(struct zone *zone, unsigned int order)
111956de7263SMel Gorman {
112056de7263SMel Gorman 	struct contig_page_info info;
112156de7263SMel Gorman 
112256de7263SMel Gorman 	fill_contig_page_info(zone, order, &info);
112356de7263SMel Gorman 	return __fragmentation_index(order, &info);
112456de7263SMel Gorman }
1125d7a5752cSMel Gorman #endif
1126d7a5752cSMel Gorman 
1127ebc5d83dSKonstantin Khlebnikov #if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \
1128ebc5d83dSKonstantin Khlebnikov     defined(CONFIG_NUMA) || defined(CONFIG_MEMCG)
1129fa25c503SKOSAKI Motohiro #ifdef CONFIG_ZONE_DMA
1130fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA(xx) xx "_dma",
1131fa25c503SKOSAKI Motohiro #else
1132fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA(xx)
1133fa25c503SKOSAKI Motohiro #endif
1134fa25c503SKOSAKI Motohiro 
1135fa25c503SKOSAKI Motohiro #ifdef CONFIG_ZONE_DMA32
1136fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA32(xx) xx "_dma32",
1137fa25c503SKOSAKI Motohiro #else
1138fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA32(xx)
1139fa25c503SKOSAKI Motohiro #endif
1140fa25c503SKOSAKI Motohiro 
1141fa25c503SKOSAKI Motohiro #ifdef CONFIG_HIGHMEM
1142fa25c503SKOSAKI Motohiro #define TEXT_FOR_HIGHMEM(xx) xx "_high",
1143fa25c503SKOSAKI Motohiro #else
1144fa25c503SKOSAKI Motohiro #define TEXT_FOR_HIGHMEM(xx)
1145fa25c503SKOSAKI Motohiro #endif
1146fa25c503SKOSAKI Motohiro 
1147fa25c503SKOSAKI Motohiro #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
1148fa25c503SKOSAKI Motohiro 					TEXT_FOR_HIGHMEM(xx) xx "_movable",
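
/*
 * Example expansion (assuming CONFIG_ZONE_DMA and CONFIG_ZONE_DMA32 are
 * enabled and CONFIG_HIGHMEM is not, as on a typical x86_64 build):
 *
 *	TEXTS_FOR_ZONES("pgalloc")
 *
 * becomes
 *
 *	"pgalloc_dma", "pgalloc_dma32", "pgalloc_normal", "pgalloc_movable",
 */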
1149fa25c503SKOSAKI Motohiro 
1150fa25c503SKOSAKI Motohiro const char * const vmstat_text[] = {
11518d92890bSNeilBrown 	/* enum zone_stat_item counters */
1152fa25c503SKOSAKI Motohiro 	"nr_free_pages",
115371c799f4SMinchan Kim 	"nr_zone_inactive_anon",
115471c799f4SMinchan Kim 	"nr_zone_active_anon",
115571c799f4SMinchan Kim 	"nr_zone_inactive_file",
115671c799f4SMinchan Kim 	"nr_zone_active_file",
115771c799f4SMinchan Kim 	"nr_zone_unevictable",
11585a1c84b4SMel Gorman 	"nr_zone_write_pending",
1159fa25c503SKOSAKI Motohiro 	"nr_mlock",
1160fa25c503SKOSAKI Motohiro 	"nr_bounce",
116191537feeSMinchan Kim #if IS_ENABLED(CONFIG_ZSMALLOC)
116291537feeSMinchan Kim 	"nr_zspages",
116391537feeSMinchan Kim #endif
11643a321d2aSKemi Wang 	"nr_free_cma",
11653a321d2aSKemi Wang 
11663a321d2aSKemi Wang 	/* enum numa_stat_item counters */
1167fa25c503SKOSAKI Motohiro #ifdef CONFIG_NUMA
1168fa25c503SKOSAKI Motohiro 	"numa_hit",
1169fa25c503SKOSAKI Motohiro 	"numa_miss",
1170fa25c503SKOSAKI Motohiro 	"numa_foreign",
1171fa25c503SKOSAKI Motohiro 	"numa_interleave",
1172fa25c503SKOSAKI Motohiro 	"numa_local",
1173fa25c503SKOSAKI Motohiro 	"numa_other",
1174fa25c503SKOSAKI Motohiro #endif
117509316c09SKonstantin Khlebnikov 
11769d7ea9a2SKonstantin Khlebnikov 	/* enum node_stat_item counters */
1177599d0c95SMel Gorman 	"nr_inactive_anon",
1178599d0c95SMel Gorman 	"nr_active_anon",
1179599d0c95SMel Gorman 	"nr_inactive_file",
1180599d0c95SMel Gorman 	"nr_active_file",
1181599d0c95SMel Gorman 	"nr_unevictable",
1182385386cfSJohannes Weiner 	"nr_slab_reclaimable",
1183385386cfSJohannes Weiner 	"nr_slab_unreclaimable",
1184599d0c95SMel Gorman 	"nr_isolated_anon",
1185599d0c95SMel Gorman 	"nr_isolated_file",
118668d48e6aSJohannes Weiner 	"workingset_nodes",
1187170b04b7SJoonsoo Kim 	"workingset_refault_anon",
1188170b04b7SJoonsoo Kim 	"workingset_refault_file",
1189170b04b7SJoonsoo Kim 	"workingset_activate_anon",
1190170b04b7SJoonsoo Kim 	"workingset_activate_file",
1191170b04b7SJoonsoo Kim 	"workingset_restore_anon",
1192170b04b7SJoonsoo Kim 	"workingset_restore_file",
11931e6b1085SMel Gorman 	"workingset_nodereclaim",
119450658e2eSMel Gorman 	"nr_anon_pages",
119550658e2eSMel Gorman 	"nr_mapped",
119611fb9989SMel Gorman 	"nr_file_pages",
119711fb9989SMel Gorman 	"nr_dirty",
119811fb9989SMel Gorman 	"nr_writeback",
119911fb9989SMel Gorman 	"nr_writeback_temp",
120011fb9989SMel Gorman 	"nr_shmem",
120111fb9989SMel Gorman 	"nr_shmem_hugepages",
120211fb9989SMel Gorman 	"nr_shmem_pmdmapped",
120360fbf0abSSong Liu 	"nr_file_hugepages",
120460fbf0abSSong Liu 	"nr_file_pmdmapped",
120511fb9989SMel Gorman 	"nr_anon_transparent_hugepages",
1206c4a25635SMel Gorman 	"nr_vmscan_write",
1207c4a25635SMel Gorman 	"nr_vmscan_immediate_reclaim",
1208c4a25635SMel Gorman 	"nr_dirtied",
1209c4a25635SMel Gorman 	"nr_written",
1210b29940c1SVlastimil Babka 	"nr_kernel_misc_reclaimable",
12111970dc6fSJohn Hubbard 	"nr_foll_pin_acquired",
12121970dc6fSJohn Hubbard 	"nr_foll_pin_released",
1213991e7673SShakeel Butt 	"nr_kernel_stack",
1214991e7673SShakeel Butt #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
1215991e7673SShakeel Butt 	"nr_shadow_call_stack",
1216991e7673SShakeel Butt #endif
1217f0c0c115SShakeel Butt 	"nr_page_table_pages",
1218b6038942SShakeel Butt #ifdef CONFIG_SWAP
1219b6038942SShakeel Butt 	"nr_swapcached",
1220b6038942SShakeel Butt #endif
1221599d0c95SMel Gorman 
122209316c09SKonstantin Khlebnikov 	/* enum writeback_stat_item counters */
1223fa25c503SKOSAKI Motohiro 	"nr_dirty_threshold",
1224fa25c503SKOSAKI Motohiro 	"nr_dirty_background_threshold",
1225fa25c503SKOSAKI Motohiro 
1226ebc5d83dSKonstantin Khlebnikov #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
122709316c09SKonstantin Khlebnikov 	/* enum vm_event_item counters */
1228fa25c503SKOSAKI Motohiro 	"pgpgin",
1229fa25c503SKOSAKI Motohiro 	"pgpgout",
1230fa25c503SKOSAKI Motohiro 	"pswpin",
1231fa25c503SKOSAKI Motohiro 	"pswpout",
1232fa25c503SKOSAKI Motohiro 
1233fa25c503SKOSAKI Motohiro 	TEXTS_FOR_ZONES("pgalloc")
12347cc30fcfSMel Gorman 	TEXTS_FOR_ZONES("allocstall")
12357cc30fcfSMel Gorman 	TEXTS_FOR_ZONES("pgskip")
1236fa25c503SKOSAKI Motohiro 
1237fa25c503SKOSAKI Motohiro 	"pgfree",
1238fa25c503SKOSAKI Motohiro 	"pgactivate",
1239fa25c503SKOSAKI Motohiro 	"pgdeactivate",
1240f7ad2a6cSShaohua Li 	"pglazyfree",
1241fa25c503SKOSAKI Motohiro 
1242fa25c503SKOSAKI Motohiro 	"pgfault",
1243fa25c503SKOSAKI Motohiro 	"pgmajfault",
1244854e9ed0SMinchan Kim 	"pglazyfreed",
1245fa25c503SKOSAKI Motohiro 
1246599d0c95SMel Gorman 	"pgrefill",
1247798a6b87SPeter Xu 	"pgreuse",
1248599d0c95SMel Gorman 	"pgsteal_kswapd",
1249599d0c95SMel Gorman 	"pgsteal_direct",
1250599d0c95SMel Gorman 	"pgscan_kswapd",
1251599d0c95SMel Gorman 	"pgscan_direct",
125268243e76SMel Gorman 	"pgscan_direct_throttle",
1253497a6c1bSJohannes Weiner 	"pgscan_anon",
1254497a6c1bSJohannes Weiner 	"pgscan_file",
1255497a6c1bSJohannes Weiner 	"pgsteal_anon",
1256497a6c1bSJohannes Weiner 	"pgsteal_file",
1257fa25c503SKOSAKI Motohiro 
1258fa25c503SKOSAKI Motohiro #ifdef CONFIG_NUMA
1259fa25c503SKOSAKI Motohiro 	"zone_reclaim_failed",
1260fa25c503SKOSAKI Motohiro #endif
1261fa25c503SKOSAKI Motohiro 	"pginodesteal",
1262fa25c503SKOSAKI Motohiro 	"slabs_scanned",
1263fa25c503SKOSAKI Motohiro 	"kswapd_inodesteal",
1264fa25c503SKOSAKI Motohiro 	"kswapd_low_wmark_hit_quickly",
1265fa25c503SKOSAKI Motohiro 	"kswapd_high_wmark_hit_quickly",
1266fa25c503SKOSAKI Motohiro 	"pageoutrun",
1267fa25c503SKOSAKI Motohiro 
1268fa25c503SKOSAKI Motohiro 	"pgrotated",
1269fa25c503SKOSAKI Motohiro 
12705509a5d2SDave Hansen 	"drop_pagecache",
12715509a5d2SDave Hansen 	"drop_slab",
12728e675f7aSKonstantin Khlebnikov 	"oom_kill",
12735509a5d2SDave Hansen 
127403c5a6e1SMel Gorman #ifdef CONFIG_NUMA_BALANCING
127503c5a6e1SMel Gorman 	"numa_pte_updates",
127672403b4aSMel Gorman 	"numa_huge_pte_updates",
127703c5a6e1SMel Gorman 	"numa_hint_faults",
127803c5a6e1SMel Gorman 	"numa_hint_faults_local",
127903c5a6e1SMel Gorman 	"numa_pages_migrated",
128003c5a6e1SMel Gorman #endif
12815647bc29SMel Gorman #ifdef CONFIG_MIGRATION
12825647bc29SMel Gorman 	"pgmigrate_success",
12835647bc29SMel Gorman 	"pgmigrate_fail",
12841a5bae25SAnshuman Khandual 	"thp_migration_success",
12851a5bae25SAnshuman Khandual 	"thp_migration_fail",
12861a5bae25SAnshuman Khandual 	"thp_migration_split",
12875647bc29SMel Gorman #endif
1288fa25c503SKOSAKI Motohiro #ifdef CONFIG_COMPACTION
1289397487dbSMel Gorman 	"compact_migrate_scanned",
1290397487dbSMel Gorman 	"compact_free_scanned",
1291397487dbSMel Gorman 	"compact_isolated",
1292fa25c503SKOSAKI Motohiro 	"compact_stall",
1293fa25c503SKOSAKI Motohiro 	"compact_fail",
1294fa25c503SKOSAKI Motohiro 	"compact_success",
1295698b1b30SVlastimil Babka 	"compact_daemon_wake",
12967f354a54SDavid Rientjes 	"compact_daemon_migrate_scanned",
12977f354a54SDavid Rientjes 	"compact_daemon_free_scanned",
1298fa25c503SKOSAKI Motohiro #endif
1299fa25c503SKOSAKI Motohiro 
1300fa25c503SKOSAKI Motohiro #ifdef CONFIG_HUGETLB_PAGE
1301fa25c503SKOSAKI Motohiro 	"htlb_buddy_alloc_success",
1302fa25c503SKOSAKI Motohiro 	"htlb_buddy_alloc_fail",
1303fa25c503SKOSAKI Motohiro #endif
1304fa25c503SKOSAKI Motohiro 	"unevictable_pgs_culled",
1305fa25c503SKOSAKI Motohiro 	"unevictable_pgs_scanned",
1306fa25c503SKOSAKI Motohiro 	"unevictable_pgs_rescued",
1307fa25c503SKOSAKI Motohiro 	"unevictable_pgs_mlocked",
1308fa25c503SKOSAKI Motohiro 	"unevictable_pgs_munlocked",
1309fa25c503SKOSAKI Motohiro 	"unevictable_pgs_cleared",
1310fa25c503SKOSAKI Motohiro 	"unevictable_pgs_stranded",
1311fa25c503SKOSAKI Motohiro 
1312fa25c503SKOSAKI Motohiro #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1313fa25c503SKOSAKI Motohiro 	"thp_fault_alloc",
1314fa25c503SKOSAKI Motohiro 	"thp_fault_fallback",
131585b9f46eSDavid Rientjes 	"thp_fault_fallback_charge",
1316fa25c503SKOSAKI Motohiro 	"thp_collapse_alloc",
1317fa25c503SKOSAKI Motohiro 	"thp_collapse_alloc_failed",
131895ecedcdSKirill A. Shutemov 	"thp_file_alloc",
1319dcdf11eeSDavid Rientjes 	"thp_file_fallback",
132085b9f46eSDavid Rientjes 	"thp_file_fallback_charge",
132195ecedcdSKirill A. Shutemov 	"thp_file_mapped",
1322122afea9SKirill A. Shutemov 	"thp_split_page",
1323122afea9SKirill A. Shutemov 	"thp_split_page_failed",
1324f9719a03SKirill A. Shutemov 	"thp_deferred_split_page",
1325122afea9SKirill A. Shutemov 	"thp_split_pmd",
1326ce9311cfSYisheng Xie #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
1327ce9311cfSYisheng Xie 	"thp_split_pud",
1328ce9311cfSYisheng Xie #endif
1329d8a8e1f0SKirill A. Shutemov 	"thp_zero_page_alloc",
1330d8a8e1f0SKirill A. Shutemov 	"thp_zero_page_alloc_failed",
1331225311a4SHuang Ying 	"thp_swpout",
1332fe490cc0SHuang Ying 	"thp_swpout_fallback",
1333fa25c503SKOSAKI Motohiro #endif
133409316c09SKonstantin Khlebnikov #ifdef CONFIG_MEMORY_BALLOON
133509316c09SKonstantin Khlebnikov 	"balloon_inflate",
133609316c09SKonstantin Khlebnikov 	"balloon_deflate",
133709316c09SKonstantin Khlebnikov #ifdef CONFIG_BALLOON_COMPACTION
133809316c09SKonstantin Khlebnikov 	"balloon_migrate",
133909316c09SKonstantin Khlebnikov #endif
134009316c09SKonstantin Khlebnikov #endif /* CONFIG_MEMORY_BALLOON */
1341ec659934SMel Gorman #ifdef CONFIG_DEBUG_TLBFLUSH
13429824cf97SDave Hansen 	"nr_tlb_remote_flush",
13439824cf97SDave Hansen 	"nr_tlb_remote_flush_received",
13449824cf97SDave Hansen 	"nr_tlb_local_flush_all",
13459824cf97SDave Hansen 	"nr_tlb_local_flush_one",
1346ec659934SMel Gorman #endif /* CONFIG_DEBUG_TLBFLUSH */
1347fa25c503SKOSAKI Motohiro 
13484f115147SDavidlohr Bueso #ifdef CONFIG_DEBUG_VM_VMACACHE
13494f115147SDavidlohr Bueso 	"vmacache_find_calls",
13504f115147SDavidlohr Bueso 	"vmacache_find_hits",
13514f115147SDavidlohr Bueso #endif
1352cbc65df2SHuang Ying #ifdef CONFIG_SWAP
1353cbc65df2SHuang Ying 	"swap_ra",
1354cbc65df2SHuang Ying 	"swap_ra_hit",
1355cbc65df2SHuang Ying #endif
1356ebc5d83dSKonstantin Khlebnikov #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
1357fa25c503SKOSAKI Motohiro };
1358ebc5d83dSKonstantin Khlebnikov #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
1359fa25c503SKOSAKI Motohiro 
13603c486871SAndrew Morton #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
13613c486871SAndrew Morton      defined(CONFIG_PROC_FS)
13623c486871SAndrew Morton static void *frag_start(struct seq_file *m, loff_t *pos)
13633c486871SAndrew Morton {
13643c486871SAndrew Morton 	pg_data_t *pgdat;
13653c486871SAndrew Morton 	loff_t node = *pos;
13663c486871SAndrew Morton 
13673c486871SAndrew Morton 	for (pgdat = first_online_pgdat();
13683c486871SAndrew Morton 	     pgdat && node;
13693c486871SAndrew Morton 	     pgdat = next_online_pgdat(pgdat))
13703c486871SAndrew Morton 		--node;
13713c486871SAndrew Morton 
13723c486871SAndrew Morton 	return pgdat;
13733c486871SAndrew Morton }
13743c486871SAndrew Morton 
13753c486871SAndrew Morton static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
13763c486871SAndrew Morton {
13773c486871SAndrew Morton 	pg_data_t *pgdat = (pg_data_t *)arg;
13783c486871SAndrew Morton 
13793c486871SAndrew Morton 	(*pos)++;
13803c486871SAndrew Morton 	return next_online_pgdat(pgdat);
13813c486871SAndrew Morton }
13823c486871SAndrew Morton 
13833c486871SAndrew Morton static void frag_stop(struct seq_file *m, void *arg)
13843c486871SAndrew Morton {
13853c486871SAndrew Morton }
13863c486871SAndrew Morton 
1387b2bd8598SDavid Rientjes /*
1388b2bd8598SDavid Rientjes  * Walk zones in a node and print using a callback.
1389b2bd8598SDavid Rientjes  * If @assert_populated is true, only use callback for zones that are populated.
1390b2bd8598SDavid Rientjes  */
13913c486871SAndrew Morton static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
1392727c080fSVinayak Menon 		bool assert_populated, bool nolock,
13933c486871SAndrew Morton 		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
13943c486871SAndrew Morton {
13953c486871SAndrew Morton 	struct zone *zone;
13963c486871SAndrew Morton 	struct zone *node_zones = pgdat->node_zones;
13973c486871SAndrew Morton 	unsigned long flags;
13983c486871SAndrew Morton 
13993c486871SAndrew Morton 	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
1400b2bd8598SDavid Rientjes 		if (assert_populated && !populated_zone(zone))
14013c486871SAndrew Morton 			continue;
14023c486871SAndrew Morton 
1403727c080fSVinayak Menon 		if (!nolock)
14043c486871SAndrew Morton 			spin_lock_irqsave(&zone->lock, flags);
14053c486871SAndrew Morton 		print(m, pgdat, zone);
1406727c080fSVinayak Menon 		if (!nolock)
14073c486871SAndrew Morton 			spin_unlock_irqrestore(&zone->lock, flags);
14083c486871SAndrew Morton 	}
14093c486871SAndrew Morton }
14103c486871SAndrew Morton #endif
14113c486871SAndrew Morton 
1412d7a5752cSMel Gorman #ifdef CONFIG_PROC_FS
1413467c996cSMel Gorman static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
1414467c996cSMel Gorman 						struct zone *zone)
1415467c996cSMel Gorman {
1416467c996cSMel Gorman 	int order;
1417467c996cSMel Gorman 
1418f6ac2354SChristoph Lameter 	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1419f6ac2354SChristoph Lameter 	for (order = 0; order < MAX_ORDER; ++order)
1420f6ac2354SChristoph Lameter 		seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
1421f6ac2354SChristoph Lameter 	seq_putc(m, '\n');
1422f6ac2354SChristoph Lameter }
1423467c996cSMel Gorman 
1424467c996cSMel Gorman /*
1425467c996cSMel Gorman  * This walks the free areas for each zone.
1426467c996cSMel Gorman  */
1427467c996cSMel Gorman static int frag_show(struct seq_file *m, void *arg)
1428467c996cSMel Gorman {
1429467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1430727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, frag_show_print);
1431467c996cSMel Gorman 	return 0;
1432467c996cSMel Gorman }
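
/*
 * Illustrative /proc/buddyinfo line produced by frag_show_print() above,
 * one width-6 column per order up to MAX_ORDER (counts are hypothetical):
 *
 *	Node 0, zone   Normal    145     87     43     21     10      5      2      1      1      0      0
 */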
1433467c996cSMel Gorman 
1434467c996cSMel Gorman static void pagetypeinfo_showfree_print(struct seq_file *m,
1435467c996cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1436467c996cSMel Gorman {
1437467c996cSMel Gorman 	int order, mtype;
1438467c996cSMel Gorman 
1439467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
1440467c996cSMel Gorman 		seq_printf(m, "Node %4d, zone %8s, type %12s ",
1441467c996cSMel Gorman 					pgdat->node_id,
1442467c996cSMel Gorman 					zone->name,
1443467c996cSMel Gorman 					migratetype_names[mtype]);
1444467c996cSMel Gorman 		for (order = 0; order < MAX_ORDER; ++order) {
1445467c996cSMel Gorman 			unsigned long freecount = 0;
1446467c996cSMel Gorman 			struct free_area *area;
1447467c996cSMel Gorman 			struct list_head *curr;
144893b3a674SMichal Hocko 			bool overflow = false;
1449467c996cSMel Gorman 
1450467c996cSMel Gorman 			area = &(zone->free_area[order]);
1451467c996cSMel Gorman 
145293b3a674SMichal Hocko 			list_for_each(curr, &area->free_list[mtype]) {
145393b3a674SMichal Hocko 				/*
145493b3a674SMichal Hocko 				 * Cap the free_list iteration because it might
145593b3a674SMichal Hocko 				 * be really large and we are under a spinlock,
145693b3a674SMichal Hocko 				 * so a long time spent here could trigger the
145793b3a674SMichal Hocko 				 * hard lockup detector. Anyway, this is a
145893b3a674SMichal Hocko 				 * debugging tool, so knowing there is a handful
145993b3a674SMichal Hocko 				 * of pages of this order should be more than
146093b3a674SMichal Hocko 				 * sufficient.
146193b3a674SMichal Hocko 				 */
146293b3a674SMichal Hocko 				if (++freecount >= 100000) {
146393b3a674SMichal Hocko 					overflow = true;
146493b3a674SMichal Hocko 					break;
146593b3a674SMichal Hocko 				}
146693b3a674SMichal Hocko 			}
146793b3a674SMichal Hocko 			seq_printf(m, "%s%6lu ", overflow ? ">" : "", freecount);
146893b3a674SMichal Hocko 			spin_unlock_irq(&zone->lock);
146993b3a674SMichal Hocko 			cond_resched();
147093b3a674SMichal Hocko 			spin_lock_irq(&zone->lock);
1471467c996cSMel Gorman 		}
1472467c996cSMel Gorman 		seq_putc(m, '\n');
1473467c996cSMel Gorman 	}
1474467c996cSMel Gorman }
1475467c996cSMel Gorman 
1476467c996cSMel Gorman /* Print out the free pages at each order for each migratetype */
1477467c996cSMel Gorman static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
1478467c996cSMel Gorman {
1479467c996cSMel Gorman 	int order;
1480467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1481467c996cSMel Gorman 
1482467c996cSMel Gorman 	/* Print header */
1483467c996cSMel Gorman 	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
1484467c996cSMel Gorman 	for (order = 0; order < MAX_ORDER; ++order)
1485467c996cSMel Gorman 		seq_printf(m, "%6d ", order);
1486467c996cSMel Gorman 	seq_putc(m, '\n');
1487467c996cSMel Gorman 
1488727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
1489467c996cSMel Gorman 
1490467c996cSMel Gorman 	return 0;
1491467c996cSMel Gorman }
1492467c996cSMel Gorman 
1493467c996cSMel Gorman static void pagetypeinfo_showblockcount_print(struct seq_file *m,
1494467c996cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1495467c996cSMel Gorman {
1496467c996cSMel Gorman 	int mtype;
1497467c996cSMel Gorman 	unsigned long pfn;
1498467c996cSMel Gorman 	unsigned long start_pfn = zone->zone_start_pfn;
1499108bcc96SCody P Schafer 	unsigned long end_pfn = zone_end_pfn(zone);
1500467c996cSMel Gorman 	unsigned long count[MIGRATE_TYPES] = { 0, };
1501467c996cSMel Gorman 
1502467c996cSMel Gorman 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
1503467c996cSMel Gorman 		struct page *page;
1504467c996cSMel Gorman 
1505d336e94eSMichal Hocko 		page = pfn_to_online_page(pfn);
1506d336e94eSMichal Hocko 		if (!page)
1507467c996cSMel Gorman 			continue;
1508467c996cSMel Gorman 
1509a91c43c7SJoonsoo Kim 		if (page_zone(page) != zone)
1510a91c43c7SJoonsoo Kim 			continue;
1511a91c43c7SJoonsoo Kim 
1512467c996cSMel Gorman 		mtype = get_pageblock_migratetype(page);
1513467c996cSMel Gorman 
1514e80d6a24SMel Gorman 		if (mtype < MIGRATE_TYPES)
1515467c996cSMel Gorman 			count[mtype]++;
1516467c996cSMel Gorman 	}
1517467c996cSMel Gorman 
1518467c996cSMel Gorman 	/* Print counts */
1519467c996cSMel Gorman 	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1520467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1521467c996cSMel Gorman 		seq_printf(m, "%12lu ", count[mtype]);
1522467c996cSMel Gorman 	seq_putc(m, '\n');
1523467c996cSMel Gorman }
1524467c996cSMel Gorman 
1525f113e641SSeongJae Park /* Print out the number of pageblocks for each migratetype */
1526467c996cSMel Gorman static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
1527467c996cSMel Gorman {
1528467c996cSMel Gorman 	int mtype;
1529467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1530467c996cSMel Gorman 
1531467c996cSMel Gorman 	seq_printf(m, "\n%-23s", "Number of blocks type ");
1532467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1533467c996cSMel Gorman 		seq_printf(m, "%12s ", migratetype_names[mtype]);
1534467c996cSMel Gorman 	seq_putc(m, '\n');
1535727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false,
1536727c080fSVinayak Menon 		pagetypeinfo_showblockcount_print);
1537467c996cSMel Gorman 
1538467c996cSMel Gorman 	return 0;
1539467c996cSMel Gorman }
1540467c996cSMel Gorman 
154148c96a36SJoonsoo Kim /*
154248c96a36SJoonsoo Kim  * Print out the number of pageblocks for each migratetype that contain pages
154348c96a36SJoonsoo Kim  * of other types. This gives an indication of how well fallbacks are being
154448c96a36SJoonsoo Kim  * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
154548c96a36SJoonsoo Kim  * to determine what is going on
154648c96a36SJoonsoo Kim  */
154748c96a36SJoonsoo Kim static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
154848c96a36SJoonsoo Kim {
154948c96a36SJoonsoo Kim #ifdef CONFIG_PAGE_OWNER
155048c96a36SJoonsoo Kim 	int mtype;
155148c96a36SJoonsoo Kim 
15527dd80b8aSVlastimil Babka 	if (!static_branch_unlikely(&page_owner_inited))
155348c96a36SJoonsoo Kim 		return;
155448c96a36SJoonsoo Kim 
155548c96a36SJoonsoo Kim 	drain_all_pages(NULL);
155648c96a36SJoonsoo Kim 
155748c96a36SJoonsoo Kim 	seq_printf(m, "\n%-23s", "Number of mixed blocks ");
155848c96a36SJoonsoo Kim 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
155948c96a36SJoonsoo Kim 		seq_printf(m, "%12s ", migratetype_names[mtype]);
156048c96a36SJoonsoo Kim 	seq_putc(m, '\n');
156148c96a36SJoonsoo Kim 
1562727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, true,
1563727c080fSVinayak Menon 		pagetypeinfo_showmixedcount_print);
156448c96a36SJoonsoo Kim #endif /* CONFIG_PAGE_OWNER */
156548c96a36SJoonsoo Kim }
156648c96a36SJoonsoo Kim 
1567467c996cSMel Gorman /*
1568467c996cSMel Gorman  * This prints out statistics in relation to grouping pages by mobility.
1569467c996cSMel Gorman  * It is expensive to collect so do not constantly read the file.
1570467c996cSMel Gorman  */
1571467c996cSMel Gorman static int pagetypeinfo_show(struct seq_file *m, void *arg)
1572467c996cSMel Gorman {
1573467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1574467c996cSMel Gorman 
157541b25a37SKOSAKI Motohiro 	/* check memoryless node */
1576a47b53c5SLai Jiangshan 	if (!node_state(pgdat->node_id, N_MEMORY))
157741b25a37SKOSAKI Motohiro 		return 0;
157841b25a37SKOSAKI Motohiro 
1579467c996cSMel Gorman 	seq_printf(m, "Page block order: %d\n", pageblock_order);
1580467c996cSMel Gorman 	seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
1581467c996cSMel Gorman 	seq_putc(m, '\n');
1582467c996cSMel Gorman 	pagetypeinfo_showfree(m, pgdat);
1583467c996cSMel Gorman 	pagetypeinfo_showblockcount(m, pgdat);
158448c96a36SJoonsoo Kim 	pagetypeinfo_showmixedcount(m, pgdat);
1585467c996cSMel Gorman 
1586f6ac2354SChristoph Lameter 	return 0;
1587f6ac2354SChristoph Lameter }
1588f6ac2354SChristoph Lameter 
15898f32f7e5SAlexey Dobriyan static const struct seq_operations fragmentation_op = {
1590f6ac2354SChristoph Lameter 	.start	= frag_start,
1591f6ac2354SChristoph Lameter 	.next	= frag_next,
1592f6ac2354SChristoph Lameter 	.stop	= frag_stop,
1593f6ac2354SChristoph Lameter 	.show	= frag_show,
1594f6ac2354SChristoph Lameter };
1595f6ac2354SChristoph Lameter 
159674e2e8e8SAlexey Dobriyan static const struct seq_operations pagetypeinfo_op = {
1597467c996cSMel Gorman 	.start	= frag_start,
1598467c996cSMel Gorman 	.next	= frag_next,
1599467c996cSMel Gorman 	.stop	= frag_stop,
1600467c996cSMel Gorman 	.show	= pagetypeinfo_show,
1601467c996cSMel Gorman };
1602467c996cSMel Gorman 
1603e2ecc8a7SMel Gorman static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
1604e2ecc8a7SMel Gorman {
1605e2ecc8a7SMel Gorman 	int zid;
1606e2ecc8a7SMel Gorman 
1607e2ecc8a7SMel Gorman 	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
1608e2ecc8a7SMel Gorman 		struct zone *compare = &pgdat->node_zones[zid];
1609e2ecc8a7SMel Gorman 
1610e2ecc8a7SMel Gorman 		if (populated_zone(compare))
1611e2ecc8a7SMel Gorman 			return zone == compare;
1612e2ecc8a7SMel Gorman 	}
1613e2ecc8a7SMel Gorman 
1614e2ecc8a7SMel Gorman 	return false;
1615e2ecc8a7SMel Gorman }
1616e2ecc8a7SMel Gorman 
1617467c996cSMel Gorman static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1618467c996cSMel Gorman 							struct zone *zone)
1619f6ac2354SChristoph Lameter {
1620f6ac2354SChristoph Lameter 	int i;
1621f6ac2354SChristoph Lameter 	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
1622e2ecc8a7SMel Gorman 	if (is_zone_first_populated(pgdat, zone)) {
1623e2ecc8a7SMel Gorman 		seq_printf(m, "\n  per-node stats");
1624e2ecc8a7SMel Gorman 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
162569473e5dSMuchun Song 			unsigned long pages = node_page_state_pages(pgdat, i);
162669473e5dSMuchun Song 
162769473e5dSMuchun Song 			if (vmstat_item_print_in_thp(i))
162869473e5dSMuchun Song 				pages /= HPAGE_PMD_NR;
16299d7ea9a2SKonstantin Khlebnikov 			seq_printf(m, "\n      %-12s %lu", node_stat_name(i),
163069473e5dSMuchun Song 				   pages);
1631e2ecc8a7SMel Gorman 		}
1632e2ecc8a7SMel Gorman 	}
1633f6ac2354SChristoph Lameter 	seq_printf(m,
1634f6ac2354SChristoph Lameter 		   "\n  pages free     %lu"
1635f6ac2354SChristoph Lameter 		   "\n        min      %lu"
1636f6ac2354SChristoph Lameter 		   "\n        low      %lu"
1637f6ac2354SChristoph Lameter 		   "\n        high     %lu"
1638f6ac2354SChristoph Lameter 		   "\n        spanned  %lu"
16399feedc9dSJiang Liu 		   "\n        present  %lu"
1640*3c381db1SDavid Hildenbrand 		   "\n        managed  %lu"
1641*3c381db1SDavid Hildenbrand 		   "\n        cma      %lu",
164288f5acf8SMel Gorman 		   zone_page_state(zone, NR_FREE_PAGES),
164341858966SMel Gorman 		   min_wmark_pages(zone),
164441858966SMel Gorman 		   low_wmark_pages(zone),
164541858966SMel Gorman 		   high_wmark_pages(zone),
1646f6ac2354SChristoph Lameter 		   zone->spanned_pages,
16479feedc9dSJiang Liu 		   zone->present_pages,
1648*3c381db1SDavid Hildenbrand 		   zone_managed_pages(zone),
1649*3c381db1SDavid Hildenbrand 		   zone_cma_pages(zone));
16502244b95aSChristoph Lameter 
1651f6ac2354SChristoph Lameter 	seq_printf(m,
16523484b2deSMel Gorman 		   "\n        protection: (%ld",
1653f6ac2354SChristoph Lameter 		   zone->lowmem_reserve[0]);
1654f6ac2354SChristoph Lameter 	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
16553484b2deSMel Gorman 		seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
16567dfb8bf3SDavid Rientjes 	seq_putc(m, ')');
16577dfb8bf3SDavid Rientjes 
1658a8a4b7aeSBaoquan He 	/* If unpopulated, no other information is useful */
1659a8a4b7aeSBaoquan He 	if (!populated_zone(zone)) {
1660a8a4b7aeSBaoquan He 		seq_putc(m, '\n');
1661a8a4b7aeSBaoquan He 		return;
1662a8a4b7aeSBaoquan He 	}
1663a8a4b7aeSBaoquan He 
16647dfb8bf3SDavid Rientjes 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
16659d7ea9a2SKonstantin Khlebnikov 		seq_printf(m, "\n      %-12s %lu", zone_stat_name(i),
16667dfb8bf3SDavid Rientjes 			   zone_page_state(zone, i));
16677dfb8bf3SDavid Rientjes 
16683a321d2aSKemi Wang #ifdef CONFIG_NUMA
16693a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
16709d7ea9a2SKonstantin Khlebnikov 		seq_printf(m, "\n      %-12s %lu", numa_stat_name(i),
167163803222SKemi Wang 			   zone_numa_state_snapshot(zone, i));
16723a321d2aSKemi Wang #endif
16733a321d2aSKemi Wang 
16747dfb8bf3SDavid Rientjes 	seq_printf(m, "\n  pagesets");
1675f6ac2354SChristoph Lameter 	for_each_online_cpu(i) {
1676f6ac2354SChristoph Lameter 		struct per_cpu_pageset *pageset;
1677f6ac2354SChristoph Lameter 
167899dcc3e5SChristoph Lameter 		pageset = per_cpu_ptr(zone->pageset, i);
1679f6ac2354SChristoph Lameter 		seq_printf(m,
16803dfa5721SChristoph Lameter 			   "\n    cpu: %i"
1681f6ac2354SChristoph Lameter 			   "\n              count: %i"
1682f6ac2354SChristoph Lameter 			   "\n              high:  %i"
1683f6ac2354SChristoph Lameter 			   "\n              batch: %i",
16843dfa5721SChristoph Lameter 			   i,
16853dfa5721SChristoph Lameter 			   pageset->pcp.count,
16863dfa5721SChristoph Lameter 			   pageset->pcp.high,
16873dfa5721SChristoph Lameter 			   pageset->pcp.batch);
1688df9ecabaSChristoph Lameter #ifdef CONFIG_SMP
1689df9ecabaSChristoph Lameter 		seq_printf(m, "\n  vm stats threshold: %d",
1690df9ecabaSChristoph Lameter 				pageset->stat_threshold);
1691df9ecabaSChristoph Lameter #endif
1692f6ac2354SChristoph Lameter 	}
1693f6ac2354SChristoph Lameter 	seq_printf(m,
1694599d0c95SMel Gorman 		   "\n  node_unreclaimable:  %u"
16953a50d14dSAndrey Ryabinin 		   "\n  start_pfn:           %lu",
1696c73322d0SJohannes Weiner 		   pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
16973a50d14dSAndrey Ryabinin 		   zone->zone_start_pfn);
1698f6ac2354SChristoph Lameter 	seq_putc(m, '\n');
1699f6ac2354SChristoph Lameter }
1700467c996cSMel Gorman 
1701467c996cSMel Gorman /*
1702b2bd8598SDavid Rientjes  * Output information about zones in @pgdat.  All zones are printed regardless
1703b2bd8598SDavid Rientjes  * of whether they are populated or not: lowmem_reserve_ratio operates on the
1704b2bd8598SDavid Rientjes  * set of all zones and userspace would not be aware of such zones if they are
1705b2bd8598SDavid Rientjes  * suppressed here (zoneinfo displays the effect of lowmem_reserve_ratio).
1706467c996cSMel Gorman  */
1707467c996cSMel Gorman static int zoneinfo_show(struct seq_file *m, void *arg)
1708467c996cSMel Gorman {
1709467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1710727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
1711f6ac2354SChristoph Lameter 	return 0;
1712f6ac2354SChristoph Lameter }
1713f6ac2354SChristoph Lameter 
17145c9fe628SAlexey Dobriyan static const struct seq_operations zoneinfo_op = {
1715f6ac2354SChristoph Lameter 	.start	= frag_start, /* iterate over all zones. The same as in
1716f6ac2354SChristoph Lameter 			       * fragmentation. */
1717f6ac2354SChristoph Lameter 	.next	= frag_next,
1718f6ac2354SChristoph Lameter 	.stop	= frag_stop,
1719f6ac2354SChristoph Lameter 	.show	= zoneinfo_show,
1720f6ac2354SChristoph Lameter };
1721f6ac2354SChristoph Lameter 
17229d7ea9a2SKonstantin Khlebnikov #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
17239d7ea9a2SKonstantin Khlebnikov 			 NR_VM_NUMA_STAT_ITEMS + \
17249d7ea9a2SKonstantin Khlebnikov 			 NR_VM_NODE_STAT_ITEMS + \
17259d7ea9a2SKonstantin Khlebnikov 			 NR_VM_WRITEBACK_STAT_ITEMS + \
17269d7ea9a2SKonstantin Khlebnikov 			 (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
17279d7ea9a2SKonstantin Khlebnikov 			  NR_VM_EVENT_ITEMS : 0))
172879da826aSMichael Rubin 
1729f6ac2354SChristoph Lameter static void *vmstat_start(struct seq_file *m, loff_t *pos)
1730f6ac2354SChristoph Lameter {
17312244b95aSChristoph Lameter 	unsigned long *v;
17329d7ea9a2SKonstantin Khlebnikov 	int i;
1733f6ac2354SChristoph Lameter 
17349d7ea9a2SKonstantin Khlebnikov 	if (*pos >= NR_VMSTAT_ITEMS)
1735f6ac2354SChristoph Lameter 		return NULL;
1736f6ac2354SChristoph Lameter 
17379d7ea9a2SKonstantin Khlebnikov 	BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
17389d7ea9a2SKonstantin Khlebnikov 	v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
17392244b95aSChristoph Lameter 	m->private = v;
17402244b95aSChristoph Lameter 	if (!v)
1741f6ac2354SChristoph Lameter 		return ERR_PTR(-ENOMEM);
17422244b95aSChristoph Lameter 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1743c41f012aSMichal Hocko 		v[i] = global_zone_page_state(i);
174479da826aSMichael Rubin 	v += NR_VM_ZONE_STAT_ITEMS;
174579da826aSMichael Rubin 
17463a321d2aSKemi Wang #ifdef CONFIG_NUMA
17473a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
17483a321d2aSKemi Wang 		v[i] = global_numa_state(i);
17493a321d2aSKemi Wang 	v += NR_VM_NUMA_STAT_ITEMS;
17503a321d2aSKemi Wang #endif
17513a321d2aSKemi Wang 
175269473e5dSMuchun Song 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
1753ea426c2aSRoman Gushchin 		v[i] = global_node_page_state_pages(i);
175469473e5dSMuchun Song 		if (vmstat_item_print_in_thp(i))
175569473e5dSMuchun Song 			v[i] /= HPAGE_PMD_NR;
175669473e5dSMuchun Song 	}
175775ef7184SMel Gorman 	v += NR_VM_NODE_STAT_ITEMS;
175875ef7184SMel Gorman 
175979da826aSMichael Rubin 	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
176079da826aSMichael Rubin 			    v + NR_DIRTY_THRESHOLD);
176179da826aSMichael Rubin 	v += NR_VM_WRITEBACK_STAT_ITEMS;
176279da826aSMichael Rubin 
1763f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
176479da826aSMichael Rubin 	all_vm_events(v);
176579da826aSMichael Rubin 	v[PGPGIN] /= 2;		/* sectors -> kbytes */
176679da826aSMichael Rubin 	v[PGPGOUT] /= 2;
1767f8891e5eSChristoph Lameter #endif
1768ff8b16d7SWu Fengguang 	return (unsigned long *)m->private + *pos;
1769f6ac2354SChristoph Lameter }
1770f6ac2354SChristoph Lameter 
1771f6ac2354SChristoph Lameter static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
1772f6ac2354SChristoph Lameter {
1773f6ac2354SChristoph Lameter 	(*pos)++;
17749d7ea9a2SKonstantin Khlebnikov 	if (*pos >= NR_VMSTAT_ITEMS)
1775f6ac2354SChristoph Lameter 		return NULL;
1776f6ac2354SChristoph Lameter 	return (unsigned long *)m->private + *pos;
1777f6ac2354SChristoph Lameter }
1778f6ac2354SChristoph Lameter 
1779f6ac2354SChristoph Lameter static int vmstat_show(struct seq_file *m, void *arg)
1780f6ac2354SChristoph Lameter {
1781f6ac2354SChristoph Lameter 	unsigned long *l = arg;
1782f6ac2354SChristoph Lameter 	unsigned long off = l - (unsigned long *)m->private;
178368ba0326SAlexey Dobriyan 
178468ba0326SAlexey Dobriyan 	seq_puts(m, vmstat_text[off]);
178575ba1d07SJoe Perches 	seq_put_decimal_ull(m, " ", *l);
178668ba0326SAlexey Dobriyan 	seq_putc(m, '\n');
17878d92890bSNeilBrown 
17888d92890bSNeilBrown 	if (off == NR_VMSTAT_ITEMS - 1) {
17898d92890bSNeilBrown 		/*
17908d92890bSNeilBrown 		 * We've come to the end - add any deprecated counters to avoid
17918d92890bSNeilBrown 		 * breaking userspace which might depend on them being present.
17928d92890bSNeilBrown 		 */
17938d92890bSNeilBrown 		seq_puts(m, "nr_unstable 0\n");
17948d92890bSNeilBrown 	}
1795f6ac2354SChristoph Lameter 	return 0;
1796f6ac2354SChristoph Lameter }
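
/*
 * Each /proc/vmstat line is simply "<name> <value>", e.g. (value is
 * hypothetical):
 *
 *	nr_free_pages 123456
 *
 * with the legacy "nr_unstable 0" line emitted after the final real item.
 */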
1797f6ac2354SChristoph Lameter 
1798f6ac2354SChristoph Lameter static void vmstat_stop(struct seq_file *m, void *arg)
1799f6ac2354SChristoph Lameter {
1800f6ac2354SChristoph Lameter 	kfree(m->private);
1801f6ac2354SChristoph Lameter 	m->private = NULL;
1802f6ac2354SChristoph Lameter }
1803f6ac2354SChristoph Lameter 
1804b6aa44abSAlexey Dobriyan static const struct seq_operations vmstat_op = {
1805f6ac2354SChristoph Lameter 	.start	= vmstat_start,
1806f6ac2354SChristoph Lameter 	.next	= vmstat_next,
1807f6ac2354SChristoph Lameter 	.stop	= vmstat_stop,
1808f6ac2354SChristoph Lameter 	.show	= vmstat_show,
1809f6ac2354SChristoph Lameter };
1810f6ac2354SChristoph Lameter #endif /* CONFIG_PROC_FS */
1811f6ac2354SChristoph Lameter 
1812df9ecabaSChristoph Lameter #ifdef CONFIG_SMP
1813d1187ed2SChristoph Lameter static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
181477461ab3SChristoph Lameter int sysctl_stat_interval __read_mostly = HZ;
1815d1187ed2SChristoph Lameter 
181652b6f46bSHugh Dickins #ifdef CONFIG_PROC_FS
181752b6f46bSHugh Dickins static void refresh_vm_stats(struct work_struct *work)
181852b6f46bSHugh Dickins {
181952b6f46bSHugh Dickins 	refresh_cpu_vm_stats(true);
182052b6f46bSHugh Dickins }
182152b6f46bSHugh Dickins 
182252b6f46bSHugh Dickins int vmstat_refresh(struct ctl_table *table, int write,
182332927393SChristoph Hellwig 		   void *buffer, size_t *lenp, loff_t *ppos)
182452b6f46bSHugh Dickins {
182552b6f46bSHugh Dickins 	long val;
182652b6f46bSHugh Dickins 	int err;
182752b6f46bSHugh Dickins 	int i;
182852b6f46bSHugh Dickins 
182952b6f46bSHugh Dickins 	/*
183052b6f46bSHugh Dickins 	 * The regular update, every sysctl_stat_interval, may come later
183152b6f46bSHugh Dickins 	 * than expected: leaving a significant amount in per_cpu buckets.
183252b6f46bSHugh Dickins 	 * This is particularly misleading when checking a quantity of HUGE
183352b6f46bSHugh Dickins 	 * pages, immediately after running a test.  /proc/sys/vm/stat_refresh,
183452b6f46bSHugh Dickins 	 * which can equally be echo'ed to or cat'ted from (by root),
183552b6f46bSHugh Dickins 	 * can be used to update the stats just before reading them.
183652b6f46bSHugh Dickins 	 *
1837c41f012aSMichal Hocko 	 * Oh, and since global_zone_page_state() etc. are so careful to hide
183852b6f46bSHugh Dickins 	 * transiently negative values, report an error here if any of
183952b6f46bSHugh Dickins 	 * the stats is negative, so we know to go looking for imbalance.
184052b6f46bSHugh Dickins 	 */
184152b6f46bSHugh Dickins 	err = schedule_on_each_cpu(refresh_vm_stats);
184252b6f46bSHugh Dickins 	if (err)
184352b6f46bSHugh Dickins 		return err;
184452b6f46bSHugh Dickins 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
184575ef7184SMel Gorman 		val = atomic_long_read(&vm_zone_stat[i]);
184652b6f46bSHugh Dickins 		if (val < 0) {
184752b6f46bSHugh Dickins 			pr_warn("%s: %s %ld\n",
18489d7ea9a2SKonstantin Khlebnikov 				__func__, zone_stat_name(i), val);
184952b6f46bSHugh Dickins 			err = -EINVAL;
185052b6f46bSHugh Dickins 		}
185152b6f46bSHugh Dickins 	}
18523a321d2aSKemi Wang #ifdef CONFIG_NUMA
18533a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
18543a321d2aSKemi Wang 		val = atomic_long_read(&vm_numa_stat[i]);
18553a321d2aSKemi Wang 		if (val < 0) {
18563a321d2aSKemi Wang 			pr_warn("%s: %s %ld\n",
18579d7ea9a2SKonstantin Khlebnikov 				__func__, numa_stat_name(i), val);
18583a321d2aSKemi Wang 			err = -EINVAL;
18593a321d2aSKemi Wang 		}
18603a321d2aSKemi Wang 	}
18613a321d2aSKemi Wang #endif
186252b6f46bSHugh Dickins 	if (err)
186352b6f46bSHugh Dickins 		return err;
186452b6f46bSHugh Dickins 	if (write)
186552b6f46bSHugh Dickins 		*ppos += *lenp;
186652b6f46bSHugh Dickins 	else
186752b6f46bSHugh Dickins 		*lenp = 0;
186852b6f46bSHugh Dickins 	return 0;
186952b6f46bSHugh Dickins }
187052b6f46bSHugh Dickins #endif /* CONFIG_PROC_FS */
187152b6f46bSHugh Dickins 
1872d1187ed2SChristoph Lameter static void vmstat_update(struct work_struct *w)
1873d1187ed2SChristoph Lameter {
18740eb77e98SChristoph Lameter 	if (refresh_cpu_vm_stats(true)) {
18757cc36bbdSChristoph Lameter 		/*
18767cc36bbdSChristoph Lameter 		 * Counters were updated so we expect more updates
18777cc36bbdSChristoph Lameter 		 * to occur in the future. Keep on running the
18787cc36bbdSChristoph Lameter 		 * update worker thread.
18797cc36bbdSChristoph Lameter 		 */
1880ce612879SMichal Hocko 		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
1881176bed1dSLinus Torvalds 				this_cpu_ptr(&vmstat_work),
188298f4ebb2SAnton Blanchard 				round_jiffies_relative(sysctl_stat_interval));
1883f01f17d3SMichal Hocko 	}
1884d1187ed2SChristoph Lameter }
1885d1187ed2SChristoph Lameter 
18910eb77e98SChristoph Lameter /*
18927cc36bbdSChristoph Lameter  * Check if the diffs for a certain cpu indicate that
18937cc36bbdSChristoph Lameter  * an update is needed.
18947cc36bbdSChristoph Lameter  */
18957cc36bbdSChristoph Lameter static bool need_update(int cpu)
1896d1187ed2SChristoph Lameter {
18977cc36bbdSChristoph Lameter 	struct zone *zone;
1898d1187ed2SChristoph Lameter 
18997cc36bbdSChristoph Lameter 	for_each_populated_zone(zone) {
19007cc36bbdSChristoph Lameter 		struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
19017cc36bbdSChristoph Lameter 
19027cc36bbdSChristoph Lameter 		BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
19033a321d2aSKemi Wang #ifdef CONFIG_NUMA
19041d90ca89SKemi Wang 		BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 2);
19053a321d2aSKemi Wang #endif
190663803222SKemi Wang 
19077cc36bbdSChristoph Lameter 		/*
19087cc36bbdSChristoph Lameter 		 * The fast way of checking if there are any vmstat diffs.
19097cc36bbdSChristoph Lameter 		 */
191013c9aaf7SJanne Huttunen 		if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
191113c9aaf7SJanne Huttunen 			       sizeof(p->vm_stat_diff[0])))
19127cc36bbdSChristoph Lameter 			return true;
19133a321d2aSKemi Wang #ifdef CONFIG_NUMA
191413c9aaf7SJanne Huttunen 		if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS *
191513c9aaf7SJanne Huttunen 			       sizeof(p->vm_numa_stat_diff[0])))
19163a321d2aSKemi Wang 			return true;
19173a321d2aSKemi Wang #endif
19187cc36bbdSChristoph Lameter 	}
19197cc36bbdSChristoph Lameter 	return false;
19207cc36bbdSChristoph Lameter }
19217cc36bbdSChristoph Lameter 
19227b8da4c7SChristoph Lameter /*
19237b8da4c7SChristoph Lameter  * Switch off vmstat processing and then fold all the remaining differentials
19247b8da4c7SChristoph Lameter  * until the diffs stay at zero. The function is used by NOHZ and can only be
19257b8da4c7SChristoph Lameter  * invoked when tick processing is not active.
19267b8da4c7SChristoph Lameter  */
1927f01f17d3SMichal Hocko void quiet_vmstat(void)
1928f01f17d3SMichal Hocko {
1929f01f17d3SMichal Hocko 	if (system_state != SYSTEM_RUNNING)
1930f01f17d3SMichal Hocko 		return;
1931f01f17d3SMichal Hocko 
19327b8da4c7SChristoph Lameter 	if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
1933f01f17d3SMichal Hocko 		return;
1934f01f17d3SMichal Hocko 
1935f01f17d3SMichal Hocko 	if (!need_update(smp_processor_id()))
1936f01f17d3SMichal Hocko 		return;
1937f01f17d3SMichal Hocko 
1938f01f17d3SMichal Hocko 	/*
1939f01f17d3SMichal Hocko 	 * Just refresh counters and do not care about the pending delayed
1940f01f17d3SMichal Hocko 	 * vmstat_update. It doesn't fire that often to matter and canceling
1941f01f17d3SMichal Hocko 	 * it would be too expensive from this path.
1942f01f17d3SMichal Hocko 	 * vmstat_shepherd will take care about that for us.
1943f01f17d3SMichal Hocko 	 */
1944f01f17d3SMichal Hocko 	refresh_cpu_vm_stats(false);
1945f01f17d3SMichal Hocko }
1946f01f17d3SMichal Hocko 
19477cc36bbdSChristoph Lameter /*
19487cc36bbdSChristoph Lameter  * Shepherd worker thread that checks the
19497cc36bbdSChristoph Lameter  * differentials of processors whose vmstat update
19507cc36bbdSChristoph Lameter  * workers have been disabled because of
19517cc36bbdSChristoph Lameter  * inactivity.
19527cc36bbdSChristoph Lameter  */
19537cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w);
19547cc36bbdSChristoph Lameter 
19550eb77e98SChristoph Lameter static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);
19567cc36bbdSChristoph Lameter 
19577cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w)
19587cc36bbdSChristoph Lameter {
19597cc36bbdSChristoph Lameter 	int cpu;
19607cc36bbdSChristoph Lameter 
19617cc36bbdSChristoph Lameter 	get_online_cpus();
19627cc36bbdSChristoph Lameter 	/* Check processors whose vmstat worker threads have been disabled */
19637b8da4c7SChristoph Lameter 	for_each_online_cpu(cpu) {
1964f01f17d3SMichal Hocko 		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
19657cc36bbdSChristoph Lameter 
19667b8da4c7SChristoph Lameter 		if (!delayed_work_pending(dw) && need_update(cpu))
1967ce612879SMichal Hocko 			queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
1968f01f17d3SMichal Hocko 	}
19697cc36bbdSChristoph Lameter 	put_online_cpus();
19707cc36bbdSChristoph Lameter 
19717cc36bbdSChristoph Lameter 	schedule_delayed_work(&shepherd,
19727cc36bbdSChristoph Lameter 		round_jiffies_relative(sysctl_stat_interval));
19737cc36bbdSChristoph Lameter }
19747cc36bbdSChristoph Lameter 
19757cc36bbdSChristoph Lameter static void __init start_shepherd_timer(void)
19767cc36bbdSChristoph Lameter {
19777cc36bbdSChristoph Lameter 	int cpu;
19787cc36bbdSChristoph Lameter 
19797cc36bbdSChristoph Lameter 	for_each_possible_cpu(cpu)
1980ccde8bd4SMichal Hocko 		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
19817cc36bbdSChristoph Lameter 			vmstat_update);
19827cc36bbdSChristoph Lameter 
19837cc36bbdSChristoph Lameter 	schedule_delayed_work(&shepherd,
19847cc36bbdSChristoph Lameter 		round_jiffies_relative(sysctl_stat_interval));
1985d1187ed2SChristoph Lameter }
1986d1187ed2SChristoph Lameter 
198703e86dbaSTim Chen static void __init init_cpu_node_state(void)
198803e86dbaSTim Chen {
19894c501327SSebastian Andrzej Siewior 	int node;
199003e86dbaSTim Chen 
19914c501327SSebastian Andrzej Siewior 	for_each_online_node(node) {
19924c501327SSebastian Andrzej Siewior 		if (cpumask_weight(cpumask_of_node(node)) > 0)
19934c501327SSebastian Andrzej Siewior 			node_set_state(node, N_CPU);
19944c501327SSebastian Andrzej Siewior 	}
199503e86dbaSTim Chen }
199603e86dbaSTim Chen 
19975438da97SSebastian Andrzej Siewior static int vmstat_cpu_online(unsigned int cpu)
1998807a1bd2SToshi Kani {
19995ee28a44SKAMEZAWA Hiroyuki 	refresh_zone_stat_thresholds();
2000ad596925SChristoph Lameter 	node_set_state(cpu_to_node(cpu), N_CPU);
20015438da97SSebastian Andrzej Siewior 	return 0;
2002df9ecabaSChristoph Lameter }
2003df9ecabaSChristoph Lameter 
20045438da97SSebastian Andrzej Siewior static int vmstat_cpu_down_prep(unsigned int cpu)
20055438da97SSebastian Andrzej Siewior {
20065438da97SSebastian Andrzej Siewior 	cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
20075438da97SSebastian Andrzej Siewior 	return 0;
20085438da97SSebastian Andrzej Siewior }
20095438da97SSebastian Andrzej Siewior 
20105438da97SSebastian Andrzej Siewior static int vmstat_cpu_dead(unsigned int cpu)
20115438da97SSebastian Andrzej Siewior {
20125438da97SSebastian Andrzej Siewior 	const struct cpumask *node_cpus;
20135438da97SSebastian Andrzej Siewior 	int node;
20145438da97SSebastian Andrzej Siewior 
20155438da97SSebastian Andrzej Siewior 	node = cpu_to_node(cpu);
20165438da97SSebastian Andrzej Siewior 
20175438da97SSebastian Andrzej Siewior 	refresh_zone_stat_thresholds();
20185438da97SSebastian Andrzej Siewior 	node_cpus = cpumask_of_node(node);
20195438da97SSebastian Andrzej Siewior 	if (cpumask_weight(node_cpus) > 0)
20205438da97SSebastian Andrzej Siewior 		return 0;
20215438da97SSebastian Andrzej Siewior 
20225438da97SSebastian Andrzej Siewior 	node_clear_state(node, N_CPU);
20235438da97SSebastian Andrzej Siewior 	return 0;
20245438da97SSebastian Andrzej Siewior }
20255438da97SSebastian Andrzej Siewior 
20268f32f7e5SAlexey Dobriyan #endif
2027df9ecabaSChristoph Lameter 
2028ce612879SMichal Hocko struct workqueue_struct *mm_percpu_wq;
2029ce612879SMichal Hocko 
2030597b7305SMichal Hocko void __init init_mm_internals(void)
2031df9ecabaSChristoph Lameter {
2032ce612879SMichal Hocko 	int ret __maybe_unused;
20335438da97SSebastian Andrzej Siewior 
203480d136e1SMichal Hocko 	mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
2035ce612879SMichal Hocko 
2036ce612879SMichal Hocko #ifdef CONFIG_SMP
20375438da97SSebastian Andrzej Siewior 	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
20385438da97SSebastian Andrzej Siewior 					NULL, vmstat_cpu_dead);
20395438da97SSebastian Andrzej Siewior 	if (ret < 0)
20405438da97SSebastian Andrzej Siewior 		pr_err("vmstat: failed to register 'dead' hotplug state\n");
20415438da97SSebastian Andrzej Siewior 
20425438da97SSebastian Andrzej Siewior 	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
20435438da97SSebastian Andrzej Siewior 					vmstat_cpu_online,
20445438da97SSebastian Andrzej Siewior 					vmstat_cpu_down_prep);
20455438da97SSebastian Andrzej Siewior 	if (ret < 0)
20465438da97SSebastian Andrzej Siewior 		pr_err("vmstat: failed to register 'online' hotplug state\n");
20475438da97SSebastian Andrzej Siewior 
20485438da97SSebastian Andrzej Siewior 	get_online_cpus();
204903e86dbaSTim Chen 	init_cpu_node_state();
20505438da97SSebastian Andrzej Siewior 	put_online_cpus();
2051d1187ed2SChristoph Lameter 
20527cc36bbdSChristoph Lameter 	start_shepherd_timer();
20538f32f7e5SAlexey Dobriyan #endif
20548f32f7e5SAlexey Dobriyan #ifdef CONFIG_PROC_FS
2055fddda2b7SChristoph Hellwig 	proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
2056abaed011SMichal Hocko 	proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
2057fddda2b7SChristoph Hellwig 	proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
2058fddda2b7SChristoph Hellwig 	proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
20598f32f7e5SAlexey Dobriyan #endif
2060df9ecabaSChristoph Lameter }
2061d7a5752cSMel Gorman 
2062d7a5752cSMel Gorman #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
2063d7a5752cSMel Gorman 
2064d7a5752cSMel Gorman /*
2065d7a5752cSMel Gorman  * Return an index indicating how much of the available free memory is
2066d7a5752cSMel Gorman  * unusable for an allocation of the requested size.
2067d7a5752cSMel Gorman  */
2068d7a5752cSMel Gorman static int unusable_free_index(unsigned int order,
2069d7a5752cSMel Gorman 				struct contig_page_info *info)
2070d7a5752cSMel Gorman {
2071d7a5752cSMel Gorman 	/* No free memory is interpreted as all free memory is unusable */
2072d7a5752cSMel Gorman 	if (info->free_pages == 0)
2073d7a5752cSMel Gorman 		return 1000;
2074d7a5752cSMel Gorman 
2075d7a5752cSMel Gorman 	/*
2076d7a5752cSMel Gorman 	 * Index should be a value between 0 and 1. Return a value to 3
2077d7a5752cSMel Gorman 	 * decimal places.
2078d7a5752cSMel Gorman 	 *
2079d7a5752cSMel Gorman 	 * 0 => no fragmentation
2080d7a5752cSMel Gorman 	 * 1 => high fragmentation
2081d7a5752cSMel Gorman 	 */
2082d7a5752cSMel Gorman 	return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
2083d7a5752cSMel Gorman 
2084d7a5752cSMel Gorman }
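
/*
 * Worked example (hypothetical numbers, added for illustration): with
 * order = 2, free_pages = 1000 and free_blocks_suitable = 100 (so 400
 * pages are usable for an order-2 request), the index is
 *
 *	(1000 - (100 << 2)) * 1000 / 1000 = 600
 *
 * which unusable_show_print() below reports as 0.600.
 */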
2085d7a5752cSMel Gorman 
2086d7a5752cSMel Gorman static void unusable_show_print(struct seq_file *m,
2087d7a5752cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
2088d7a5752cSMel Gorman {
2089d7a5752cSMel Gorman 	unsigned int order;
2090d7a5752cSMel Gorman 	int index;
2091d7a5752cSMel Gorman 	struct contig_page_info info;
2092d7a5752cSMel Gorman 
2093d7a5752cSMel Gorman 	seq_printf(m, "Node %d, zone %8s ",
2094d7a5752cSMel Gorman 				pgdat->node_id,
2095d7a5752cSMel Gorman 				zone->name);
2096d7a5752cSMel Gorman 	for (order = 0; order < MAX_ORDER; ++order) {
2097d7a5752cSMel Gorman 		fill_contig_page_info(zone, order, &info);
2098d7a5752cSMel Gorman 		index = unusable_free_index(order, &info);
2099d7a5752cSMel Gorman 		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
2100d7a5752cSMel Gorman 	}
2101d7a5752cSMel Gorman 
2102d7a5752cSMel Gorman 	seq_putc(m, '\n');
2103d7a5752cSMel Gorman }
2104d7a5752cSMel Gorman 
2105d7a5752cSMel Gorman /*
2106d7a5752cSMel Gorman  * Display unusable free space index
2107d7a5752cSMel Gorman  *
2108d7a5752cSMel Gorman  * The unusable free space index measures how much of the available free
2109d7a5752cSMel Gorman  * memory cannot be used to satisfy an allocation of a given size and is a
2110d7a5752cSMel Gorman  * value between 0 and 1. The higher the value, the more of the free memory is
2111d7a5752cSMel Gorman  * unusable and, by implication, the worse the external fragmentation is. This
2112d7a5752cSMel Gorman  * can be expressed as a percentage by multiplying by 100.
2113d7a5752cSMel Gorman  */
2114d7a5752cSMel Gorman static int unusable_show(struct seq_file *m, void *arg)
2115d7a5752cSMel Gorman {
2116d7a5752cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
2117d7a5752cSMel Gorman 
2118d7a5752cSMel Gorman 	/* check memoryless node */
2119a47b53c5SLai Jiangshan 	if (!node_state(pgdat->node_id, N_MEMORY))
2120d7a5752cSMel Gorman 		return 0;
2121d7a5752cSMel Gorman 
2122727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, unusable_show_print);
2123d7a5752cSMel Gorman 
2124d7a5752cSMel Gorman 	return 0;
2125d7a5752cSMel Gorman }
2126d7a5752cSMel Gorman 
212701a99560SKefeng Wang static const struct seq_operations unusable_sops = {
2128d7a5752cSMel Gorman 	.start	= frag_start,
2129d7a5752cSMel Gorman 	.next	= frag_next,
2130d7a5752cSMel Gorman 	.stop	= frag_stop,
2131d7a5752cSMel Gorman 	.show	= unusable_show,
2132d7a5752cSMel Gorman };
2133d7a5752cSMel Gorman 
213401a99560SKefeng Wang DEFINE_SEQ_ATTRIBUTE(unusable);
2135d7a5752cSMel Gorman 
2136f1a5ab12SMel Gorman static void extfrag_show_print(struct seq_file *m,
2137f1a5ab12SMel Gorman 					pg_data_t *pgdat, struct zone *zone)
2138f1a5ab12SMel Gorman {
2139f1a5ab12SMel Gorman 	unsigned int order;
2140f1a5ab12SMel Gorman 	int index;
2141f1a5ab12SMel Gorman 
2142f1a5ab12SMel Gorman 	/* Alloc on stack as interrupts are disabled for zone walk */
2143f1a5ab12SMel Gorman 	struct contig_page_info info;
2144f1a5ab12SMel Gorman 
2145f1a5ab12SMel Gorman 	seq_printf(m, "Node %d, zone %8s ",
2146f1a5ab12SMel Gorman 				pgdat->node_id,
2147f1a5ab12SMel Gorman 				zone->name);
2148f1a5ab12SMel Gorman 	for (order = 0; order < MAX_ORDER; ++order) {
2149f1a5ab12SMel Gorman 		fill_contig_page_info(zone, order, &info);
215056de7263SMel Gorman 		index = __fragmentation_index(order, &info);
2151f1a5ab12SMel Gorman 		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
2152f1a5ab12SMel Gorman 	}
2153f1a5ab12SMel Gorman 
2154f1a5ab12SMel Gorman 	seq_putc(m, '\n');
2155f1a5ab12SMel Gorman }
2156f1a5ab12SMel Gorman 
2157f1a5ab12SMel Gorman /*
2158f1a5ab12SMel Gorman  * Display fragmentation index for orders that allocations would fail for
2159f1a5ab12SMel Gorman  */
2160f1a5ab12SMel Gorman static int extfrag_show(struct seq_file *m, void *arg)
2161f1a5ab12SMel Gorman {
2162f1a5ab12SMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
2163f1a5ab12SMel Gorman 
2164727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);
2165f1a5ab12SMel Gorman 
2166f1a5ab12SMel Gorman 	return 0;
2167f1a5ab12SMel Gorman }
2168f1a5ab12SMel Gorman 
216901a99560SKefeng Wang static const struct seq_operations extfrag_sops = {
2170f1a5ab12SMel Gorman 	.start	= frag_start,
2171f1a5ab12SMel Gorman 	.next	= frag_next,
2172f1a5ab12SMel Gorman 	.stop	= frag_stop,
2173f1a5ab12SMel Gorman 	.show	= extfrag_show,
2174f1a5ab12SMel Gorman };
2175f1a5ab12SMel Gorman 
217601a99560SKefeng Wang DEFINE_SEQ_ATTRIBUTE(extfrag);
2177f1a5ab12SMel Gorman 
2178d7a5752cSMel Gorman static int __init extfrag_debug_init(void)
2179d7a5752cSMel Gorman {
2180bde8bd8aSSasikantha babu 	struct dentry *extfrag_debug_root;
2181bde8bd8aSSasikantha babu 
2182d7a5752cSMel Gorman 	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
2183d7a5752cSMel Gorman 
2184d9f7979cSGreg Kroah-Hartman 	debugfs_create_file("unusable_index", 0444, extfrag_debug_root, NULL,
218501a99560SKefeng Wang 			    &unusable_fops);
2186d7a5752cSMel Gorman 
2187d9f7979cSGreg Kroah-Hartman 	debugfs_create_file("extfrag_index", 0444, extfrag_debug_root, NULL,
218801a99560SKefeng Wang 			    &extfrag_fops);
2189f1a5ab12SMel Gorman 
2190d7a5752cSMel Gorman 	return 0;
2191d7a5752cSMel Gorman }
2192d7a5752cSMel Gorman 
2193d7a5752cSMel Gorman module_init(extfrag_debug_init);
2194d7a5752cSMel Gorman #endif
2195