xref: /linux/mm/vmstat.c (revision 1d90ca897cb05cf38bd62f36756d219e02913b7d)
1f6ac2354SChristoph Lameter /*
2f6ac2354SChristoph Lameter  *  linux/mm/vmstat.c
3f6ac2354SChristoph Lameter  *
4f6ac2354SChristoph Lameter  *  Manages VM statistics
5f6ac2354SChristoph Lameter  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
62244b95aSChristoph Lameter  *
72244b95aSChristoph Lameter  *  zoned VM statistics
82244b95aSChristoph Lameter  *  Copyright (C) 2006 Silicon Graphics, Inc.,
92244b95aSChristoph Lameter  *		Christoph Lameter <christoph@lameter.com>
107cc36bbdSChristoph Lameter  *  Copyright (C) 2008-2014 Christoph Lameter
11f6ac2354SChristoph Lameter  */
128f32f7e5SAlexey Dobriyan #include <linux/fs.h>
13f6ac2354SChristoph Lameter #include <linux/mm.h>
144e950f6fSAlexey Dobriyan #include <linux/err.h>
152244b95aSChristoph Lameter #include <linux/module.h>
165a0e3ad6STejun Heo #include <linux/slab.h>
17df9ecabaSChristoph Lameter #include <linux/cpu.h>
187cc36bbdSChristoph Lameter #include <linux/cpumask.h>
19c748e134SAdrian Bunk #include <linux/vmstat.h>
203c486871SAndrew Morton #include <linux/proc_fs.h>
213c486871SAndrew Morton #include <linux/seq_file.h>
223c486871SAndrew Morton #include <linux/debugfs.h>
23e8edc6e0SAlexey Dobriyan #include <linux/sched.h>
24f1a5ab12SMel Gorman #include <linux/math64.h>
2579da826aSMichael Rubin #include <linux/writeback.h>
2636deb0beSNamhyung Kim #include <linux/compaction.h>
276e543d57SLisa Du #include <linux/mm_inline.h>
2848c96a36SJoonsoo Kim #include <linux/page_ext.h>
2948c96a36SJoonsoo Kim #include <linux/page_owner.h>
306e543d57SLisa Du 
316e543d57SLisa Du #include "internal.h"
32f6ac2354SChristoph Lameter 
33*1d90ca89SKemi Wang #define NUMA_STATS_THRESHOLD (U16_MAX - 2)
34*1d90ca89SKemi Wang 
35f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
36f8891e5eSChristoph Lameter DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
37f8891e5eSChristoph Lameter EXPORT_PER_CPU_SYMBOL(vm_event_states);
38f8891e5eSChristoph Lameter 
3931f961a8SMinchan Kim static void sum_vm_events(unsigned long *ret)
40f8891e5eSChristoph Lameter {
419eccf2a8SChristoph Lameter 	int cpu;
42f8891e5eSChristoph Lameter 	int i;
43f8891e5eSChristoph Lameter 
44f8891e5eSChristoph Lameter 	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
45f8891e5eSChristoph Lameter 
4631f961a8SMinchan Kim 	for_each_online_cpu(cpu) {
47f8891e5eSChristoph Lameter 		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
48f8891e5eSChristoph Lameter 
49f8891e5eSChristoph Lameter 		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
50f8891e5eSChristoph Lameter 			ret[i] += this->event[i];
51f8891e5eSChristoph Lameter 	}
52f8891e5eSChristoph Lameter }
53f8891e5eSChristoph Lameter 
54f8891e5eSChristoph Lameter /*
55f8891e5eSChristoph Lameter  * Accumulate the vm event counters across all CPUs.
56f8891e5eSChristoph Lameter  * The result is unavoidably approximate - it can change
57f8891e5eSChristoph Lameter  * during and after execution of this function.
58f8891e5eSChristoph Lameter  */
59f8891e5eSChristoph Lameter void all_vm_events(unsigned long *ret)
60f8891e5eSChristoph Lameter {
61b5be1132SKOSAKI Motohiro 	get_online_cpus();
6231f961a8SMinchan Kim 	sum_vm_events(ret);
63b5be1132SKOSAKI Motohiro 	put_online_cpus();
64f8891e5eSChristoph Lameter }
6532dd66fcSHeiko Carstens EXPORT_SYMBOL_GPL(all_vm_events);
66f8891e5eSChristoph Lameter 
67f8891e5eSChristoph Lameter /*
68f8891e5eSChristoph Lameter  * Fold the foreign cpu events into our own.
69f8891e5eSChristoph Lameter  *
70f8891e5eSChristoph Lameter  * This is adding to the events on one processor
71f8891e5eSChristoph Lameter  * but keeps the global counts constant.
72f8891e5eSChristoph Lameter  */
73f8891e5eSChristoph Lameter void vm_events_fold_cpu(int cpu)
74f8891e5eSChristoph Lameter {
75f8891e5eSChristoph Lameter 	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
76f8891e5eSChristoph Lameter 	int i;
77f8891e5eSChristoph Lameter 
78f8891e5eSChristoph Lameter 	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
79f8891e5eSChristoph Lameter 		count_vm_events(i, fold_state->event[i]);
80f8891e5eSChristoph Lameter 		fold_state->event[i] = 0;
81f8891e5eSChristoph Lameter 	}
82f8891e5eSChristoph Lameter }
83f8891e5eSChristoph Lameter 
84f8891e5eSChristoph Lameter #endif /* CONFIG_VM_EVENT_COUNTERS */
85f8891e5eSChristoph Lameter 
862244b95aSChristoph Lameter /*
872244b95aSChristoph Lameter  * Manage combined zone based / global counters
882244b95aSChristoph Lameter  *
892244b95aSChristoph Lameter  * vm_stat contains the global counters
902244b95aSChristoph Lameter  */
9175ef7184SMel Gorman atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
923a321d2aSKemi Wang atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
9375ef7184SMel Gorman atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
9475ef7184SMel Gorman EXPORT_SYMBOL(vm_zone_stat);
953a321d2aSKemi Wang EXPORT_SYMBOL(vm_numa_stat);
9675ef7184SMel Gorman EXPORT_SYMBOL(vm_node_stat);
972244b95aSChristoph Lameter 
982244b95aSChristoph Lameter #ifdef CONFIG_SMP
992244b95aSChristoph Lameter 
100b44129b3SMel Gorman int calculate_pressure_threshold(struct zone *zone)
10188f5acf8SMel Gorman {
10288f5acf8SMel Gorman 	int threshold;
10388f5acf8SMel Gorman 	int watermark_distance;
10488f5acf8SMel Gorman 
10588f5acf8SMel Gorman 	/*
10688f5acf8SMel Gorman 	 * As vmstats are not up to date, there is drift between the estimated
10788f5acf8SMel Gorman 	 * and real values. For high thresholds and a high number of CPUs, it
10888f5acf8SMel Gorman 	 * is possible for the min watermark to be breached while the estimated
10988f5acf8SMel Gorman 	 * value looks fine. The pressure threshold is a reduced value such
11088f5acf8SMel Gorman 	 * that even the maximum amount of drift will not accidentally breach
11188f5acf8SMel Gorman 	 * the min watermark
11288f5acf8SMel Gorman 	 */
11388f5acf8SMel Gorman 	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
11488f5acf8SMel Gorman 	threshold = max(1, (int)(watermark_distance / num_online_cpus()));
11588f5acf8SMel Gorman 
11688f5acf8SMel Gorman 	/*
11788f5acf8SMel Gorman 	 * Maximum threshold is 125
11888f5acf8SMel Gorman 	 */
11988f5acf8SMel Gorman 	threshold = min(125, threshold);
12088f5acf8SMel Gorman 
12188f5acf8SMel Gorman 	return threshold;
12288f5acf8SMel Gorman }
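
/*
 * Worked example (hypothetical numbers, not taken from this file): with a
 * low-to-min watermark gap of 256 pages and 8 online CPUs, the pressure
 * threshold is max(1, 256 / 8) = 32, well below the 125 cap, so the total
 * per-cpu drift is bounded by 8 * 32 = 256 pages and cannot silently push
 * the zone below the min watermark.
 */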
12388f5acf8SMel Gorman 
124b44129b3SMel Gorman int calculate_normal_threshold(struct zone *zone)
125df9ecabaSChristoph Lameter {
126df9ecabaSChristoph Lameter 	int threshold;
127df9ecabaSChristoph Lameter 	int mem;	/* memory in 128 MB units */
1282244b95aSChristoph Lameter 
1292244b95aSChristoph Lameter 	/*
130df9ecabaSChristoph Lameter 	 * The threshold scales with the number of processors and the amount
131df9ecabaSChristoph Lameter 	 * of memory per zone. More memory means that we can defer updates for
132df9ecabaSChristoph Lameter 	 * longer; more processors could lead to more contention.
133df9ecabaSChristoph Lameter 	 * fls() is used to have a cheap way of logarithmic scaling.
1342244b95aSChristoph Lameter 	 *
135df9ecabaSChristoph Lameter 	 * Some sample thresholds:
136df9ecabaSChristoph Lameter 	 *
137df9ecabaSChristoph Lameter 	 * Threshold	Processors	(fls)	Zonesize	fls(mem+1)
138df9ecabaSChristoph Lameter 	 * ------------------------------------------------------------------
139df9ecabaSChristoph Lameter 	 * 8		1		1	0.9-1 GB	4
140df9ecabaSChristoph Lameter 	 * 16		2		2	0.9-1 GB	4
141df9ecabaSChristoph Lameter 	 * 20 		2		2	1-2 GB		5
142df9ecabaSChristoph Lameter 	 * 24		2		2	2-4 GB		6
143df9ecabaSChristoph Lameter 	 * 28		2		2	4-8 GB		7
144df9ecabaSChristoph Lameter 	 * 32		2		2	8-16 GB		8
145df9ecabaSChristoph Lameter 	 * 4		2		2	<128M		1
146df9ecabaSChristoph Lameter 	 * 30		4		3	2-4 GB		5
147df9ecabaSChristoph Lameter 	 * 48		4		3	8-16 GB		8
148df9ecabaSChristoph Lameter 	 * 32		8		4	1-2 GB		4
149df9ecabaSChristoph Lameter 	 * 32		8		4	0.9-1GB		4
150df9ecabaSChristoph Lameter 	 * 10		16		5	<128M		1
151df9ecabaSChristoph Lameter 	 * 40		16		5	900M		4
152df9ecabaSChristoph Lameter 	 * 70		64		7	2-4 GB		5
153df9ecabaSChristoph Lameter 	 * 84		64		7	4-8 GB		6
154df9ecabaSChristoph Lameter 	 * 108		512		9	4-8 GB		6
155df9ecabaSChristoph Lameter 	 * 125		1024		10	8-16 GB		8
156df9ecabaSChristoph Lameter 	 * 125		1024		10	16-32 GB	9
1572244b95aSChristoph Lameter 	 */
158df9ecabaSChristoph Lameter 
159b40da049SJiang Liu 	mem = zone->managed_pages >> (27 - PAGE_SHIFT);
160df9ecabaSChristoph Lameter 
161df9ecabaSChristoph Lameter 	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
162df9ecabaSChristoph Lameter 
163df9ecabaSChristoph Lameter 	/*
164df9ecabaSChristoph Lameter 	 * Maximum threshold is 125
165df9ecabaSChristoph Lameter 	 */
166df9ecabaSChristoph Lameter 	threshold = min(125, threshold);
167df9ecabaSChristoph Lameter 
168df9ecabaSChristoph Lameter 	return threshold;
169df9ecabaSChristoph Lameter }
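
/*
 * Worked example (hypothetical configuration): assuming 4 KB pages, a
 * ~1 GB zone gives mem = managed_pages >> (27 - PAGE_SHIFT) = 8 (in
 * 128 MB units).  With 2 online CPUs, fls(2) = 2 and fls(8) = 4, so
 * threshold = 2 * 2 * (1 + 4) = 20, matching the "20 / 2 CPUs / 1-2 GB"
 * row in the table above.
 */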
170df9ecabaSChristoph Lameter 
171df9ecabaSChristoph Lameter /*
172df9ecabaSChristoph Lameter  * Refresh the thresholds for each zone.
173df9ecabaSChristoph Lameter  */
174a6cccdc3SKOSAKI Motohiro void refresh_zone_stat_thresholds(void)
1752244b95aSChristoph Lameter {
17675ef7184SMel Gorman 	struct pglist_data *pgdat;
177df9ecabaSChristoph Lameter 	struct zone *zone;
178df9ecabaSChristoph Lameter 	int cpu;
179df9ecabaSChristoph Lameter 	int threshold;
180df9ecabaSChristoph Lameter 
18175ef7184SMel Gorman 	/* Zero current pgdat thresholds */
18275ef7184SMel Gorman 	for_each_online_pgdat(pgdat) {
18375ef7184SMel Gorman 		for_each_online_cpu(cpu) {
18475ef7184SMel Gorman 			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
18575ef7184SMel Gorman 		}
18675ef7184SMel Gorman 	}
18775ef7184SMel Gorman 
188ee99c71cSKOSAKI Motohiro 	for_each_populated_zone(zone) {
18975ef7184SMel Gorman 		struct pglist_data *pgdat = zone->zone_pgdat;
190aa454840SChristoph Lameter 		unsigned long max_drift, tolerate_drift;
191aa454840SChristoph Lameter 
192b44129b3SMel Gorman 		threshold = calculate_normal_threshold(zone);
193df9ecabaSChristoph Lameter 
19475ef7184SMel Gorman 		for_each_online_cpu(cpu) {
19575ef7184SMel Gorman 			int pgdat_threshold;
19675ef7184SMel Gorman 
19799dcc3e5SChristoph Lameter 			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
19899dcc3e5SChristoph Lameter 							= threshold;
199*1d90ca89SKemi Wang 
20075ef7184SMel Gorman 			/* Base nodestat threshold on the largest populated zone. */
20175ef7184SMel Gorman 			pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
20275ef7184SMel Gorman 			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
20375ef7184SMel Gorman 				= max(threshold, pgdat_threshold);
20475ef7184SMel Gorman 		}
20575ef7184SMel Gorman 
206aa454840SChristoph Lameter 		/*
207aa454840SChristoph Lameter 		 * Only set percpu_drift_mark if there is a danger that
208aa454840SChristoph Lameter 		 * NR_FREE_PAGES reports the low watermark is ok when in fact
209aa454840SChristoph Lameter 		 * the min watermark could be breached by an allocation
210aa454840SChristoph Lameter 		 */
211aa454840SChristoph Lameter 		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
212aa454840SChristoph Lameter 		max_drift = num_online_cpus() * threshold;
213aa454840SChristoph Lameter 		if (max_drift > tolerate_drift)
214aa454840SChristoph Lameter 			zone->percpu_drift_mark = high_wmark_pages(zone) +
215aa454840SChristoph Lameter 					max_drift;
216df9ecabaSChristoph Lameter 	}
2172244b95aSChristoph Lameter }
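
/*
 * Illustration (hypothetical numbers): with threshold = 125 and 64 online
 * CPUs, max_drift is 64 * 125 = 8000 pages.  If the low-to-min watermark
 * gap is smaller than that, percpu_drift_mark is set to
 * high_wmark_pages(zone) + 8000, below which callers such as
 * zone_watermark_ok_safe() stop trusting the cached NR_FREE_PAGES value
 * and take a precise per-cpu snapshot instead.
 */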
2182244b95aSChristoph Lameter 
219b44129b3SMel Gorman void set_pgdat_percpu_threshold(pg_data_t *pgdat,
220b44129b3SMel Gorman 				int (*calculate_pressure)(struct zone *))
22188f5acf8SMel Gorman {
22288f5acf8SMel Gorman 	struct zone *zone;
22388f5acf8SMel Gorman 	int cpu;
22488f5acf8SMel Gorman 	int threshold;
22588f5acf8SMel Gorman 	int i;
22688f5acf8SMel Gorman 
22788f5acf8SMel Gorman 	for (i = 0; i < pgdat->nr_zones; i++) {
22888f5acf8SMel Gorman 		zone = &pgdat->node_zones[i];
22988f5acf8SMel Gorman 		if (!zone->percpu_drift_mark)
23088f5acf8SMel Gorman 			continue;
23188f5acf8SMel Gorman 
232b44129b3SMel Gorman 		threshold = (*calculate_pressure)(zone);
233*1d90ca89SKemi Wang 		for_each_online_cpu(cpu)
23488f5acf8SMel Gorman 			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
23588f5acf8SMel Gorman 							= threshold;
23688f5acf8SMel Gorman 	}
23788f5acf8SMel Gorman }
23888f5acf8SMel Gorman 
2392244b95aSChristoph Lameter /*
240bea04b07SJianyu Zhan  * For use when we know that interrupts are disabled,
241bea04b07SJianyu Zhan  * or when we know that preemption is disabled and that
242bea04b07SJianyu Zhan  * particular counter cannot be updated from interrupt context.
2432244b95aSChristoph Lameter  */
2442244b95aSChristoph Lameter void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
2456cdb18adSHeiko Carstens 			   long delta)
2462244b95aSChristoph Lameter {
24712938a92SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
24812938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
2492244b95aSChristoph Lameter 	long x;
25012938a92SChristoph Lameter 	long t;
2512244b95aSChristoph Lameter 
25212938a92SChristoph Lameter 	x = delta + __this_cpu_read(*p);
2532244b95aSChristoph Lameter 
25412938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
25512938a92SChristoph Lameter 
25612938a92SChristoph Lameter 	if (unlikely(x > t || x < -t)) {
2572244b95aSChristoph Lameter 		zone_page_state_add(x, zone, item);
2582244b95aSChristoph Lameter 		x = 0;
2592244b95aSChristoph Lameter 	}
26012938a92SChristoph Lameter 	__this_cpu_write(*p, x);
2612244b95aSChristoph Lameter }
2622244b95aSChristoph Lameter EXPORT_SYMBOL(__mod_zone_page_state);
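
/*
 * Usage sketch (illustrative, not a caller in this file): a path that
 * already runs with interrupts disabled can batch an update through the
 * per-cpu diff, e.g.
 *
 *	__mod_zone_page_state(page_zone(page), NR_MLOCK, -1);
 *
 * Only this cpu's vm_stat_diff is written unless the accumulated delta
 * crosses stat_threshold, at which point zone_page_state_add() folds it
 * into the zone-wide and global atomic counters.
 */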
2632244b95aSChristoph Lameter 
26475ef7184SMel Gorman void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
26575ef7184SMel Gorman 				long delta)
26675ef7184SMel Gorman {
26775ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
26875ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
26975ef7184SMel Gorman 	long x;
27075ef7184SMel Gorman 	long t;
27175ef7184SMel Gorman 
27275ef7184SMel Gorman 	x = delta + __this_cpu_read(*p);
27375ef7184SMel Gorman 
27475ef7184SMel Gorman 	t = __this_cpu_read(pcp->stat_threshold);
27575ef7184SMel Gorman 
27675ef7184SMel Gorman 	if (unlikely(x > t || x < -t)) {
27775ef7184SMel Gorman 		node_page_state_add(x, pgdat, item);
27875ef7184SMel Gorman 		x = 0;
27975ef7184SMel Gorman 	}
28075ef7184SMel Gorman 	__this_cpu_write(*p, x);
28175ef7184SMel Gorman }
28275ef7184SMel Gorman EXPORT_SYMBOL(__mod_node_page_state);
28375ef7184SMel Gorman 
2842244b95aSChristoph Lameter /*
2852244b95aSChristoph Lameter  * Optimized increment and decrement functions.
2862244b95aSChristoph Lameter  *
2872244b95aSChristoph Lameter  * These are only for a single page and therefore can take a struct page *
2882244b95aSChristoph Lameter  * argument instead of struct zone *. This allows the inclusion of the code
2892244b95aSChristoph Lameter  * generated for page_zone(page) into the optimized functions.
2902244b95aSChristoph Lameter  *
2912244b95aSChristoph Lameter  * No overflow check is necessary and therefore the differential can be
2922244b95aSChristoph Lameter  * incremented or decremented in place which may allow the compilers to
2932244b95aSChristoph Lameter  * generate better code.
2942244b95aSChristoph Lameter  * The increment or decrement is known and therefore one boundary check can
2952244b95aSChristoph Lameter  * be omitted.
2962244b95aSChristoph Lameter  *
297df9ecabaSChristoph Lameter  * NOTE: These functions are very performance sensitive. Change only
298df9ecabaSChristoph Lameter  * with care.
299df9ecabaSChristoph Lameter  *
3002244b95aSChristoph Lameter  * Some processors have inc/dec instructions that are atomic vs an interrupt.
3012244b95aSChristoph Lameter  * However, the code must first determine the differential location in a zone
3022244b95aSChristoph Lameter  * based on the processor number and then inc/dec the counter. There is no
3032244b95aSChristoph Lameter  * guarantee without disabling preemption that the processor will not change
3042244b95aSChristoph Lameter  * in between and therefore the atomicity vs. interrupt cannot be exploited
3052244b95aSChristoph Lameter  * in a useful way here.
3062244b95aSChristoph Lameter  */
307c8785385SChristoph Lameter void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
3082244b95aSChristoph Lameter {
30912938a92SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
31012938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
31112938a92SChristoph Lameter 	s8 v, t;
3122244b95aSChristoph Lameter 
313908ee0f1SChristoph Lameter 	v = __this_cpu_inc_return(*p);
31412938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
31512938a92SChristoph Lameter 	if (unlikely(v > t)) {
31612938a92SChristoph Lameter 		s8 overstep = t >> 1;
3172244b95aSChristoph Lameter 
31812938a92SChristoph Lameter 		zone_page_state_add(v + overstep, zone, item);
31912938a92SChristoph Lameter 		__this_cpu_write(*p, -overstep);
3202244b95aSChristoph Lameter 	}
3212244b95aSChristoph Lameter }
322ca889e6cSChristoph Lameter 
32375ef7184SMel Gorman void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
32475ef7184SMel Gorman {
32575ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
32675ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
32775ef7184SMel Gorman 	s8 v, t;
32875ef7184SMel Gorman 
32975ef7184SMel Gorman 	v = __this_cpu_inc_return(*p);
33075ef7184SMel Gorman 	t = __this_cpu_read(pcp->stat_threshold);
33175ef7184SMel Gorman 	if (unlikely(v > t)) {
33275ef7184SMel Gorman 		s8 overstep = t >> 1;
33375ef7184SMel Gorman 
33475ef7184SMel Gorman 		node_page_state_add(v + overstep, pgdat, item);
33575ef7184SMel Gorman 		__this_cpu_write(*p, -overstep);
33675ef7184SMel Gorman 	}
33775ef7184SMel Gorman }
33875ef7184SMel Gorman 
339ca889e6cSChristoph Lameter void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
340ca889e6cSChristoph Lameter {
341ca889e6cSChristoph Lameter 	__inc_zone_state(page_zone(page), item);
342ca889e6cSChristoph Lameter }
3432244b95aSChristoph Lameter EXPORT_SYMBOL(__inc_zone_page_state);
3442244b95aSChristoph Lameter 
34575ef7184SMel Gorman void __inc_node_page_state(struct page *page, enum node_stat_item item)
34675ef7184SMel Gorman {
34775ef7184SMel Gorman 	__inc_node_state(page_pgdat(page), item);
34875ef7184SMel Gorman }
34975ef7184SMel Gorman EXPORT_SYMBOL(__inc_node_page_state);
35075ef7184SMel Gorman 
351c8785385SChristoph Lameter void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
3522244b95aSChristoph Lameter {
35312938a92SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
35412938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
35512938a92SChristoph Lameter 	s8 v, t;
3562244b95aSChristoph Lameter 
357908ee0f1SChristoph Lameter 	v = __this_cpu_dec_return(*p);
35812938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
35912938a92SChristoph Lameter 	if (unlikely(v < -t)) {
36012938a92SChristoph Lameter 		s8 overstep = t >> 1;
3612244b95aSChristoph Lameter 
36212938a92SChristoph Lameter 		zone_page_state_add(v - overstep, zone, item);
36312938a92SChristoph Lameter 		__this_cpu_write(*p, overstep);
3642244b95aSChristoph Lameter 	}
3652244b95aSChristoph Lameter }
366c8785385SChristoph Lameter 
36775ef7184SMel Gorman void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
36875ef7184SMel Gorman {
36975ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
37075ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
37175ef7184SMel Gorman 	s8 v, t;
37275ef7184SMel Gorman 
37375ef7184SMel Gorman 	v = __this_cpu_dec_return(*p);
37475ef7184SMel Gorman 	t = __this_cpu_read(pcp->stat_threshold);
37575ef7184SMel Gorman 	if (unlikely(v < -t)) {
37675ef7184SMel Gorman 		s8 overstep = t >> 1;
37775ef7184SMel Gorman 
37875ef7184SMel Gorman 		node_page_state_add(v - overstep, pgdat, item);
37975ef7184SMel Gorman 		__this_cpu_write(*p, overstep);
38075ef7184SMel Gorman 	}
38175ef7184SMel Gorman }
38275ef7184SMel Gorman 
383c8785385SChristoph Lameter void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
384c8785385SChristoph Lameter {
385c8785385SChristoph Lameter 	__dec_zone_state(page_zone(page), item);
386c8785385SChristoph Lameter }
3872244b95aSChristoph Lameter EXPORT_SYMBOL(__dec_zone_page_state);
3882244b95aSChristoph Lameter 
38975ef7184SMel Gorman void __dec_node_page_state(struct page *page, enum node_stat_item item)
39075ef7184SMel Gorman {
39175ef7184SMel Gorman 	__dec_node_state(page_pgdat(page), item);
39275ef7184SMel Gorman }
39375ef7184SMel Gorman EXPORT_SYMBOL(__dec_node_page_state);
39475ef7184SMel Gorman 
3954156153cSHeiko Carstens #ifdef CONFIG_HAVE_CMPXCHG_LOCAL
3967c839120SChristoph Lameter /*
3977c839120SChristoph Lameter  * If we have cmpxchg_local support then we do not need to incur the overhead
3987c839120SChristoph Lameter  * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
3997c839120SChristoph Lameter  *
4007c839120SChristoph Lameter  * mod_state() modifies the zone counter state through atomic per cpu
4017c839120SChristoph Lameter  * operations.
4027c839120SChristoph Lameter  *
4037c839120SChristoph Lameter  * Overstep mode specifies how overstep should be handled:
4047c839120SChristoph Lameter  *     0       No overstepping
4057c839120SChristoph Lameter  *     1       Overstepping half of threshold
4067c839120SChristoph Lameter  *     -1      Overstepping minus half of threshold
4077c839120SChristoph Lameter  */
40875ef7184SMel Gorman static inline void mod_zone_state(struct zone *zone,
40975ef7184SMel Gorman        enum zone_stat_item item, long delta, int overstep_mode)
4107c839120SChristoph Lameter {
4117c839120SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
4127c839120SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
4137c839120SChristoph Lameter 	long o, n, t, z;
4147c839120SChristoph Lameter 
4157c839120SChristoph Lameter 	do {
4167c839120SChristoph Lameter 		z = 0;  /* overflow to zone counters */
4177c839120SChristoph Lameter 
4187c839120SChristoph Lameter 		/*
4197c839120SChristoph Lameter 		 * The fetching of the stat_threshold is racy. We may apply
4207c839120SChristoph Lameter 		 * a counter threshold to the wrong cpu if we get
421d3bc2367SChristoph Lameter 		 * rescheduled while executing here. However, the next
422d3bc2367SChristoph Lameter 		 * counter update will apply the threshold again and
423d3bc2367SChristoph Lameter 		 * therefore bring the counter under the threshold again.
424d3bc2367SChristoph Lameter 		 *
425d3bc2367SChristoph Lameter 		 * Most of the time the thresholds are the same anyways
426d3bc2367SChristoph Lameter 		 * for all cpus in a zone.
4277c839120SChristoph Lameter 		 */
4287c839120SChristoph Lameter 		t = this_cpu_read(pcp->stat_threshold);
4297c839120SChristoph Lameter 
4307c839120SChristoph Lameter 		o = this_cpu_read(*p);
4317c839120SChristoph Lameter 		n = delta + o;
4327c839120SChristoph Lameter 
4337c839120SChristoph Lameter 		if (n > t || n < -t) {
4347c839120SChristoph Lameter 			int os = overstep_mode * (t >> 1);
4357c839120SChristoph Lameter 
4367c839120SChristoph Lameter 			/* Overflow must be added to zone counters */
4377c839120SChristoph Lameter 			z = n + os;
4387c839120SChristoph Lameter 			n = -os;
4397c839120SChristoph Lameter 		}
4407c839120SChristoph Lameter 	} while (this_cpu_cmpxchg(*p, o, n) != o);
4417c839120SChristoph Lameter 
4427c839120SChristoph Lameter 	if (z)
4437c839120SChristoph Lameter 		zone_page_state_add(z, zone, item);
4447c839120SChristoph Lameter }
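
/*
 * Overstep example (hypothetical numbers): with t = 125 and
 * overstep_mode = 1, os = t >> 1 = 62.  An increment that pushes the
 * per-cpu diff past 125 folds n + 62 into the zone counter and restarts
 * the diff at -62, leaving about half a threshold of headroom before the
 * cmpxchg loop has to fold again.
 */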
4457c839120SChristoph Lameter 
4467c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
4476cdb18adSHeiko Carstens 			 long delta)
4487c839120SChristoph Lameter {
44975ef7184SMel Gorman 	mod_zone_state(zone, item, delta, 0);
4507c839120SChristoph Lameter }
4517c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state);
4527c839120SChristoph Lameter 
4537c839120SChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item)
4547c839120SChristoph Lameter {
45575ef7184SMel Gorman 	mod_zone_state(page_zone(page), item, 1, 1);
4567c839120SChristoph Lameter }
4577c839120SChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state);
4587c839120SChristoph Lameter 
4597c839120SChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item)
4607c839120SChristoph Lameter {
46175ef7184SMel Gorman 	mod_zone_state(page_zone(page), item, -1, -1);
4627c839120SChristoph Lameter }
4637c839120SChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state);
46475ef7184SMel Gorman 
46575ef7184SMel Gorman static inline void mod_node_state(struct pglist_data *pgdat,
46675ef7184SMel Gorman        enum node_stat_item item, int delta, int overstep_mode)
46775ef7184SMel Gorman {
46875ef7184SMel Gorman 	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
46975ef7184SMel Gorman 	s8 __percpu *p = pcp->vm_node_stat_diff + item;
47075ef7184SMel Gorman 	long o, n, t, z;
47175ef7184SMel Gorman 
47275ef7184SMel Gorman 	do {
47375ef7184SMel Gorman 		z = 0;  /* overflow to node counters */
47475ef7184SMel Gorman 
47575ef7184SMel Gorman 		/*
47675ef7184SMel Gorman 		 * The fetching of the stat_threshold is racy. We may apply
47775ef7184SMel Gorman 		 * a counter threshold to the wrong cpu if we get
47875ef7184SMel Gorman 		 * rescheduled while executing here. However, the next
47975ef7184SMel Gorman 		 * counter update will apply the threshold again and
48075ef7184SMel Gorman 		 * therefore bring the counter under the threshold again.
48175ef7184SMel Gorman 		 *
48275ef7184SMel Gorman 		 * Most of the time the thresholds are the same anyways
48375ef7184SMel Gorman 		 * for all cpus in a node.
48475ef7184SMel Gorman 		 */
48575ef7184SMel Gorman 		t = this_cpu_read(pcp->stat_threshold);
48675ef7184SMel Gorman 
48775ef7184SMel Gorman 		o = this_cpu_read(*p);
48875ef7184SMel Gorman 		n = delta + o;
48975ef7184SMel Gorman 
49075ef7184SMel Gorman 		if (n > t || n < -t) {
49175ef7184SMel Gorman 			int os = overstep_mode * (t >> 1);
49275ef7184SMel Gorman 
49375ef7184SMel Gorman 			/* Overflow must be added to node counters */
49475ef7184SMel Gorman 			z = n + os;
49575ef7184SMel Gorman 			n = -os;
49675ef7184SMel Gorman 		}
49775ef7184SMel Gorman 	} while (this_cpu_cmpxchg(*p, o, n) != o);
49875ef7184SMel Gorman 
49975ef7184SMel Gorman 	if (z)
50075ef7184SMel Gorman 		node_page_state_add(z, pgdat, item);
50175ef7184SMel Gorman }
50275ef7184SMel Gorman 
50375ef7184SMel Gorman void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
50475ef7184SMel Gorman 					long delta)
50575ef7184SMel Gorman {
50675ef7184SMel Gorman 	mod_node_state(pgdat, item, delta, 0);
50775ef7184SMel Gorman }
50875ef7184SMel Gorman EXPORT_SYMBOL(mod_node_page_state);
50975ef7184SMel Gorman 
51075ef7184SMel Gorman void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
51175ef7184SMel Gorman {
51275ef7184SMel Gorman 	mod_node_state(pgdat, item, 1, 1);
51375ef7184SMel Gorman }
51475ef7184SMel Gorman 
51575ef7184SMel Gorman void inc_node_page_state(struct page *page, enum node_stat_item item)
51675ef7184SMel Gorman {
51775ef7184SMel Gorman 	mod_node_state(page_pgdat(page), item, 1, 1);
51875ef7184SMel Gorman }
51975ef7184SMel Gorman EXPORT_SYMBOL(inc_node_page_state);
52075ef7184SMel Gorman 
52175ef7184SMel Gorman void dec_node_page_state(struct page *page, enum node_stat_item item)
52275ef7184SMel Gorman {
52375ef7184SMel Gorman 	mod_node_state(page_pgdat(page), item, -1, -1);
52475ef7184SMel Gorman }
52575ef7184SMel Gorman EXPORT_SYMBOL(dec_node_page_state);
5267c839120SChristoph Lameter #else
5277c839120SChristoph Lameter /*
5287c839120SChristoph Lameter  * Use interrupt disable to serialize counter updates
5297c839120SChristoph Lameter  */
5307c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
5316cdb18adSHeiko Carstens 			 long delta)
5327c839120SChristoph Lameter {
5337c839120SChristoph Lameter 	unsigned long flags;
5347c839120SChristoph Lameter 
5357c839120SChristoph Lameter 	local_irq_save(flags);
5367c839120SChristoph Lameter 	__mod_zone_page_state(zone, item, delta);
5377c839120SChristoph Lameter 	local_irq_restore(flags);
5387c839120SChristoph Lameter }
5397c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state);
5407c839120SChristoph Lameter 
5412244b95aSChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item)
5422244b95aSChristoph Lameter {
5432244b95aSChristoph Lameter 	unsigned long flags;
5442244b95aSChristoph Lameter 	struct zone *zone;
5452244b95aSChristoph Lameter 
5462244b95aSChristoph Lameter 	zone = page_zone(page);
5472244b95aSChristoph Lameter 	local_irq_save(flags);
548ca889e6cSChristoph Lameter 	__inc_zone_state(zone, item);
5492244b95aSChristoph Lameter 	local_irq_restore(flags);
5502244b95aSChristoph Lameter }
5512244b95aSChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state);
5522244b95aSChristoph Lameter 
5532244b95aSChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item)
5542244b95aSChristoph Lameter {
5552244b95aSChristoph Lameter 	unsigned long flags;
5562244b95aSChristoph Lameter 
5572244b95aSChristoph Lameter 	local_irq_save(flags);
558a302eb4eSChristoph Lameter 	__dec_zone_page_state(page, item);
5592244b95aSChristoph Lameter 	local_irq_restore(flags);
5602244b95aSChristoph Lameter }
5612244b95aSChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state);
5622244b95aSChristoph Lameter 
56375ef7184SMel Gorman void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
56475ef7184SMel Gorman {
56575ef7184SMel Gorman 	unsigned long flags;
56675ef7184SMel Gorman 
56775ef7184SMel Gorman 	local_irq_save(flags);
56875ef7184SMel Gorman 	__inc_node_state(pgdat, item);
56975ef7184SMel Gorman 	local_irq_restore(flags);
57075ef7184SMel Gorman }
57175ef7184SMel Gorman EXPORT_SYMBOL(inc_node_state);
57275ef7184SMel Gorman 
57375ef7184SMel Gorman void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
57475ef7184SMel Gorman 					long delta)
57575ef7184SMel Gorman {
57675ef7184SMel Gorman 	unsigned long flags;
57775ef7184SMel Gorman 
57875ef7184SMel Gorman 	local_irq_save(flags);
57975ef7184SMel Gorman 	__mod_node_page_state(pgdat, item, delta);
58075ef7184SMel Gorman 	local_irq_restore(flags);
58175ef7184SMel Gorman }
58275ef7184SMel Gorman EXPORT_SYMBOL(mod_node_page_state);
58375ef7184SMel Gorman 
58475ef7184SMel Gorman void inc_node_page_state(struct page *page, enum node_stat_item item)
58575ef7184SMel Gorman {
58675ef7184SMel Gorman 	unsigned long flags;
58775ef7184SMel Gorman 	struct pglist_data *pgdat;
58875ef7184SMel Gorman 
58975ef7184SMel Gorman 	pgdat = page_pgdat(page);
59075ef7184SMel Gorman 	local_irq_save(flags);
59175ef7184SMel Gorman 	__inc_node_state(pgdat, item);
59275ef7184SMel Gorman 	local_irq_restore(flags);
59375ef7184SMel Gorman }
59475ef7184SMel Gorman EXPORT_SYMBOL(inc_node_page_state);
59575ef7184SMel Gorman 
59675ef7184SMel Gorman void dec_node_page_state(struct page *page, enum node_stat_item item)
59775ef7184SMel Gorman {
59875ef7184SMel Gorman 	unsigned long flags;
59975ef7184SMel Gorman 
60075ef7184SMel Gorman 	local_irq_save(flags);
60175ef7184SMel Gorman 	__dec_node_page_state(page, item);
60275ef7184SMel Gorman 	local_irq_restore(flags);
60375ef7184SMel Gorman }
60475ef7184SMel Gorman EXPORT_SYMBOL(dec_node_page_state);
60575ef7184SMel Gorman #endif
6067cc36bbdSChristoph Lameter 
6077cc36bbdSChristoph Lameter /*
6087cc36bbdSChristoph Lameter  * Fold a differential into the global counters.
6097cc36bbdSChristoph Lameter  * Returns the number of counters updated.
6107cc36bbdSChristoph Lameter  */
6113a321d2aSKemi Wang #ifdef CONFIG_NUMA
6123a321d2aSKemi Wang static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
6133a321d2aSKemi Wang {
6143a321d2aSKemi Wang 	int i;
6153a321d2aSKemi Wang 	int changes = 0;
6163a321d2aSKemi Wang 
6173a321d2aSKemi Wang 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
6183a321d2aSKemi Wang 		if (zone_diff[i]) {
6193a321d2aSKemi Wang 			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
6203a321d2aSKemi Wang 			changes++;
6213a321d2aSKemi Wang 	}
6223a321d2aSKemi Wang 
6233a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
6243a321d2aSKemi Wang 		if (numa_diff[i]) {
6253a321d2aSKemi Wang 			atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
6263a321d2aSKemi Wang 			changes++;
6273a321d2aSKemi Wang 	}
6283a321d2aSKemi Wang 
6293a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
6303a321d2aSKemi Wang 		if (node_diff[i]) {
6313a321d2aSKemi Wang 			atomic_long_add(node_diff[i], &vm_node_stat[i]);
6323a321d2aSKemi Wang 			changes++;
6333a321d2aSKemi Wang 	}
6343a321d2aSKemi Wang 	return changes;
6353a321d2aSKemi Wang }
6363a321d2aSKemi Wang #else
63775ef7184SMel Gorman static int fold_diff(int *zone_diff, int *node_diff)
6384edb0748SChristoph Lameter {
6394edb0748SChristoph Lameter 	int i;
6407cc36bbdSChristoph Lameter 	int changes = 0;
6414edb0748SChristoph Lameter 
6424edb0748SChristoph Lameter 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
64375ef7184SMel Gorman 		if (zone_diff[i]) {
64475ef7184SMel Gorman 			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
64575ef7184SMel Gorman 			changes++;
64675ef7184SMel Gorman 	}
64775ef7184SMel Gorman 
64875ef7184SMel Gorman 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
64975ef7184SMel Gorman 		if (node_diff[i]) {
65075ef7184SMel Gorman 			atomic_long_add(node_diff[i], &vm_node_stat[i]);
6517cc36bbdSChristoph Lameter 			changes++;
6527cc36bbdSChristoph Lameter 	}
6537cc36bbdSChristoph Lameter 	return changes;
6544edb0748SChristoph Lameter }
6553a321d2aSKemi Wang #endif /* CONFIG_NUMA */
6564edb0748SChristoph Lameter 
6572244b95aSChristoph Lameter /*
6582bb921e5SChristoph Lameter  * Update the zone counters for the current cpu.
659a7f75e25SChristoph Lameter  *
6604037d452SChristoph Lameter  * Note that refresh_cpu_vm_stats strives to only access
6614037d452SChristoph Lameter  * node local memory. The per cpu pagesets on remote zones are placed
6624037d452SChristoph Lameter  * in the memory local to the processor using that pageset. So the
6634037d452SChristoph Lameter  * loop over all zones will access a series of cachelines local to
6644037d452SChristoph Lameter  * the processor.
6654037d452SChristoph Lameter  *
6664037d452SChristoph Lameter  * The call to zone_page_state_add updates the cachelines with the
6674037d452SChristoph Lameter  * statistics in the remote zone struct as well as the global cachelines
6684037d452SChristoph Lameter  * with the global counters. These updates can cause remote node cache
6694037d452SChristoph Lameter  * line bouncing and should only be done when necessary.
6707cc36bbdSChristoph Lameter  *
6717cc36bbdSChristoph Lameter  * The function returns the number of global counters updated.
6722244b95aSChristoph Lameter  */
6730eb77e98SChristoph Lameter static int refresh_cpu_vm_stats(bool do_pagesets)
6742244b95aSChristoph Lameter {
67575ef7184SMel Gorman 	struct pglist_data *pgdat;
6762244b95aSChristoph Lameter 	struct zone *zone;
6772244b95aSChristoph Lameter 	int i;
67875ef7184SMel Gorman 	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
6793a321d2aSKemi Wang #ifdef CONFIG_NUMA
6803a321d2aSKemi Wang 	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
6813a321d2aSKemi Wang #endif
68275ef7184SMel Gorman 	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
6837cc36bbdSChristoph Lameter 	int changes = 0;
6842244b95aSChristoph Lameter 
685ee99c71cSKOSAKI Motohiro 	for_each_populated_zone(zone) {
686fbc2edb0SChristoph Lameter 		struct per_cpu_pageset __percpu *p = zone->pageset;
6872244b95aSChristoph Lameter 
688fbc2edb0SChristoph Lameter 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
689a7f75e25SChristoph Lameter 			int v;
690a7f75e25SChristoph Lameter 
691fbc2edb0SChristoph Lameter 			v = this_cpu_xchg(p->vm_stat_diff[i], 0);
692fbc2edb0SChristoph Lameter 			if (v) {
693fbc2edb0SChristoph Lameter 
694a7f75e25SChristoph Lameter 				atomic_long_add(v, &zone->vm_stat[i]);
69575ef7184SMel Gorman 				global_zone_diff[i] += v;
6964037d452SChristoph Lameter #ifdef CONFIG_NUMA
6974037d452SChristoph Lameter 				/* 3 seconds idle till flush */
698fbc2edb0SChristoph Lameter 				__this_cpu_write(p->expire, 3);
6994037d452SChristoph Lameter #endif
7002244b95aSChristoph Lameter 			}
701fbc2edb0SChristoph Lameter 		}
7024037d452SChristoph Lameter #ifdef CONFIG_NUMA
7033a321d2aSKemi Wang 		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
7043a321d2aSKemi Wang 			int v;
7053a321d2aSKemi Wang 
7063a321d2aSKemi Wang 			v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0);
7073a321d2aSKemi Wang 			if (v) {
7083a321d2aSKemi Wang 
7093a321d2aSKemi Wang 				atomic_long_add(v, &zone->vm_numa_stat[i]);
7103a321d2aSKemi Wang 				global_numa_diff[i] += v;
7113a321d2aSKemi Wang 				__this_cpu_write(p->expire, 3);
7123a321d2aSKemi Wang 			}
7133a321d2aSKemi Wang 		}
7143a321d2aSKemi Wang 
7150eb77e98SChristoph Lameter 		if (do_pagesets) {
7160eb77e98SChristoph Lameter 			cond_resched();
7174037d452SChristoph Lameter 			/*
7184037d452SChristoph Lameter 			 * Deal with draining the remote pageset of this
7194037d452SChristoph Lameter 			 * processor
7204037d452SChristoph Lameter 			 *
7214037d452SChristoph Lameter 			 * Check if there are pages remaining in this pageset;
7224037d452SChristoph Lameter 			 * if not, there is nothing to expire.
7234037d452SChristoph Lameter 			 */
724fbc2edb0SChristoph Lameter 			if (!__this_cpu_read(p->expire) ||
725fbc2edb0SChristoph Lameter 			       !__this_cpu_read(p->pcp.count))
7264037d452SChristoph Lameter 				continue;
7274037d452SChristoph Lameter 
7284037d452SChristoph Lameter 			/*
7294037d452SChristoph Lameter 			 * We never drain zones local to this processor.
7304037d452SChristoph Lameter 			 */
7314037d452SChristoph Lameter 			if (zone_to_nid(zone) == numa_node_id()) {
732fbc2edb0SChristoph Lameter 				__this_cpu_write(p->expire, 0);
7334037d452SChristoph Lameter 				continue;
7344037d452SChristoph Lameter 			}
7354037d452SChristoph Lameter 
736fbc2edb0SChristoph Lameter 			if (__this_cpu_dec_return(p->expire))
7374037d452SChristoph Lameter 				continue;
7384037d452SChristoph Lameter 
7397cc36bbdSChristoph Lameter 			if (__this_cpu_read(p->pcp.count)) {
7407c8e0181SChristoph Lameter 				drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
7417cc36bbdSChristoph Lameter 				changes++;
7427cc36bbdSChristoph Lameter 			}
7430eb77e98SChristoph Lameter 		}
7444037d452SChristoph Lameter #endif
7452244b95aSChristoph Lameter 	}
74675ef7184SMel Gorman 
74775ef7184SMel Gorman 	for_each_online_pgdat(pgdat) {
74875ef7184SMel Gorman 		struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;
74975ef7184SMel Gorman 
75075ef7184SMel Gorman 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
75175ef7184SMel Gorman 			int v;
75275ef7184SMel Gorman 
75375ef7184SMel Gorman 			v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
75475ef7184SMel Gorman 			if (v) {
75575ef7184SMel Gorman 				atomic_long_add(v, &pgdat->vm_stat[i]);
75675ef7184SMel Gorman 				global_node_diff[i] += v;
75775ef7184SMel Gorman 			}
75875ef7184SMel Gorman 		}
75975ef7184SMel Gorman 	}
76075ef7184SMel Gorman 
7613a321d2aSKemi Wang #ifdef CONFIG_NUMA
7623a321d2aSKemi Wang 	changes += fold_diff(global_zone_diff, global_numa_diff,
7633a321d2aSKemi Wang 			     global_node_diff);
7643a321d2aSKemi Wang #else
76575ef7184SMel Gorman 	changes += fold_diff(global_zone_diff, global_node_diff);
7663a321d2aSKemi Wang #endif
7677cc36bbdSChristoph Lameter 	return changes;
7682244b95aSChristoph Lameter }
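
/*
 * The changes count returned above is what lets the periodic vmstat work
 * (vmstat_update(), later in this file) detect that a cpu has gone quiet
 * and allow its deferrable work item to go idle.
 */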
7692244b95aSChristoph Lameter 
77040f4b1eaSCody P Schafer /*
7712bb921e5SChristoph Lameter  * Fold the data for an offline cpu into the global array.
7722bb921e5SChristoph Lameter  * There cannot be any access by the offline cpu and therefore
7732bb921e5SChristoph Lameter  * synchronization is simplified.
7742bb921e5SChristoph Lameter  */
7752bb921e5SChristoph Lameter void cpu_vm_stats_fold(int cpu)
7762bb921e5SChristoph Lameter {
77775ef7184SMel Gorman 	struct pglist_data *pgdat;
7782bb921e5SChristoph Lameter 	struct zone *zone;
7792bb921e5SChristoph Lameter 	int i;
78075ef7184SMel Gorman 	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
7813a321d2aSKemi Wang #ifdef CONFIG_NUMA
7823a321d2aSKemi Wang 	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
7833a321d2aSKemi Wang #endif
78475ef7184SMel Gorman 	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
7852bb921e5SChristoph Lameter 
7862bb921e5SChristoph Lameter 	for_each_populated_zone(zone) {
7872bb921e5SChristoph Lameter 		struct per_cpu_pageset *p;
7882bb921e5SChristoph Lameter 
7892bb921e5SChristoph Lameter 		p = per_cpu_ptr(zone->pageset, cpu);
7902bb921e5SChristoph Lameter 
7912bb921e5SChristoph Lameter 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
7922bb921e5SChristoph Lameter 			if (p->vm_stat_diff[i]) {
7932bb921e5SChristoph Lameter 				int v;
7942bb921e5SChristoph Lameter 
7952bb921e5SChristoph Lameter 				v = p->vm_stat_diff[i];
7962bb921e5SChristoph Lameter 				p->vm_stat_diff[i] = 0;
7972bb921e5SChristoph Lameter 				atomic_long_add(v, &zone->vm_stat[i]);
79875ef7184SMel Gorman 				global_zone_diff[i] += v;
7992bb921e5SChristoph Lameter 			}
8003a321d2aSKemi Wang 
8013a321d2aSKemi Wang #ifdef CONFIG_NUMA
8023a321d2aSKemi Wang 		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
8033a321d2aSKemi Wang 			if (p->vm_numa_stat_diff[i]) {
8043a321d2aSKemi Wang 				int v;
8053a321d2aSKemi Wang 
8063a321d2aSKemi Wang 				v = p->vm_numa_stat_diff[i];
8073a321d2aSKemi Wang 				p->vm_numa_stat_diff[i] = 0;
8083a321d2aSKemi Wang 				atomic_long_add(v, &zone->vm_numa_stat[i]);
8093a321d2aSKemi Wang 				global_numa_diff[i] += v;
8103a321d2aSKemi Wang 			}
8113a321d2aSKemi Wang #endif
8122bb921e5SChristoph Lameter 	}
8132bb921e5SChristoph Lameter 
81475ef7184SMel Gorman 	for_each_online_pgdat(pgdat) {
81575ef7184SMel Gorman 		struct per_cpu_nodestat *p;
81675ef7184SMel Gorman 
81775ef7184SMel Gorman 		p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
81875ef7184SMel Gorman 
81975ef7184SMel Gorman 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
82075ef7184SMel Gorman 			if (p->vm_node_stat_diff[i]) {
82175ef7184SMel Gorman 				int v;
82275ef7184SMel Gorman 
82375ef7184SMel Gorman 				v = p->vm_node_stat_diff[i];
82475ef7184SMel Gorman 				p->vm_node_stat_diff[i] = 0;
82575ef7184SMel Gorman 				atomic_long_add(v, &pgdat->vm_stat[i]);
82675ef7184SMel Gorman 				global_node_diff[i] += v;
82775ef7184SMel Gorman 			}
82875ef7184SMel Gorman 	}
82975ef7184SMel Gorman 
8303a321d2aSKemi Wang #ifdef CONFIG_NUMA
8313a321d2aSKemi Wang 	fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
8323a321d2aSKemi Wang #else
83375ef7184SMel Gorman 	fold_diff(global_zone_diff, global_node_diff);
8343a321d2aSKemi Wang #endif
8352bb921e5SChristoph Lameter }
8362bb921e5SChristoph Lameter 
8372bb921e5SChristoph Lameter /*
83840f4b1eaSCody P Schafer  * This is only called if !populated_zone(zone), which implies no other users of
83940f4b1eaSCody P Schafer  * pset->vm_stat_diff[] exist.
84040f4b1eaSCody P Schafer  */
8415a883813SMinchan Kim void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
8425a883813SMinchan Kim {
8435a883813SMinchan Kim 	int i;
8445a883813SMinchan Kim 
8455a883813SMinchan Kim 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
8465a883813SMinchan Kim 		if (pset->vm_stat_diff[i]) {
8475a883813SMinchan Kim 			int v = pset->vm_stat_diff[i];
8485a883813SMinchan Kim 			pset->vm_stat_diff[i] = 0;
8495a883813SMinchan Kim 			atomic_long_add(v, &zone->vm_stat[i]);
85075ef7184SMel Gorman 			atomic_long_add(v, &vm_zone_stat[i]);
8515a883813SMinchan Kim 		}
8523a321d2aSKemi Wang 
8533a321d2aSKemi Wang #ifdef CONFIG_NUMA
8543a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
8553a321d2aSKemi Wang 		if (pset->vm_numa_stat_diff[i]) {
8563a321d2aSKemi Wang 			int v = pset->vm_numa_stat_diff[i];
8573a321d2aSKemi Wang 
8583a321d2aSKemi Wang 			pset->vm_numa_stat_diff[i] = 0;
8593a321d2aSKemi Wang 			atomic_long_add(v, &zone->vm_numa_stat[i]);
8603a321d2aSKemi Wang 			atomic_long_add(v, &vm_numa_stat[i]);
8613a321d2aSKemi Wang 		}
8623a321d2aSKemi Wang #endif
8635a883813SMinchan Kim }
8642244b95aSChristoph Lameter #endif
8652244b95aSChristoph Lameter 
866ca889e6cSChristoph Lameter #ifdef CONFIG_NUMA
8673a321d2aSKemi Wang void __inc_numa_state(struct zone *zone,
8683a321d2aSKemi Wang 				 enum numa_stat_item item)
8693a321d2aSKemi Wang {
8703a321d2aSKemi Wang 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
871*1d90ca89SKemi Wang 	u16 __percpu *p = pcp->vm_numa_stat_diff + item;
872*1d90ca89SKemi Wang 	u16 v;
8733a321d2aSKemi Wang 
8743a321d2aSKemi Wang 	v = __this_cpu_inc_return(*p);
8753a321d2aSKemi Wang 
876*1d90ca89SKemi Wang 	if (unlikely(v > NUMA_STATS_THRESHOLD)) {
877*1d90ca89SKemi Wang 		zone_numa_state_add(v, zone, item);
878*1d90ca89SKemi Wang 		__this_cpu_write(*p, 0);
8793a321d2aSKemi Wang 	}
8803a321d2aSKemi Wang }
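
/*
 * Unlike the s8 zone/node diffs above, the per-cpu NUMA diff is a u16
 * that is only ever incremented, so it simply accumulates until it
 * approaches the u16 limit (NUMA_STATS_THRESHOLD = U16_MAX - 2), is then
 * folded into the zone counter in one go and reset to 0.
 */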
8813a321d2aSKemi Wang 
882ca889e6cSChristoph Lameter /*
88375ef7184SMel Gorman  * Determine the per node value of a stat item. This function
88475ef7184SMel Gorman  * is called frequently in a NUMA machine, so try to be as
88575ef7184SMel Gorman  * frugal as possible.
886c2d42c16SAndrew Morton  */
88775ef7184SMel Gorman unsigned long sum_zone_node_page_state(int node,
88875ef7184SMel Gorman 				 enum zone_stat_item item)
889c2d42c16SAndrew Morton {
890c2d42c16SAndrew Morton 	struct zone *zones = NODE_DATA(node)->node_zones;
891e87d59f7SJoonsoo Kim 	int i;
892e87d59f7SJoonsoo Kim 	unsigned long count = 0;
893c2d42c16SAndrew Morton 
894e87d59f7SJoonsoo Kim 	for (i = 0; i < MAX_NR_ZONES; i++)
895e87d59f7SJoonsoo Kim 		count += zone_page_state(zones + i, item);
896e87d59f7SJoonsoo Kim 
897e87d59f7SJoonsoo Kim 	return count;
898c2d42c16SAndrew Morton }
899c2d42c16SAndrew Morton 
9003a321d2aSKemi Wang unsigned long sum_zone_numa_state(int node,
9013a321d2aSKemi Wang 				 enum numa_stat_item item)
9023a321d2aSKemi Wang {
9033a321d2aSKemi Wang 	struct zone *zones = NODE_DATA(node)->node_zones;
9043a321d2aSKemi Wang 	int i;
9053a321d2aSKemi Wang 	unsigned long count = 0;
9063a321d2aSKemi Wang 
9073a321d2aSKemi Wang 	for (i = 0; i < MAX_NR_ZONES; i++)
9083a321d2aSKemi Wang 		count += zone_numa_state(zones + i, item);
9093a321d2aSKemi Wang 
9103a321d2aSKemi Wang 	return count;
9113a321d2aSKemi Wang }
9123a321d2aSKemi Wang 
91375ef7184SMel Gorman /*
91475ef7184SMel Gorman  * Determine the per node value of a stat item.
91575ef7184SMel Gorman  */
91675ef7184SMel Gorman unsigned long node_page_state(struct pglist_data *pgdat,
91775ef7184SMel Gorman 				enum node_stat_item item)
91875ef7184SMel Gorman {
91975ef7184SMel Gorman 	long x = atomic_long_read(&pgdat->vm_stat[item]);
92075ef7184SMel Gorman #ifdef CONFIG_SMP
92175ef7184SMel Gorman 	if (x < 0)
92275ef7184SMel Gorman 		x = 0;
92375ef7184SMel Gorman #endif
92475ef7184SMel Gorman 	return x;
92575ef7184SMel Gorman }
926ca889e6cSChristoph Lameter #endif
927ca889e6cSChristoph Lameter 
928d7a5752cSMel Gorman #ifdef CONFIG_COMPACTION
92936deb0beSNamhyung Kim 
930d7a5752cSMel Gorman struct contig_page_info {
931d7a5752cSMel Gorman 	unsigned long free_pages;
932d7a5752cSMel Gorman 	unsigned long free_blocks_total;
933d7a5752cSMel Gorman 	unsigned long free_blocks_suitable;
934d7a5752cSMel Gorman };
935d7a5752cSMel Gorman 
936d7a5752cSMel Gorman /*
937d7a5752cSMel Gorman  * Calculate the number of free pages in a zone, how many contiguous
938d7a5752cSMel Gorman  * pages are free and how many are large enough to satisfy an allocation of
939d7a5752cSMel Gorman  * the target size. Note that this function makes no attempt to estimate
940d7a5752cSMel Gorman  * how many suitable free blocks there *might* be if MOVABLE pages were
941d7a5752cSMel Gorman  * migrated. Calculating that is possible, but expensive and can be
942d7a5752cSMel Gorman  * figured out from userspace.
943d7a5752cSMel Gorman  */
944d7a5752cSMel Gorman static void fill_contig_page_info(struct zone *zone,
945d7a5752cSMel Gorman 				unsigned int suitable_order,
946d7a5752cSMel Gorman 				struct contig_page_info *info)
947d7a5752cSMel Gorman {
948d7a5752cSMel Gorman 	unsigned int order;
949d7a5752cSMel Gorman 
950d7a5752cSMel Gorman 	info->free_pages = 0;
951d7a5752cSMel Gorman 	info->free_blocks_total = 0;
952d7a5752cSMel Gorman 	info->free_blocks_suitable = 0;
953d7a5752cSMel Gorman 
954d7a5752cSMel Gorman 	for (order = 0; order < MAX_ORDER; order++) {
955d7a5752cSMel Gorman 		unsigned long blocks;
956d7a5752cSMel Gorman 
957d7a5752cSMel Gorman 		/* Count number of free blocks */
958d7a5752cSMel Gorman 		blocks = zone->free_area[order].nr_free;
959d7a5752cSMel Gorman 		info->free_blocks_total += blocks;
960d7a5752cSMel Gorman 
961d7a5752cSMel Gorman 		/* Count free base pages */
962d7a5752cSMel Gorman 		info->free_pages += blocks << order;
963d7a5752cSMel Gorman 
964d7a5752cSMel Gorman 		/* Count the suitable free blocks */
965d7a5752cSMel Gorman 		if (order >= suitable_order)
966d7a5752cSMel Gorman 			info->free_blocks_suitable += blocks <<
967d7a5752cSMel Gorman 						(order - suitable_order);
968d7a5752cSMel Gorman 	}
969d7a5752cSMel Gorman }
970f1a5ab12SMel Gorman 
971f1a5ab12SMel Gorman /*
972f1a5ab12SMel Gorman  * A fragmentation index only makes sense if an allocation of a requested
973f1a5ab12SMel Gorman  * size would fail. If that is true, the fragmentation index indicates
974f1a5ab12SMel Gorman  * whether external fragmentation or a lack of memory was the problem.
975f1a5ab12SMel Gorman  * The value can be used to determine if page reclaim or compaction
976f1a5ab12SMel Gorman  * should be used.
977f1a5ab12SMel Gorman  */
97856de7263SMel Gorman static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
979f1a5ab12SMel Gorman {
980f1a5ab12SMel Gorman 	unsigned long requested = 1UL << order;
981f1a5ab12SMel Gorman 
98288d6ac40SWen Yang 	if (WARN_ON_ONCE(order >= MAX_ORDER))
98388d6ac40SWen Yang 		return 0;
98488d6ac40SWen Yang 
985f1a5ab12SMel Gorman 	if (!info->free_blocks_total)
986f1a5ab12SMel Gorman 		return 0;
987f1a5ab12SMel Gorman 
988f1a5ab12SMel Gorman 	/* Fragmentation index only makes sense when a request would fail */
989f1a5ab12SMel Gorman 	if (info->free_blocks_suitable)
990f1a5ab12SMel Gorman 		return -1000;
991f1a5ab12SMel Gorman 
992f1a5ab12SMel Gorman 	/*
993f1a5ab12SMel Gorman 	 * Index is between 0 and 1 so return within 3 decimal places
994f1a5ab12SMel Gorman 	 *
995f1a5ab12SMel Gorman 	 * 0 => allocation would fail due to lack of memory
996f1a5ab12SMel Gorman 	 * 1 => allocation would fail due to fragmentation
997f1a5ab12SMel Gorman 	 */
998f1a5ab12SMel Gorman 	return 1000 - div_u64(1000 + div_u64(info->free_pages * 1000ULL, requested), info->free_blocks_total);
999f1a5ab12SMel Gorman }
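
/*
 * Worked example (hypothetical numbers): for an order-3 request
 * (requested = 8) against 64 free pages that are all isolated order-0
 * blocks, free_blocks_total = 64 and free_blocks_suitable = 0, so the
 * index is 1000 - div_u64(1000 + 64 * 1000 / 8, 64) = 1000 - 140 = 860,
 * i.e. ~0.860: the failure is due to external fragmentation rather than
 * a lack of memory.
 */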
100056de7263SMel Gorman 
100156de7263SMel Gorman /* Same as __fragmentation index but allocs contig_page_info on stack */
100256de7263SMel Gorman int fragmentation_index(struct zone *zone, unsigned int order)
100356de7263SMel Gorman {
100456de7263SMel Gorman 	struct contig_page_info info;
100556de7263SMel Gorman 
100656de7263SMel Gorman 	fill_contig_page_info(zone, order, &info);
100756de7263SMel Gorman 	return __fragmentation_index(order, &info);
100856de7263SMel Gorman }
1009d7a5752cSMel Gorman #endif
1010d7a5752cSMel Gorman 
10110d6617c7SDavid Rientjes #if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
1012fa25c503SKOSAKI Motohiro #ifdef CONFIG_ZONE_DMA
1013fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA(xx) xx "_dma",
1014fa25c503SKOSAKI Motohiro #else
1015fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA(xx)
1016fa25c503SKOSAKI Motohiro #endif
1017fa25c503SKOSAKI Motohiro 
1018fa25c503SKOSAKI Motohiro #ifdef CONFIG_ZONE_DMA32
1019fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA32(xx) xx "_dma32",
1020fa25c503SKOSAKI Motohiro #else
1021fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA32(xx)
1022fa25c503SKOSAKI Motohiro #endif
1023fa25c503SKOSAKI Motohiro 
1024fa25c503SKOSAKI Motohiro #ifdef CONFIG_HIGHMEM
1025fa25c503SKOSAKI Motohiro #define TEXT_FOR_HIGHMEM(xx) xx "_high",
1026fa25c503SKOSAKI Motohiro #else
1027fa25c503SKOSAKI Motohiro #define TEXT_FOR_HIGHMEM(xx)
1028fa25c503SKOSAKI Motohiro #endif
1029fa25c503SKOSAKI Motohiro 
1030fa25c503SKOSAKI Motohiro #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
1031fa25c503SKOSAKI Motohiro 					TEXT_FOR_HIGHMEM(xx) xx "_movable",
1032fa25c503SKOSAKI Motohiro 
1033fa25c503SKOSAKI Motohiro const char * const vmstat_text[] = {
103409316c09SKonstantin Khlebnikov 	/* enum zone_stat_item counters */
1035fa25c503SKOSAKI Motohiro 	"nr_free_pages",
103671c799f4SMinchan Kim 	"nr_zone_inactive_anon",
103771c799f4SMinchan Kim 	"nr_zone_active_anon",
103871c799f4SMinchan Kim 	"nr_zone_inactive_file",
103971c799f4SMinchan Kim 	"nr_zone_active_file",
104071c799f4SMinchan Kim 	"nr_zone_unevictable",
10415a1c84b4SMel Gorman 	"nr_zone_write_pending",
1042fa25c503SKOSAKI Motohiro 	"nr_mlock",
1043fa25c503SKOSAKI Motohiro 	"nr_page_table_pages",
1044fa25c503SKOSAKI Motohiro 	"nr_kernel_stack",
1045fa25c503SKOSAKI Motohiro 	"nr_bounce",
104691537feeSMinchan Kim #if IS_ENABLED(CONFIG_ZSMALLOC)
104791537feeSMinchan Kim 	"nr_zspages",
104891537feeSMinchan Kim #endif
10493a321d2aSKemi Wang 	"nr_free_cma",
10503a321d2aSKemi Wang 
10513a321d2aSKemi Wang 	/* enum numa_stat_item counters */
1052fa25c503SKOSAKI Motohiro #ifdef CONFIG_NUMA
1053fa25c503SKOSAKI Motohiro 	"numa_hit",
1054fa25c503SKOSAKI Motohiro 	"numa_miss",
1055fa25c503SKOSAKI Motohiro 	"numa_foreign",
1056fa25c503SKOSAKI Motohiro 	"numa_interleave",
1057fa25c503SKOSAKI Motohiro 	"numa_local",
1058fa25c503SKOSAKI Motohiro 	"numa_other",
1059fa25c503SKOSAKI Motohiro #endif
106009316c09SKonstantin Khlebnikov 
1061599d0c95SMel Gorman 	/* Node-based counters */
1062599d0c95SMel Gorman 	"nr_inactive_anon",
1063599d0c95SMel Gorman 	"nr_active_anon",
1064599d0c95SMel Gorman 	"nr_inactive_file",
1065599d0c95SMel Gorman 	"nr_active_file",
1066599d0c95SMel Gorman 	"nr_unevictable",
1067385386cfSJohannes Weiner 	"nr_slab_reclaimable",
1068385386cfSJohannes Weiner 	"nr_slab_unreclaimable",
1069599d0c95SMel Gorman 	"nr_isolated_anon",
1070599d0c95SMel Gorman 	"nr_isolated_file",
10711e6b1085SMel Gorman 	"workingset_refault",
10721e6b1085SMel Gorman 	"workingset_activate",
10731e6b1085SMel Gorman 	"workingset_nodereclaim",
107450658e2eSMel Gorman 	"nr_anon_pages",
107550658e2eSMel Gorman 	"nr_mapped",
107611fb9989SMel Gorman 	"nr_file_pages",
107711fb9989SMel Gorman 	"nr_dirty",
107811fb9989SMel Gorman 	"nr_writeback",
107911fb9989SMel Gorman 	"nr_writeback_temp",
108011fb9989SMel Gorman 	"nr_shmem",
108111fb9989SMel Gorman 	"nr_shmem_hugepages",
108211fb9989SMel Gorman 	"nr_shmem_pmdmapped",
108311fb9989SMel Gorman 	"nr_anon_transparent_hugepages",
108411fb9989SMel Gorman 	"nr_unstable",
1085c4a25635SMel Gorman 	"nr_vmscan_write",
1086c4a25635SMel Gorman 	"nr_vmscan_immediate_reclaim",
1087c4a25635SMel Gorman 	"nr_dirtied",
1088c4a25635SMel Gorman 	"nr_written",
1089599d0c95SMel Gorman 
109009316c09SKonstantin Khlebnikov 	/* enum writeback_stat_item counters */
1091fa25c503SKOSAKI Motohiro 	"nr_dirty_threshold",
1092fa25c503SKOSAKI Motohiro 	"nr_dirty_background_threshold",
1093fa25c503SKOSAKI Motohiro 
1094fa25c503SKOSAKI Motohiro #ifdef CONFIG_VM_EVENT_COUNTERS
109509316c09SKonstantin Khlebnikov 	/* enum vm_event_item counters */
1096fa25c503SKOSAKI Motohiro 	"pgpgin",
1097fa25c503SKOSAKI Motohiro 	"pgpgout",
1098fa25c503SKOSAKI Motohiro 	"pswpin",
1099fa25c503SKOSAKI Motohiro 	"pswpout",
1100fa25c503SKOSAKI Motohiro 
1101fa25c503SKOSAKI Motohiro 	TEXTS_FOR_ZONES("pgalloc")
11027cc30fcfSMel Gorman 	TEXTS_FOR_ZONES("allocstall")
11037cc30fcfSMel Gorman 	TEXTS_FOR_ZONES("pgskip")
1104fa25c503SKOSAKI Motohiro 
1105fa25c503SKOSAKI Motohiro 	"pgfree",
1106fa25c503SKOSAKI Motohiro 	"pgactivate",
1107fa25c503SKOSAKI Motohiro 	"pgdeactivate",
1108f7ad2a6cSShaohua Li 	"pglazyfree",
1109fa25c503SKOSAKI Motohiro 
1110fa25c503SKOSAKI Motohiro 	"pgfault",
1111fa25c503SKOSAKI Motohiro 	"pgmajfault",
1112854e9ed0SMinchan Kim 	"pglazyfreed",
1113fa25c503SKOSAKI Motohiro 
1114599d0c95SMel Gorman 	"pgrefill",
1115599d0c95SMel Gorman 	"pgsteal_kswapd",
1116599d0c95SMel Gorman 	"pgsteal_direct",
1117599d0c95SMel Gorman 	"pgscan_kswapd",
1118599d0c95SMel Gorman 	"pgscan_direct",
111968243e76SMel Gorman 	"pgscan_direct_throttle",
1120fa25c503SKOSAKI Motohiro 
1121fa25c503SKOSAKI Motohiro #ifdef CONFIG_NUMA
1122fa25c503SKOSAKI Motohiro 	"zone_reclaim_failed",
1123fa25c503SKOSAKI Motohiro #endif
1124fa25c503SKOSAKI Motohiro 	"pginodesteal",
1125fa25c503SKOSAKI Motohiro 	"slabs_scanned",
1126fa25c503SKOSAKI Motohiro 	"kswapd_inodesteal",
1127fa25c503SKOSAKI Motohiro 	"kswapd_low_wmark_hit_quickly",
1128fa25c503SKOSAKI Motohiro 	"kswapd_high_wmark_hit_quickly",
1129fa25c503SKOSAKI Motohiro 	"pageoutrun",
1130fa25c503SKOSAKI Motohiro 
1131fa25c503SKOSAKI Motohiro 	"pgrotated",
1132fa25c503SKOSAKI Motohiro 
11335509a5d2SDave Hansen 	"drop_pagecache",
11345509a5d2SDave Hansen 	"drop_slab",
11358e675f7aSKonstantin Khlebnikov 	"oom_kill",
11365509a5d2SDave Hansen 
113703c5a6e1SMel Gorman #ifdef CONFIG_NUMA_BALANCING
113803c5a6e1SMel Gorman 	"numa_pte_updates",
113972403b4aSMel Gorman 	"numa_huge_pte_updates",
114003c5a6e1SMel Gorman 	"numa_hint_faults",
114103c5a6e1SMel Gorman 	"numa_hint_faults_local",
114203c5a6e1SMel Gorman 	"numa_pages_migrated",
114303c5a6e1SMel Gorman #endif
11445647bc29SMel Gorman #ifdef CONFIG_MIGRATION
11455647bc29SMel Gorman 	"pgmigrate_success",
11465647bc29SMel Gorman 	"pgmigrate_fail",
11475647bc29SMel Gorman #endif
1148fa25c503SKOSAKI Motohiro #ifdef CONFIG_COMPACTION
1149397487dbSMel Gorman 	"compact_migrate_scanned",
1150397487dbSMel Gorman 	"compact_free_scanned",
1151397487dbSMel Gorman 	"compact_isolated",
1152fa25c503SKOSAKI Motohiro 	"compact_stall",
1153fa25c503SKOSAKI Motohiro 	"compact_fail",
1154fa25c503SKOSAKI Motohiro 	"compact_success",
1155698b1b30SVlastimil Babka 	"compact_daemon_wake",
11567f354a54SDavid Rientjes 	"compact_daemon_migrate_scanned",
11577f354a54SDavid Rientjes 	"compact_daemon_free_scanned",
1158fa25c503SKOSAKI Motohiro #endif
1159fa25c503SKOSAKI Motohiro 
1160fa25c503SKOSAKI Motohiro #ifdef CONFIG_HUGETLB_PAGE
1161fa25c503SKOSAKI Motohiro 	"htlb_buddy_alloc_success",
1162fa25c503SKOSAKI Motohiro 	"htlb_buddy_alloc_fail",
1163fa25c503SKOSAKI Motohiro #endif
1164fa25c503SKOSAKI Motohiro 	"unevictable_pgs_culled",
1165fa25c503SKOSAKI Motohiro 	"unevictable_pgs_scanned",
1166fa25c503SKOSAKI Motohiro 	"unevictable_pgs_rescued",
1167fa25c503SKOSAKI Motohiro 	"unevictable_pgs_mlocked",
1168fa25c503SKOSAKI Motohiro 	"unevictable_pgs_munlocked",
1169fa25c503SKOSAKI Motohiro 	"unevictable_pgs_cleared",
1170fa25c503SKOSAKI Motohiro 	"unevictable_pgs_stranded",
1171fa25c503SKOSAKI Motohiro 
1172fa25c503SKOSAKI Motohiro #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1173fa25c503SKOSAKI Motohiro 	"thp_fault_alloc",
1174fa25c503SKOSAKI Motohiro 	"thp_fault_fallback",
1175fa25c503SKOSAKI Motohiro 	"thp_collapse_alloc",
1176fa25c503SKOSAKI Motohiro 	"thp_collapse_alloc_failed",
117795ecedcdSKirill A. Shutemov 	"thp_file_alloc",
117895ecedcdSKirill A. Shutemov 	"thp_file_mapped",
1179122afea9SKirill A. Shutemov 	"thp_split_page",
1180122afea9SKirill A. Shutemov 	"thp_split_page_failed",
1181f9719a03SKirill A. Shutemov 	"thp_deferred_split_page",
1182122afea9SKirill A. Shutemov 	"thp_split_pmd",
1183ce9311cfSYisheng Xie #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
1184ce9311cfSYisheng Xie 	"thp_split_pud",
1185ce9311cfSYisheng Xie #endif
1186d8a8e1f0SKirill A. Shutemov 	"thp_zero_page_alloc",
1187d8a8e1f0SKirill A. Shutemov 	"thp_zero_page_alloc_failed",
1188225311a4SHuang Ying 	"thp_swpout",
1189fe490cc0SHuang Ying 	"thp_swpout_fallback",
1190fa25c503SKOSAKI Motohiro #endif
119109316c09SKonstantin Khlebnikov #ifdef CONFIG_MEMORY_BALLOON
119209316c09SKonstantin Khlebnikov 	"balloon_inflate",
119309316c09SKonstantin Khlebnikov 	"balloon_deflate",
119409316c09SKonstantin Khlebnikov #ifdef CONFIG_BALLOON_COMPACTION
119509316c09SKonstantin Khlebnikov 	"balloon_migrate",
119609316c09SKonstantin Khlebnikov #endif
119709316c09SKonstantin Khlebnikov #endif /* CONFIG_MEMORY_BALLOON */
1198ec659934SMel Gorman #ifdef CONFIG_DEBUG_TLBFLUSH
11996df46865SDave Hansen #ifdef CONFIG_SMP
12009824cf97SDave Hansen 	"nr_tlb_remote_flush",
12019824cf97SDave Hansen 	"nr_tlb_remote_flush_received",
1202ec659934SMel Gorman #endif /* CONFIG_SMP */
12039824cf97SDave Hansen 	"nr_tlb_local_flush_all",
12049824cf97SDave Hansen 	"nr_tlb_local_flush_one",
1205ec659934SMel Gorman #endif /* CONFIG_DEBUG_TLBFLUSH */
1206fa25c503SKOSAKI Motohiro 
12074f115147SDavidlohr Bueso #ifdef CONFIG_DEBUG_VM_VMACACHE
12084f115147SDavidlohr Bueso 	"vmacache_find_calls",
12094f115147SDavidlohr Bueso 	"vmacache_find_hits",
1210f5f302e2SDavidlohr Bueso 	"vmacache_full_flushes",
12114f115147SDavidlohr Bueso #endif
1212cbc65df2SHuang Ying #ifdef CONFIG_SWAP
1213cbc65df2SHuang Ying 	"swap_ra",
1214cbc65df2SHuang Ying 	"swap_ra_hit",
1215cbc65df2SHuang Ying #endif
1216fa25c503SKOSAKI Motohiro #endif /* CONFIG_VM_EVENT_COUNTERS */
1217fa25c503SKOSAKI Motohiro };
12180d6617c7SDavid Rientjes #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
1219fa25c503SKOSAKI Motohiro 
12203c486871SAndrew Morton #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
12213c486871SAndrew Morton      defined(CONFIG_PROC_FS)
12223c486871SAndrew Morton static void *frag_start(struct seq_file *m, loff_t *pos)
12233c486871SAndrew Morton {
12243c486871SAndrew Morton 	pg_data_t *pgdat;
12253c486871SAndrew Morton 	loff_t node = *pos;
12263c486871SAndrew Morton 
12273c486871SAndrew Morton 	for (pgdat = first_online_pgdat();
12283c486871SAndrew Morton 	     pgdat && node;
12293c486871SAndrew Morton 	     pgdat = next_online_pgdat(pgdat))
12303c486871SAndrew Morton 		--node;
12313c486871SAndrew Morton 
12323c486871SAndrew Morton 	return pgdat;
12333c486871SAndrew Morton }
12343c486871SAndrew Morton 
12353c486871SAndrew Morton static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
12363c486871SAndrew Morton {
12373c486871SAndrew Morton 	pg_data_t *pgdat = (pg_data_t *)arg;
12383c486871SAndrew Morton 
12393c486871SAndrew Morton 	(*pos)++;
12403c486871SAndrew Morton 	return next_online_pgdat(pgdat);
12413c486871SAndrew Morton }
12423c486871SAndrew Morton 
12433c486871SAndrew Morton static void frag_stop(struct seq_file *m, void *arg)
12443c486871SAndrew Morton {
12453c486871SAndrew Morton }
12463c486871SAndrew Morton 
1247b2bd8598SDavid Rientjes /*
1248b2bd8598SDavid Rientjes  * Walk zones in a node and print using a callback.
1249b2bd8598SDavid Rientjes  * If @assert_populated is true, only use callback for zones that are populated.
1250b2bd8598SDavid Rientjes  */
12513c486871SAndrew Morton static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
1252727c080fSVinayak Menon 		bool assert_populated, bool nolock,
12533c486871SAndrew Morton 		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
12543c486871SAndrew Morton {
12553c486871SAndrew Morton 	struct zone *zone;
12563c486871SAndrew Morton 	struct zone *node_zones = pgdat->node_zones;
12573c486871SAndrew Morton 	unsigned long flags;
12583c486871SAndrew Morton 
12593c486871SAndrew Morton 	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
1260b2bd8598SDavid Rientjes 		if (assert_populated && !populated_zone(zone))
12613c486871SAndrew Morton 			continue;
12623c486871SAndrew Morton 
1263727c080fSVinayak Menon 		if (!nolock)
12643c486871SAndrew Morton 			spin_lock_irqsave(&zone->lock, flags);
12653c486871SAndrew Morton 		print(m, pgdat, zone);
1266727c080fSVinayak Menon 		if (!nolock)
12673c486871SAndrew Morton 			spin_unlock_irqrestore(&zone->lock, flags);
12683c486871SAndrew Morton 	}
12693c486871SAndrew Morton }
12703c486871SAndrew Morton #endif
12713c486871SAndrew Morton 
1272d7a5752cSMel Gorman #ifdef CONFIG_PROC_FS
1273467c996cSMel Gorman static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
1274467c996cSMel Gorman 						struct zone *zone)
1275467c996cSMel Gorman {
1276467c996cSMel Gorman 	int order;
1277467c996cSMel Gorman 
1278f6ac2354SChristoph Lameter 	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1279f6ac2354SChristoph Lameter 	for (order = 0; order < MAX_ORDER; ++order)
1280f6ac2354SChristoph Lameter 		seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
1281f6ac2354SChristoph Lameter 	seq_putc(m, '\n');
1282f6ac2354SChristoph Lameter }
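/*
 * Illustrative /proc/buddyinfo line produced by frag_show_print() above;
 * the counts are hypothetical and MAX_ORDER is assumed to be 11 here:
 *
 *   Node 0, zone   Normal    145     97     43     12      5      3      2      1      1      0      0
 *
 * Each column is free_area[order].nr_free for order 0 .. MAX_ORDER-1.
 */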
1283467c996cSMel Gorman 
1284467c996cSMel Gorman /*
1285467c996cSMel Gorman  * This walks the free areas for each zone.
1286467c996cSMel Gorman  */
1287467c996cSMel Gorman static int frag_show(struct seq_file *m, void *arg)
1288467c996cSMel Gorman {
1289467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1290727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, frag_show_print);
1291467c996cSMel Gorman 	return 0;
1292467c996cSMel Gorman }
1293467c996cSMel Gorman 
1294467c996cSMel Gorman static void pagetypeinfo_showfree_print(struct seq_file *m,
1295467c996cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1296467c996cSMel Gorman {
1297467c996cSMel Gorman 	int order, mtype;
1298467c996cSMel Gorman 
1299467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
1300467c996cSMel Gorman 		seq_printf(m, "Node %4d, zone %8s, type %12s ",
1301467c996cSMel Gorman 					pgdat->node_id,
1302467c996cSMel Gorman 					zone->name,
1303467c996cSMel Gorman 					migratetype_names[mtype]);
1304467c996cSMel Gorman 		for (order = 0; order < MAX_ORDER; ++order) {
1305467c996cSMel Gorman 			unsigned long freecount = 0;
1306467c996cSMel Gorman 			struct free_area *area;
1307467c996cSMel Gorman 			struct list_head *curr;
1308467c996cSMel Gorman 
1309467c996cSMel Gorman 			area = &(zone->free_area[order]);
1310467c996cSMel Gorman 
1311467c996cSMel Gorman 			list_for_each(curr, &area->free_list[mtype])
1312467c996cSMel Gorman 				freecount++;
1313467c996cSMel Gorman 			seq_printf(m, "%6lu ", freecount);
1314467c996cSMel Gorman 		}
1315467c996cSMel Gorman 		seq_putc(m, '\n');
1316467c996cSMel Gorman 	}
1317467c996cSMel Gorman }
1318467c996cSMel Gorman 
1319467c996cSMel Gorman /* Print out the free pages at each order for each migratetype */
1320467c996cSMel Gorman static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
1321467c996cSMel Gorman {
1322467c996cSMel Gorman 	int order;
1323467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1324467c996cSMel Gorman 
1325467c996cSMel Gorman 	/* Print header */
1326467c996cSMel Gorman 	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
1327467c996cSMel Gorman 	for (order = 0; order < MAX_ORDER; ++order)
1328467c996cSMel Gorman 		seq_printf(m, "%6d ", order);
1329467c996cSMel Gorman 	seq_putc(m, '\n');
1330467c996cSMel Gorman 
1331727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
1332467c996cSMel Gorman 
1333467c996cSMel Gorman 	return 0;
1334467c996cSMel Gorman }
1335467c996cSMel Gorman 
1336467c996cSMel Gorman static void pagetypeinfo_showblockcount_print(struct seq_file *m,
1337467c996cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1338467c996cSMel Gorman {
1339467c996cSMel Gorman 	int mtype;
1340467c996cSMel Gorman 	unsigned long pfn;
1341467c996cSMel Gorman 	unsigned long start_pfn = zone->zone_start_pfn;
1342108bcc96SCody P Schafer 	unsigned long end_pfn = zone_end_pfn(zone);
1343467c996cSMel Gorman 	unsigned long count[MIGRATE_TYPES] = { 0, };
1344467c996cSMel Gorman 
1345467c996cSMel Gorman 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
1346467c996cSMel Gorman 		struct page *page;
1347467c996cSMel Gorman 
1348d336e94eSMichal Hocko 		page = pfn_to_online_page(pfn);
1349d336e94eSMichal Hocko 		if (!page)
1350467c996cSMel Gorman 			continue;
1351467c996cSMel Gorman 
1352eb33575cSMel Gorman 		/* Watch for unexpected holes punched in the memmap */
1353eb33575cSMel Gorman 		if (!memmap_valid_within(pfn, page, zone))
1354e80d6a24SMel Gorman 			continue;
1355eb33575cSMel Gorman 
1356a91c43c7SJoonsoo Kim 		if (page_zone(page) != zone)
1357a91c43c7SJoonsoo Kim 			continue;
1358a91c43c7SJoonsoo Kim 
1359467c996cSMel Gorman 		mtype = get_pageblock_migratetype(page);
1360467c996cSMel Gorman 
1361e80d6a24SMel Gorman 		if (mtype < MIGRATE_TYPES)
1362467c996cSMel Gorman 			count[mtype]++;
1363467c996cSMel Gorman 	}
1364467c996cSMel Gorman 
1365467c996cSMel Gorman 	/* Print counts */
1366467c996cSMel Gorman 	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1367467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1368467c996cSMel Gorman 		seq_printf(m, "%12lu ", count[mtype]);
1369467c996cSMel Gorman 	seq_putc(m, '\n');
1370467c996cSMel Gorman }
1371467c996cSMel Gorman 
1372f113e641SSeongJae Park /* Print out the number of pageblocks for each migratetype */
1373467c996cSMel Gorman static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
1374467c996cSMel Gorman {
1375467c996cSMel Gorman 	int mtype;
1376467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1377467c996cSMel Gorman 
1378467c996cSMel Gorman 	seq_printf(m, "\n%-23s", "Number of blocks type ");
1379467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1380467c996cSMel Gorman 		seq_printf(m, "%12s ", migratetype_names[mtype]);
1381467c996cSMel Gorman 	seq_putc(m, '\n');
1382727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false,
1383727c080fSVinayak Menon 		pagetypeinfo_showblockcount_print);
1384467c996cSMel Gorman 
1385467c996cSMel Gorman 	return 0;
1386467c996cSMel Gorman }
1387467c996cSMel Gorman 
138848c96a36SJoonsoo Kim /*
138948c96a36SJoonsoo Kim  * Print out the number of pageblocks for each migratetype that contain pages
139048c96a36SJoonsoo Kim  * of other types. This gives an indication of how well fallbacks are being
139148c96a36SJoonsoo Kim  * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
139248c96a36SJoonsoo Kim  * to determine what is going on
139348c96a36SJoonsoo Kim  */
139448c96a36SJoonsoo Kim static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
139548c96a36SJoonsoo Kim {
139648c96a36SJoonsoo Kim #ifdef CONFIG_PAGE_OWNER
139748c96a36SJoonsoo Kim 	int mtype;
139848c96a36SJoonsoo Kim 
13997dd80b8aSVlastimil Babka 	if (!static_branch_unlikely(&page_owner_inited))
140048c96a36SJoonsoo Kim 		return;
140148c96a36SJoonsoo Kim 
140248c96a36SJoonsoo Kim 	drain_all_pages(NULL);
140348c96a36SJoonsoo Kim 
140448c96a36SJoonsoo Kim 	seq_printf(m, "\n%-23s", "Number of mixed blocks ");
140548c96a36SJoonsoo Kim 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
140648c96a36SJoonsoo Kim 		seq_printf(m, "%12s ", migratetype_names[mtype]);
140748c96a36SJoonsoo Kim 	seq_putc(m, '\n');
140848c96a36SJoonsoo Kim 
1409727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, true,
1410727c080fSVinayak Menon 		pagetypeinfo_showmixedcount_print);
141148c96a36SJoonsoo Kim #endif /* CONFIG_PAGE_OWNER */
141248c96a36SJoonsoo Kim }
141348c96a36SJoonsoo Kim 
1414467c996cSMel Gorman /*
1415467c996cSMel Gorman  * This prints out statistics in relation to grouping pages by mobility.
1416467c996cSMel Gorman  * It is expensive to collect so do not constantly read the file.
1417467c996cSMel Gorman  */
1418467c996cSMel Gorman static int pagetypeinfo_show(struct seq_file *m, void *arg)
1419467c996cSMel Gorman {
1420467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1421467c996cSMel Gorman 
142241b25a37SKOSAKI Motohiro 	/* check memoryless node */
1423a47b53c5SLai Jiangshan 	if (!node_state(pgdat->node_id, N_MEMORY))
142441b25a37SKOSAKI Motohiro 		return 0;
142541b25a37SKOSAKI Motohiro 
1426467c996cSMel Gorman 	seq_printf(m, "Page block order: %d\n", pageblock_order);
1427467c996cSMel Gorman 	seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
1428467c996cSMel Gorman 	seq_putc(m, '\n');
1429467c996cSMel Gorman 	pagetypeinfo_showfree(m, pgdat);
1430467c996cSMel Gorman 	pagetypeinfo_showblockcount(m, pgdat);
143148c96a36SJoonsoo Kim 	pagetypeinfo_showmixedcount(m, pgdat);
1432467c996cSMel Gorman 
1433f6ac2354SChristoph Lameter 	return 0;
1434f6ac2354SChristoph Lameter }
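/*
 * The header written by pagetypeinfo_show() comes out as, for example
 * (pageblock_order = 9 and pageblock_nr_pages = 512 are typical with 4K
 * base pages and 2M huge pages, but are only assumed values here):
 *
 *   Page block order: 9
 *   Pages per block:  512
 *
 * followed by the free-count and block-count tables printed above.
 */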
1435f6ac2354SChristoph Lameter 
14368f32f7e5SAlexey Dobriyan static const struct seq_operations fragmentation_op = {
1437f6ac2354SChristoph Lameter 	.start	= frag_start,
1438f6ac2354SChristoph Lameter 	.next	= frag_next,
1439f6ac2354SChristoph Lameter 	.stop	= frag_stop,
1440f6ac2354SChristoph Lameter 	.show	= frag_show,
1441f6ac2354SChristoph Lameter };
1442f6ac2354SChristoph Lameter 
14438f32f7e5SAlexey Dobriyan static int fragmentation_open(struct inode *inode, struct file *file)
14448f32f7e5SAlexey Dobriyan {
14458f32f7e5SAlexey Dobriyan 	return seq_open(file, &fragmentation_op);
14468f32f7e5SAlexey Dobriyan }
14478f32f7e5SAlexey Dobriyan 
14489d85e15fSAnshuman Khandual static const struct file_operations buddyinfo_file_operations = {
14498f32f7e5SAlexey Dobriyan 	.open		= fragmentation_open,
14508f32f7e5SAlexey Dobriyan 	.read		= seq_read,
14518f32f7e5SAlexey Dobriyan 	.llseek		= seq_lseek,
14528f32f7e5SAlexey Dobriyan 	.release	= seq_release,
14538f32f7e5SAlexey Dobriyan };
14548f32f7e5SAlexey Dobriyan 
145574e2e8e8SAlexey Dobriyan static const struct seq_operations pagetypeinfo_op = {
1456467c996cSMel Gorman 	.start	= frag_start,
1457467c996cSMel Gorman 	.next	= frag_next,
1458467c996cSMel Gorman 	.stop	= frag_stop,
1459467c996cSMel Gorman 	.show	= pagetypeinfo_show,
1460467c996cSMel Gorman };
1461467c996cSMel Gorman 
146274e2e8e8SAlexey Dobriyan static int pagetypeinfo_open(struct inode *inode, struct file *file)
146374e2e8e8SAlexey Dobriyan {
146474e2e8e8SAlexey Dobriyan 	return seq_open(file, &pagetypeinfo_op);
146574e2e8e8SAlexey Dobriyan }
146674e2e8e8SAlexey Dobriyan 
14679d85e15fSAnshuman Khandual static const struct file_operations pagetypeinfo_file_operations = {
146874e2e8e8SAlexey Dobriyan 	.open		= pagetypeinfo_open,
146974e2e8e8SAlexey Dobriyan 	.read		= seq_read,
147074e2e8e8SAlexey Dobriyan 	.llseek		= seq_lseek,
147174e2e8e8SAlexey Dobriyan 	.release	= seq_release,
147274e2e8e8SAlexey Dobriyan };
147374e2e8e8SAlexey Dobriyan 
1474e2ecc8a7SMel Gorman static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
1475e2ecc8a7SMel Gorman {
1476e2ecc8a7SMel Gorman 	int zid;
1477e2ecc8a7SMel Gorman 
1478e2ecc8a7SMel Gorman 	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
1479e2ecc8a7SMel Gorman 		struct zone *compare = &pgdat->node_zones[zid];
1480e2ecc8a7SMel Gorman 
1481e2ecc8a7SMel Gorman 		if (populated_zone(compare))
1482e2ecc8a7SMel Gorman 			return zone == compare;
1483e2ecc8a7SMel Gorman 	}
1484e2ecc8a7SMel Gorman 
1485e2ecc8a7SMel Gorman 	return false;
1486e2ecc8a7SMel Gorman }
1487e2ecc8a7SMel Gorman 
1488467c996cSMel Gorman static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1489467c996cSMel Gorman 							struct zone *zone)
1490f6ac2354SChristoph Lameter {
1491f6ac2354SChristoph Lameter 	int i;
1492f6ac2354SChristoph Lameter 	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
1493e2ecc8a7SMel Gorman 	if (is_zone_first_populated(pgdat, zone)) {
1494e2ecc8a7SMel Gorman 		seq_printf(m, "\n  per-node stats");
1495e2ecc8a7SMel Gorman 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
1496e2ecc8a7SMel Gorman 			seq_printf(m, "\n      %-12s %lu",
14973a321d2aSKemi Wang 				vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
14983a321d2aSKemi Wang 				NR_VM_NUMA_STAT_ITEMS],
1499e2ecc8a7SMel Gorman 				node_page_state(pgdat, i));
1500e2ecc8a7SMel Gorman 		}
1501e2ecc8a7SMel Gorman 	}
1502f6ac2354SChristoph Lameter 	seq_printf(m,
1503f6ac2354SChristoph Lameter 		   "\n  pages free     %lu"
1504f6ac2354SChristoph Lameter 		   "\n        min      %lu"
1505f6ac2354SChristoph Lameter 		   "\n        low      %lu"
1506f6ac2354SChristoph Lameter 		   "\n        high     %lu"
1507f6ac2354SChristoph Lameter 		   "\n        spanned  %lu"
15089feedc9dSJiang Liu 		   "\n        present  %lu"
15099feedc9dSJiang Liu 		   "\n        managed  %lu",
151088f5acf8SMel Gorman 		   zone_page_state(zone, NR_FREE_PAGES),
151141858966SMel Gorman 		   min_wmark_pages(zone),
151241858966SMel Gorman 		   low_wmark_pages(zone),
151341858966SMel Gorman 		   high_wmark_pages(zone),
1514f6ac2354SChristoph Lameter 		   zone->spanned_pages,
15159feedc9dSJiang Liu 		   zone->present_pages,
15169feedc9dSJiang Liu 		   zone->managed_pages);
15172244b95aSChristoph Lameter 
1518f6ac2354SChristoph Lameter 	seq_printf(m,
15193484b2deSMel Gorman 		   "\n        protection: (%ld",
1520f6ac2354SChristoph Lameter 		   zone->lowmem_reserve[0]);
1521f6ac2354SChristoph Lameter 	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
15223484b2deSMel Gorman 		seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
15237dfb8bf3SDavid Rientjes 	seq_putc(m, ')');
15247dfb8bf3SDavid Rientjes 
15257dfb8bf3SDavid Rientjes 	/* If unpopulated, no other information is useful */
15267dfb8bf3SDavid Rientjes 	if (!populated_zone(zone)) {
15277dfb8bf3SDavid Rientjes 		seq_putc(m, '\n');
15287dfb8bf3SDavid Rientjes 		return;
15297dfb8bf3SDavid Rientjes 	}
15307dfb8bf3SDavid Rientjes 
15317dfb8bf3SDavid Rientjes 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
15327dfb8bf3SDavid Rientjes 		seq_printf(m, "\n      %-12s %lu", vmstat_text[i],
15337dfb8bf3SDavid Rientjes 				zone_page_state(zone, i));
15347dfb8bf3SDavid Rientjes 
15353a321d2aSKemi Wang #ifdef CONFIG_NUMA
15363a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
15373a321d2aSKemi Wang 		seq_printf(m, "\n      %-12s %lu",
15383a321d2aSKemi Wang 				vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
15393a321d2aSKemi Wang 				zone_numa_state(zone, i));
15403a321d2aSKemi Wang #endif
15413a321d2aSKemi Wang 
15427dfb8bf3SDavid Rientjes 	seq_printf(m, "\n  pagesets");
1543f6ac2354SChristoph Lameter 	for_each_online_cpu(i) {
1544f6ac2354SChristoph Lameter 		struct per_cpu_pageset *pageset;
1545f6ac2354SChristoph Lameter 
154699dcc3e5SChristoph Lameter 		pageset = per_cpu_ptr(zone->pageset, i);
1547f6ac2354SChristoph Lameter 		seq_printf(m,
15483dfa5721SChristoph Lameter 			   "\n    cpu: %i"
1549f6ac2354SChristoph Lameter 			   "\n              count: %i"
1550f6ac2354SChristoph Lameter 			   "\n              high:  %i"
1551f6ac2354SChristoph Lameter 			   "\n              batch: %i",
15523dfa5721SChristoph Lameter 			   i,
15533dfa5721SChristoph Lameter 			   pageset->pcp.count,
15543dfa5721SChristoph Lameter 			   pageset->pcp.high,
15553dfa5721SChristoph Lameter 			   pageset->pcp.batch);
1556df9ecabaSChristoph Lameter #ifdef CONFIG_SMP
1557df9ecabaSChristoph Lameter 		seq_printf(m, "\n  vm stats threshold: %d",
1558df9ecabaSChristoph Lameter 				pageset->stat_threshold);
1559df9ecabaSChristoph Lameter #endif
1560f6ac2354SChristoph Lameter 	}
1561f6ac2354SChristoph Lameter 	seq_printf(m,
1562599d0c95SMel Gorman 		   "\n  node_unreclaimable:  %u"
1563556adecbSRik van Riel 		   "\n  start_pfn:           %lu"
1564599d0c95SMel Gorman 		   "\n  node_inactive_ratio: %u",
1565c73322d0SJohannes Weiner 		   pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
1566556adecbSRik van Riel 		   zone->zone_start_pfn,
1567599d0c95SMel Gorman 		   zone->zone_pgdat->inactive_ratio);
1568f6ac2354SChristoph Lameter 	seq_putc(m, '\n');
1569f6ac2354SChristoph Lameter }
1570467c996cSMel Gorman 
1571467c996cSMel Gorman /*
1572b2bd8598SDavid Rientjes  * Output information about zones in @pgdat.  All zones are printed regardless
1573b2bd8598SDavid Rientjes  * of whether they are populated or not: lowmem_reserve_ratio operates on the
1574b2bd8598SDavid Rientjes  * set of all zones and userspace would not be aware of such zones if they are
1575b2bd8598SDavid Rientjes  * suppressed here (zoneinfo displays the effect of lowmem_reserve_ratio).
1576467c996cSMel Gorman  */
1577467c996cSMel Gorman static int zoneinfo_show(struct seq_file *m, void *arg)
1578467c996cSMel Gorman {
1579467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1580727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
1581f6ac2354SChristoph Lameter 	return 0;
1582f6ac2354SChristoph Lameter }
1583f6ac2354SChristoph Lameter 
15845c9fe628SAlexey Dobriyan static const struct seq_operations zoneinfo_op = {
1585f6ac2354SChristoph Lameter 	.start	= frag_start, /* iterate over all zones. The same as in
1586f6ac2354SChristoph Lameter 			       * fragmentation. */
1587f6ac2354SChristoph Lameter 	.next	= frag_next,
1588f6ac2354SChristoph Lameter 	.stop	= frag_stop,
1589f6ac2354SChristoph Lameter 	.show	= zoneinfo_show,
1590f6ac2354SChristoph Lameter };
1591f6ac2354SChristoph Lameter 
15925c9fe628SAlexey Dobriyan static int zoneinfo_open(struct inode *inode, struct file *file)
15935c9fe628SAlexey Dobriyan {
15945c9fe628SAlexey Dobriyan 	return seq_open(file, &zoneinfo_op);
15955c9fe628SAlexey Dobriyan }
15965c9fe628SAlexey Dobriyan 
15979d85e15fSAnshuman Khandual static const struct file_operations zoneinfo_file_operations = {
15985c9fe628SAlexey Dobriyan 	.open		= zoneinfo_open,
15995c9fe628SAlexey Dobriyan 	.read		= seq_read,
16005c9fe628SAlexey Dobriyan 	.llseek		= seq_lseek,
16015c9fe628SAlexey Dobriyan 	.release	= seq_release,
16025c9fe628SAlexey Dobriyan };
16035c9fe628SAlexey Dobriyan 
160479da826aSMichael Rubin enum writeback_stat_item {
160579da826aSMichael Rubin 	NR_DIRTY_THRESHOLD,
160679da826aSMichael Rubin 	NR_DIRTY_BG_THRESHOLD,
160779da826aSMichael Rubin 	NR_VM_WRITEBACK_STAT_ITEMS,
160879da826aSMichael Rubin };
160979da826aSMichael Rubin 
1610f6ac2354SChristoph Lameter static void *vmstat_start(struct seq_file *m, loff_t *pos)
1611f6ac2354SChristoph Lameter {
16122244b95aSChristoph Lameter 	unsigned long *v;
161379da826aSMichael Rubin 	int i, stat_items_size;
1614f6ac2354SChristoph Lameter 
1615f6ac2354SChristoph Lameter 	if (*pos >= ARRAY_SIZE(vmstat_text))
1616f6ac2354SChristoph Lameter 		return NULL;
161779da826aSMichael Rubin 	stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
16183a321d2aSKemi Wang 			  NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) +
161975ef7184SMel Gorman 			  NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) +
162079da826aSMichael Rubin 			  NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
1621f6ac2354SChristoph Lameter 
1622f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
162379da826aSMichael Rubin 	stat_items_size += sizeof(struct vm_event_state);
1624f8891e5eSChristoph Lameter #endif
162579da826aSMichael Rubin 
162679da826aSMichael Rubin 	v = kmalloc(stat_items_size, GFP_KERNEL);
16272244b95aSChristoph Lameter 	m->private = v;
16282244b95aSChristoph Lameter 	if (!v)
1629f6ac2354SChristoph Lameter 		return ERR_PTR(-ENOMEM);
16302244b95aSChristoph Lameter 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1631c41f012aSMichal Hocko 		v[i] = global_zone_page_state(i);
163279da826aSMichael Rubin 	v += NR_VM_ZONE_STAT_ITEMS;
163379da826aSMichael Rubin 
16343a321d2aSKemi Wang #ifdef CONFIG_NUMA
16353a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
16363a321d2aSKemi Wang 		v[i] = global_numa_state(i);
16373a321d2aSKemi Wang 	v += NR_VM_NUMA_STAT_ITEMS;
16383a321d2aSKemi Wang #endif
16393a321d2aSKemi Wang 
164075ef7184SMel Gorman 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
164175ef7184SMel Gorman 		v[i] = global_node_page_state(i);
164275ef7184SMel Gorman 	v += NR_VM_NODE_STAT_ITEMS;
164375ef7184SMel Gorman 
164479da826aSMichael Rubin 	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
164579da826aSMichael Rubin 			    v + NR_DIRTY_THRESHOLD);
164679da826aSMichael Rubin 	v += NR_VM_WRITEBACK_STAT_ITEMS;
164779da826aSMichael Rubin 
1648f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
164979da826aSMichael Rubin 	all_vm_events(v);
165079da826aSMichael Rubin 	v[PGPGIN] /= 2;		/* sectors -> kbytes */
165179da826aSMichael Rubin 	v[PGPGOUT] /= 2;
1652f8891e5eSChristoph Lameter #endif
1653ff8b16d7SWu Fengguang 	return (unsigned long *)m->private + *pos;
1654f6ac2354SChristoph Lameter }
1655f6ac2354SChristoph Lameter 
1656f6ac2354SChristoph Lameter static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
1657f6ac2354SChristoph Lameter {
1658f6ac2354SChristoph Lameter 	(*pos)++;
1659f6ac2354SChristoph Lameter 	if (*pos >= ARRAY_SIZE(vmstat_text))
1660f6ac2354SChristoph Lameter 		return NULL;
1661f6ac2354SChristoph Lameter 	return (unsigned long *)m->private + *pos;
1662f6ac2354SChristoph Lameter }
1663f6ac2354SChristoph Lameter 
1664f6ac2354SChristoph Lameter static int vmstat_show(struct seq_file *m, void *arg)
1665f6ac2354SChristoph Lameter {
1666f6ac2354SChristoph Lameter 	unsigned long *l = arg;
1667f6ac2354SChristoph Lameter 	unsigned long off = l - (unsigned long *)m->private;
166868ba0326SAlexey Dobriyan 
166968ba0326SAlexey Dobriyan 	seq_puts(m, vmstat_text[off]);
167075ba1d07SJoe Perches 	seq_put_decimal_ull(m, " ", *l);
167168ba0326SAlexey Dobriyan 	seq_putc(m, '\n');
1672f6ac2354SChristoph Lameter 	return 0;
1673f6ac2354SChristoph Lameter }
1674f6ac2354SChristoph Lameter 
1675f6ac2354SChristoph Lameter static void vmstat_stop(struct seq_file *m, void *arg)
1676f6ac2354SChristoph Lameter {
1677f6ac2354SChristoph Lameter 	kfree(m->private);
1678f6ac2354SChristoph Lameter 	m->private = NULL;
1679f6ac2354SChristoph Lameter }
1680f6ac2354SChristoph Lameter 
1681b6aa44abSAlexey Dobriyan static const struct seq_operations vmstat_op = {
1682f6ac2354SChristoph Lameter 	.start	= vmstat_start,
1683f6ac2354SChristoph Lameter 	.next	= vmstat_next,
1684f6ac2354SChristoph Lameter 	.stop	= vmstat_stop,
1685f6ac2354SChristoph Lameter 	.show	= vmstat_show,
1686f6ac2354SChristoph Lameter };
1687f6ac2354SChristoph Lameter 
1688b6aa44abSAlexey Dobriyan static int vmstat_open(struct inode *inode, struct file *file)
1689b6aa44abSAlexey Dobriyan {
1690b6aa44abSAlexey Dobriyan 	return seq_open(file, &vmstat_op);
1691b6aa44abSAlexey Dobriyan }
1692b6aa44abSAlexey Dobriyan 
16939d85e15fSAnshuman Khandual static const struct file_operations vmstat_file_operations = {
1694b6aa44abSAlexey Dobriyan 	.open		= vmstat_open,
1695b6aa44abSAlexey Dobriyan 	.read		= seq_read,
1696b6aa44abSAlexey Dobriyan 	.llseek		= seq_lseek,
1697b6aa44abSAlexey Dobriyan 	.release	= seq_release,
1698b6aa44abSAlexey Dobriyan };
1699f6ac2354SChristoph Lameter #endif /* CONFIG_PROC_FS */
1700f6ac2354SChristoph Lameter 
1701df9ecabaSChristoph Lameter #ifdef CONFIG_SMP
1702d1187ed2SChristoph Lameter static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
170377461ab3SChristoph Lameter int sysctl_stat_interval __read_mostly = HZ;
1704d1187ed2SChristoph Lameter 
170552b6f46bSHugh Dickins #ifdef CONFIG_PROC_FS
170652b6f46bSHugh Dickins static void refresh_vm_stats(struct work_struct *work)
170752b6f46bSHugh Dickins {
170852b6f46bSHugh Dickins 	refresh_cpu_vm_stats(true);
170952b6f46bSHugh Dickins }
171052b6f46bSHugh Dickins 
171152b6f46bSHugh Dickins int vmstat_refresh(struct ctl_table *table, int write,
171252b6f46bSHugh Dickins 		   void __user *buffer, size_t *lenp, loff_t *ppos)
171352b6f46bSHugh Dickins {
171452b6f46bSHugh Dickins 	long val;
171552b6f46bSHugh Dickins 	int err;
171652b6f46bSHugh Dickins 	int i;
171752b6f46bSHugh Dickins 
171852b6f46bSHugh Dickins 	/*
171952b6f46bSHugh Dickins 	 * The regular update, every sysctl_stat_interval, may come later
172052b6f46bSHugh Dickins 	 * than expected: leaving a significant amount in per_cpu buckets.
172152b6f46bSHugh Dickins 	 * This is particularly misleading when checking a quantity of HUGE
172252b6f46bSHugh Dickins 	 * pages, immediately after running a test.  /proc/sys/vm/stat_refresh,
172352b6f46bSHugh Dickins 	 * which can equally be echo'ed to or cat'ted from (by root),
172452b6f46bSHugh Dickins 	 * can be used to update the stats just before reading them.
172552b6f46bSHugh Dickins 	 *
1726c41f012aSMichal Hocko 	 * Oh, and since global_zone_page_state() etc. are so careful to hide
172752b6f46bSHugh Dickins 	 * transiently negative values, report an error here if any of
172852b6f46bSHugh Dickins 	 * the stats is negative, so we know to go looking for imbalance.
172952b6f46bSHugh Dickins 	 */
173052b6f46bSHugh Dickins 	err = schedule_on_each_cpu(refresh_vm_stats);
173152b6f46bSHugh Dickins 	if (err)
173252b6f46bSHugh Dickins 		return err;
173352b6f46bSHugh Dickins 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
173475ef7184SMel Gorman 		val = atomic_long_read(&vm_zone_stat[i]);
173552b6f46bSHugh Dickins 		if (val < 0) {
173652b6f46bSHugh Dickins 			pr_warn("%s: %s %ld\n",
173752b6f46bSHugh Dickins 				__func__, vmstat_text[i], val);
173852b6f46bSHugh Dickins 			err = -EINVAL;
173952b6f46bSHugh Dickins 		}
174052b6f46bSHugh Dickins 	}
17413a321d2aSKemi Wang #ifdef CONFIG_NUMA
17423a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
17433a321d2aSKemi Wang 		val = atomic_long_read(&vm_numa_stat[i]);
17443a321d2aSKemi Wang 		if (val < 0) {
17453a321d2aSKemi Wang 			pr_warn("%s: %s %ld\n",
17463a321d2aSKemi Wang 				__func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val);
17473a321d2aSKemi Wang 			err = -EINVAL;
17483a321d2aSKemi Wang 		}
17493a321d2aSKemi Wang 	}
17503a321d2aSKemi Wang #endif
175152b6f46bSHugh Dickins 	if (err)
175252b6f46bSHugh Dickins 		return err;
175352b6f46bSHugh Dickins 	if (write)
175452b6f46bSHugh Dickins 		*ppos += *lenp;
175552b6f46bSHugh Dickins 	else
175652b6f46bSHugh Dickins 		*lenp = 0;
175752b6f46bSHugh Dickins 	return 0;
175852b6f46bSHugh Dickins }
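/*
 * Usage sketch for the interface handled above (root only, as noted in the
 * comment in vmstat_refresh()):
 *
 *   echo 1 > /proc/sys/vm/stat_refresh    - fold per-cpu diffs before reading stats
 *   cat /proc/sys/vm/stat_refresh         - same effect; the read returns no data
 *
 * If any folded global counter is negative, it is reported with pr_warn()
 * and the call returns -EINVAL.
 */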
175952b6f46bSHugh Dickins #endif /* CONFIG_PROC_FS */
176052b6f46bSHugh Dickins 
1761d1187ed2SChristoph Lameter static void vmstat_update(struct work_struct *w)
1762d1187ed2SChristoph Lameter {
17630eb77e98SChristoph Lameter 	if (refresh_cpu_vm_stats(true)) {
17647cc36bbdSChristoph Lameter 		/*
17657cc36bbdSChristoph Lameter 		 * Counters were updated so we expect more updates
17667cc36bbdSChristoph Lameter 		 * to occur in the future. Keep on running the
17677cc36bbdSChristoph Lameter 		 * update worker thread.
17687cc36bbdSChristoph Lameter 		 */
1769ce612879SMichal Hocko 		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
1770176bed1dSLinus Torvalds 				this_cpu_ptr(&vmstat_work),
177198f4ebb2SAnton Blanchard 				round_jiffies_relative(sysctl_stat_interval));
1772f01f17d3SMichal Hocko 	}
1773d1187ed2SChristoph Lameter }
1774d1187ed2SChristoph Lameter 
17800eb77e98SChristoph Lameter /*
17817cc36bbdSChristoph Lameter  * Check if the diffs for a certain cpu indicate that
17827cc36bbdSChristoph Lameter  * an update is needed.
17837cc36bbdSChristoph Lameter  */
17847cc36bbdSChristoph Lameter static bool need_update(int cpu)
1785d1187ed2SChristoph Lameter {
17867cc36bbdSChristoph Lameter 	struct zone *zone;
1787d1187ed2SChristoph Lameter 
17887cc36bbdSChristoph Lameter 	for_each_populated_zone(zone) {
17897cc36bbdSChristoph Lameter 		struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
17907cc36bbdSChristoph Lameter 
17917cc36bbdSChristoph Lameter 		BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
17923a321d2aSKemi Wang #ifdef CONFIG_NUMA
1793*1d90ca89SKemi Wang 		BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 2);
17943a321d2aSKemi Wang #endif
17957cc36bbdSChristoph Lameter 		/*
17967cc36bbdSChristoph Lameter 		 * The fast way of checking if there are any vmstat diffs.
17977cc36bbdSChristoph Lameter 		 * This works because the diffs are byte sized items.
17987cc36bbdSChristoph Lameter 		 */
17997cc36bbdSChristoph Lameter 		if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
18007cc36bbdSChristoph Lameter 			return true;
18013a321d2aSKemi Wang #ifdef CONFIG_NUMA
18023a321d2aSKemi Wang 		if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS))
18033a321d2aSKemi Wang 			return true;
18043a321d2aSKemi Wang #endif
18057cc36bbdSChristoph Lameter 	}
18067cc36bbdSChristoph Lameter 	return false;
18077cc36bbdSChristoph Lameter }
18087cc36bbdSChristoph Lameter 
18097b8da4c7SChristoph Lameter /*
18107b8da4c7SChristoph Lameter  * Switch off vmstat processing and then fold all the remaining differentials
18117b8da4c7SChristoph Lameter  * until the diffs stay at zero. The function is used by NOHZ and can only be
18127b8da4c7SChristoph Lameter  * invoked when tick processing is not active.
18137b8da4c7SChristoph Lameter  */
1814f01f17d3SMichal Hocko void quiet_vmstat(void)
1815f01f17d3SMichal Hocko {
1816f01f17d3SMichal Hocko 	if (system_state != SYSTEM_RUNNING)
1817f01f17d3SMichal Hocko 		return;
1818f01f17d3SMichal Hocko 
18197b8da4c7SChristoph Lameter 	if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
1820f01f17d3SMichal Hocko 		return;
1821f01f17d3SMichal Hocko 
1822f01f17d3SMichal Hocko 	if (!need_update(smp_processor_id()))
1823f01f17d3SMichal Hocko 		return;
1824f01f17d3SMichal Hocko 
1825f01f17d3SMichal Hocko 	/*
1826f01f17d3SMichal Hocko 	 * Just refresh counters and do not care about the pending delayed
1827f01f17d3SMichal Hocko 	 * vmstat_update. It does not fire often enough to matter, and cancelling
1828f01f17d3SMichal Hocko 	 * it would be too expensive from this path.
1829f01f17d3SMichal Hocko 	 * vmstat_shepherd will take care about that for us.
1830f01f17d3SMichal Hocko 	 */
1831f01f17d3SMichal Hocko 	refresh_cpu_vm_stats(false);
1832f01f17d3SMichal Hocko }
1833f01f17d3SMichal Hocko 
18347cc36bbdSChristoph Lameter /*
18357cc36bbdSChristoph Lameter  * Shepherd worker thread that checks the
18367cc36bbdSChristoph Lameter  * differentials of processors that have their worker
18377cc36bbdSChristoph Lameter  * threads for vm statistics updates disabled because of
18387cc36bbdSChristoph Lameter  * inactivity.
18397cc36bbdSChristoph Lameter  */
18407cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w);
18417cc36bbdSChristoph Lameter 
18420eb77e98SChristoph Lameter static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);
18437cc36bbdSChristoph Lameter 
18447cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w)
18457cc36bbdSChristoph Lameter {
18467cc36bbdSChristoph Lameter 	int cpu;
18477cc36bbdSChristoph Lameter 
18487cc36bbdSChristoph Lameter 	get_online_cpus();
18497cc36bbdSChristoph Lameter 	/* Check processors whose vmstat worker threads have been disabled */
18507b8da4c7SChristoph Lameter 	for_each_online_cpu(cpu) {
1851f01f17d3SMichal Hocko 		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
18527cc36bbdSChristoph Lameter 
18537b8da4c7SChristoph Lameter 		if (!delayed_work_pending(dw) && need_update(cpu))
1854ce612879SMichal Hocko 			queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
1855f01f17d3SMichal Hocko 	}
18567cc36bbdSChristoph Lameter 	put_online_cpus();
18577cc36bbdSChristoph Lameter 
18587cc36bbdSChristoph Lameter 	schedule_delayed_work(&shepherd,
18597cc36bbdSChristoph Lameter 		round_jiffies_relative(sysctl_stat_interval));
18607cc36bbdSChristoph Lameter }
18617cc36bbdSChristoph Lameter 
18627cc36bbdSChristoph Lameter static void __init start_shepherd_timer(void)
18637cc36bbdSChristoph Lameter {
18647cc36bbdSChristoph Lameter 	int cpu;
18657cc36bbdSChristoph Lameter 
18667cc36bbdSChristoph Lameter 	for_each_possible_cpu(cpu)
1867ccde8bd4SMichal Hocko 		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
18687cc36bbdSChristoph Lameter 			vmstat_update);
18697cc36bbdSChristoph Lameter 
18707cc36bbdSChristoph Lameter 	schedule_delayed_work(&shepherd,
18717cc36bbdSChristoph Lameter 		round_jiffies_relative(sysctl_stat_interval));
1872d1187ed2SChristoph Lameter }
1873d1187ed2SChristoph Lameter 
187403e86dbaSTim Chen static void __init init_cpu_node_state(void)
187503e86dbaSTim Chen {
18764c501327SSebastian Andrzej Siewior 	int node;
187703e86dbaSTim Chen 
18784c501327SSebastian Andrzej Siewior 	for_each_online_node(node) {
18794c501327SSebastian Andrzej Siewior 		if (cpumask_weight(cpumask_of_node(node)) > 0)
18804c501327SSebastian Andrzej Siewior 			node_set_state(node, N_CPU);
18814c501327SSebastian Andrzej Siewior 	}
188203e86dbaSTim Chen }
188303e86dbaSTim Chen 
18845438da97SSebastian Andrzej Siewior static int vmstat_cpu_online(unsigned int cpu)
1885807a1bd2SToshi Kani {
18865ee28a44SKAMEZAWA Hiroyuki 	refresh_zone_stat_thresholds();
1887ad596925SChristoph Lameter 	node_set_state(cpu_to_node(cpu), N_CPU);
18885438da97SSebastian Andrzej Siewior 	return 0;
1889df9ecabaSChristoph Lameter }
1890df9ecabaSChristoph Lameter 
18915438da97SSebastian Andrzej Siewior static int vmstat_cpu_down_prep(unsigned int cpu)
18925438da97SSebastian Andrzej Siewior {
18935438da97SSebastian Andrzej Siewior 	cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
18945438da97SSebastian Andrzej Siewior 	return 0;
18955438da97SSebastian Andrzej Siewior }
18965438da97SSebastian Andrzej Siewior 
18975438da97SSebastian Andrzej Siewior static int vmstat_cpu_dead(unsigned int cpu)
18985438da97SSebastian Andrzej Siewior {
18995438da97SSebastian Andrzej Siewior 	const struct cpumask *node_cpus;
19005438da97SSebastian Andrzej Siewior 	int node;
19015438da97SSebastian Andrzej Siewior 
19025438da97SSebastian Andrzej Siewior 	node = cpu_to_node(cpu);
19035438da97SSebastian Andrzej Siewior 
19045438da97SSebastian Andrzej Siewior 	refresh_zone_stat_thresholds();
19055438da97SSebastian Andrzej Siewior 	node_cpus = cpumask_of_node(node);
19065438da97SSebastian Andrzej Siewior 	if (cpumask_weight(node_cpus) > 0)
19075438da97SSebastian Andrzej Siewior 		return 0;
19085438da97SSebastian Andrzej Siewior 
19095438da97SSebastian Andrzej Siewior 	node_clear_state(node, N_CPU);
19105438da97SSebastian Andrzej Siewior 	return 0;
19115438da97SSebastian Andrzej Siewior }
19125438da97SSebastian Andrzej Siewior 
19138f32f7e5SAlexey Dobriyan #endif
1914df9ecabaSChristoph Lameter 
1915ce612879SMichal Hocko struct workqueue_struct *mm_percpu_wq;
1916ce612879SMichal Hocko 
1917597b7305SMichal Hocko void __init init_mm_internals(void)
1918df9ecabaSChristoph Lameter {
1919ce612879SMichal Hocko 	int ret __maybe_unused;
19205438da97SSebastian Andrzej Siewior 
192180d136e1SMichal Hocko 	mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
1922ce612879SMichal Hocko 
1923ce612879SMichal Hocko #ifdef CONFIG_SMP
19245438da97SSebastian Andrzej Siewior 	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
19255438da97SSebastian Andrzej Siewior 					NULL, vmstat_cpu_dead);
19265438da97SSebastian Andrzej Siewior 	if (ret < 0)
19275438da97SSebastian Andrzej Siewior 		pr_err("vmstat: failed to register 'dead' hotplug state\n");
19285438da97SSebastian Andrzej Siewior 
19295438da97SSebastian Andrzej Siewior 	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
19305438da97SSebastian Andrzej Siewior 					vmstat_cpu_online,
19315438da97SSebastian Andrzej Siewior 					vmstat_cpu_down_prep);
19325438da97SSebastian Andrzej Siewior 	if (ret < 0)
19335438da97SSebastian Andrzej Siewior 		pr_err("vmstat: failed to register 'online' hotplug state\n");
19345438da97SSebastian Andrzej Siewior 
19355438da97SSebastian Andrzej Siewior 	get_online_cpus();
193603e86dbaSTim Chen 	init_cpu_node_state();
19375438da97SSebastian Andrzej Siewior 	put_online_cpus();
1938d1187ed2SChristoph Lameter 
19397cc36bbdSChristoph Lameter 	start_shepherd_timer();
19408f32f7e5SAlexey Dobriyan #endif
19418f32f7e5SAlexey Dobriyan #ifdef CONFIG_PROC_FS
19429d85e15fSAnshuman Khandual 	proc_create("buddyinfo", 0444, NULL, &buddyinfo_file_operations);
19439d85e15fSAnshuman Khandual 	proc_create("pagetypeinfo", 0444, NULL, &pagetypeinfo_file_operations);
19449d85e15fSAnshuman Khandual 	proc_create("vmstat", 0444, NULL, &vmstat_file_operations);
19459d85e15fSAnshuman Khandual 	proc_create("zoneinfo", 0444, NULL, &zoneinfo_file_operations);
19468f32f7e5SAlexey Dobriyan #endif
1947df9ecabaSChristoph Lameter }
1948d7a5752cSMel Gorman 
1949d7a5752cSMel Gorman #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
1950d7a5752cSMel Gorman 
1951d7a5752cSMel Gorman /*
1952d7a5752cSMel Gorman  * Return an index indicating how much of the available free memory is
1953d7a5752cSMel Gorman  * unusable for an allocation of the requested size.
1954d7a5752cSMel Gorman  */
1955d7a5752cSMel Gorman static int unusable_free_index(unsigned int order,
1956d7a5752cSMel Gorman 				struct contig_page_info *info)
1957d7a5752cSMel Gorman {
1958d7a5752cSMel Gorman 	/* No free memory is interpreted as all free memory is unusable */
1959d7a5752cSMel Gorman 	if (info->free_pages == 0)
1960d7a5752cSMel Gorman 		return 1000;
1961d7a5752cSMel Gorman 
1962d7a5752cSMel Gorman 	/*
1963d7a5752cSMel Gorman 	 * The index is conceptually a value between 0 and 1; it is returned
1964d7a5752cSMel Gorman 	 * scaled by 1000, i.e. to 3 decimal places.
1965d7a5752cSMel Gorman 	 *
1966d7a5752cSMel Gorman 	 * 0 => no fragmentation
1967d7a5752cSMel Gorman 	 * 1 => high fragmentation
1968d7a5752cSMel Gorman 	 */
1969d7a5752cSMel Gorman 	return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
1970d7a5752cSMel Gorman 
1971d7a5752cSMel Gorman }
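/*
 * Worked example with hypothetical numbers: for info->free_pages = 1000,
 * order = 2 and info->free_blocks_suitable = 200, the suitable blocks cover
 * 200 << 2 = 800 pages, so the index is (1000 - 800) * 1000 / 1000 = 200,
 * which the callers below print as 0.200.
 */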
1972d7a5752cSMel Gorman 
1973d7a5752cSMel Gorman static void unusable_show_print(struct seq_file *m,
1974d7a5752cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1975d7a5752cSMel Gorman {
1976d7a5752cSMel Gorman 	unsigned int order;
1977d7a5752cSMel Gorman 	int index;
1978d7a5752cSMel Gorman 	struct contig_page_info info;
1979d7a5752cSMel Gorman 
1980d7a5752cSMel Gorman 	seq_printf(m, "Node %d, zone %8s ",
1981d7a5752cSMel Gorman 				pgdat->node_id,
1982d7a5752cSMel Gorman 				zone->name);
1983d7a5752cSMel Gorman 	for (order = 0; order < MAX_ORDER; ++order) {
1984d7a5752cSMel Gorman 		fill_contig_page_info(zone, order, &info);
1985d7a5752cSMel Gorman 		index = unusable_free_index(order, &info);
1986d7a5752cSMel Gorman 		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
1987d7a5752cSMel Gorman 	}
1988d7a5752cSMel Gorman 
1989d7a5752cSMel Gorman 	seq_putc(m, '\n');
1990d7a5752cSMel Gorman }
1991d7a5752cSMel Gorman 
1992d7a5752cSMel Gorman /*
1993d7a5752cSMel Gorman  * Display unusable free space index
1994d7a5752cSMel Gorman  *
1995d7a5752cSMel Gorman  * The unusable free space index measures how much of the available free
1996d7a5752cSMel Gorman  * memory cannot be used to satisfy an allocation of a given size and is a
1997d7a5752cSMel Gorman  * value between 0 and 1. The higher the value, the more of the free memory is
1998d7a5752cSMel Gorman  * unusable and, by implication, the worse the external fragmentation. This
1999d7a5752cSMel Gorman  * can be expressed as a percentage by multiplying by 100.
2000d7a5752cSMel Gorman  */
2001d7a5752cSMel Gorman static int unusable_show(struct seq_file *m, void *arg)
2002d7a5752cSMel Gorman {
2003d7a5752cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
2004d7a5752cSMel Gorman 
2005d7a5752cSMel Gorman 	/* check memoryless node */
2006a47b53c5SLai Jiangshan 	if (!node_state(pgdat->node_id, N_MEMORY))
2007d7a5752cSMel Gorman 		return 0;
2008d7a5752cSMel Gorman 
2009727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, unusable_show_print);
2010d7a5752cSMel Gorman 
2011d7a5752cSMel Gorman 	return 0;
2012d7a5752cSMel Gorman }
2013d7a5752cSMel Gorman 
2014d7a5752cSMel Gorman static const struct seq_operations unusable_op = {
2015d7a5752cSMel Gorman 	.start	= frag_start,
2016d7a5752cSMel Gorman 	.next	= frag_next,
2017d7a5752cSMel Gorman 	.stop	= frag_stop,
2018d7a5752cSMel Gorman 	.show	= unusable_show,
2019d7a5752cSMel Gorman };
2020d7a5752cSMel Gorman 
2021d7a5752cSMel Gorman static int unusable_open(struct inode *inode, struct file *file)
2022d7a5752cSMel Gorman {
2023d7a5752cSMel Gorman 	return seq_open(file, &unusable_op);
2024d7a5752cSMel Gorman }
2025d7a5752cSMel Gorman 
2026d7a5752cSMel Gorman static const struct file_operations unusable_file_ops = {
2027d7a5752cSMel Gorman 	.open		= unusable_open,
2028d7a5752cSMel Gorman 	.read		= seq_read,
2029d7a5752cSMel Gorman 	.llseek		= seq_lseek,
2030d7a5752cSMel Gorman 	.release	= seq_release,
2031d7a5752cSMel Gorman };
2032d7a5752cSMel Gorman 
2033f1a5ab12SMel Gorman static void extfrag_show_print(struct seq_file *m,
2034f1a5ab12SMel Gorman 					pg_data_t *pgdat, struct zone *zone)
2035f1a5ab12SMel Gorman {
2036f1a5ab12SMel Gorman 	unsigned int order;
2037f1a5ab12SMel Gorman 	int index;
2038f1a5ab12SMel Gorman 
2039f1a5ab12SMel Gorman 	/* Alloc on stack as interrupts are disabled for zone walk */
2040f1a5ab12SMel Gorman 	struct contig_page_info info;
2041f1a5ab12SMel Gorman 
2042f1a5ab12SMel Gorman 	seq_printf(m, "Node %d, zone %8s ",
2043f1a5ab12SMel Gorman 				pgdat->node_id,
2044f1a5ab12SMel Gorman 				zone->name);
2045f1a5ab12SMel Gorman 	for (order = 0; order < MAX_ORDER; ++order) {
2046f1a5ab12SMel Gorman 		fill_contig_page_info(zone, order, &info);
204756de7263SMel Gorman 		index = __fragmentation_index(order, &info);
2048f1a5ab12SMel Gorman 		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
2049f1a5ab12SMel Gorman 	}
2050f1a5ab12SMel Gorman 
2051f1a5ab12SMel Gorman 	seq_putc(m, '\n');
2052f1a5ab12SMel Gorman }
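/*
 * The index returned by __fragmentation_index() (defined earlier in this
 * file, not shown here) is scaled by 1000, so the "%d.%03d" format above
 * prints e.g. a hypothetical value of 425 as 0.425.
 */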
2053f1a5ab12SMel Gorman 
2054f1a5ab12SMel Gorman /*
2055f1a5ab12SMel Gorman  * Display fragmentation index for orders that allocations would fail for
2056f1a5ab12SMel Gorman  */
2057f1a5ab12SMel Gorman static int extfrag_show(struct seq_file *m, void *arg)
2058f1a5ab12SMel Gorman {
2059f1a5ab12SMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
2060f1a5ab12SMel Gorman 
2061727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);
2062f1a5ab12SMel Gorman 
2063f1a5ab12SMel Gorman 	return 0;
2064f1a5ab12SMel Gorman }
2065f1a5ab12SMel Gorman 
2066f1a5ab12SMel Gorman static const struct seq_operations extfrag_op = {
2067f1a5ab12SMel Gorman 	.start	= frag_start,
2068f1a5ab12SMel Gorman 	.next	= frag_next,
2069f1a5ab12SMel Gorman 	.stop	= frag_stop,
2070f1a5ab12SMel Gorman 	.show	= extfrag_show,
2071f1a5ab12SMel Gorman };
2072f1a5ab12SMel Gorman 
2073f1a5ab12SMel Gorman static int extfrag_open(struct inode *inode, struct file *file)
2074f1a5ab12SMel Gorman {
2075f1a5ab12SMel Gorman 	return seq_open(file, &extfrag_op);
2076f1a5ab12SMel Gorman }
2077f1a5ab12SMel Gorman 
2078f1a5ab12SMel Gorman static const struct file_operations extfrag_file_ops = {
2079f1a5ab12SMel Gorman 	.open		= extfrag_open,
2080f1a5ab12SMel Gorman 	.read		= seq_read,
2081f1a5ab12SMel Gorman 	.llseek		= seq_lseek,
2082f1a5ab12SMel Gorman 	.release	= seq_release,
2083f1a5ab12SMel Gorman };
2084f1a5ab12SMel Gorman 
2085d7a5752cSMel Gorman static int __init extfrag_debug_init(void)
2086d7a5752cSMel Gorman {
2087bde8bd8aSSasikantha babu 	struct dentry *extfrag_debug_root;
2088bde8bd8aSSasikantha babu 
2089d7a5752cSMel Gorman 	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
2090d7a5752cSMel Gorman 	if (!extfrag_debug_root)
2091d7a5752cSMel Gorman 		return -ENOMEM;
2092d7a5752cSMel Gorman 
2093d7a5752cSMel Gorman 	if (!debugfs_create_file("unusable_index", 0444,
2094d7a5752cSMel Gorman 			extfrag_debug_root, NULL, &unusable_file_ops))
2095bde8bd8aSSasikantha babu 		goto fail;
2096d7a5752cSMel Gorman 
2097f1a5ab12SMel Gorman 	if (!debugfs_create_file("extfrag_index", 0444,
2098f1a5ab12SMel Gorman 			extfrag_debug_root, NULL, &extfrag_file_ops))
2099bde8bd8aSSasikantha babu 		goto fail;
2100f1a5ab12SMel Gorman 
2101d7a5752cSMel Gorman 	return 0;
2102bde8bd8aSSasikantha babu fail:
2103bde8bd8aSSasikantha babu 	debugfs_remove_recursive(extfrag_debug_root);
2104bde8bd8aSSasikantha babu 	return -ENOMEM;
2105d7a5752cSMel Gorman }
2106d7a5752cSMel Gorman 
2107d7a5752cSMel Gorman module_init(extfrag_debug_init);
2108d7a5752cSMel Gorman #endif
2109