xref: /linux/mm/vmstat.c (revision 88f5acf88ae6a9778f6d25d0d5d7ec2d57764a97)
1f6ac2354SChristoph Lameter /*
2f6ac2354SChristoph Lameter  *  linux/mm/vmstat.c
3f6ac2354SChristoph Lameter  *
4f6ac2354SChristoph Lameter  *  Manages VM statistics
5f6ac2354SChristoph Lameter  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
62244b95aSChristoph Lameter  *
72244b95aSChristoph Lameter  *  zoned VM statistics
82244b95aSChristoph Lameter  *  Copyright (C) 2006 Silicon Graphics, Inc.,
92244b95aSChristoph Lameter  *		Christoph Lameter <christoph@lameter.com>
10f6ac2354SChristoph Lameter  */
118f32f7e5SAlexey Dobriyan #include <linux/fs.h>
12f6ac2354SChristoph Lameter #include <linux/mm.h>
134e950f6fSAlexey Dobriyan #include <linux/err.h>
142244b95aSChristoph Lameter #include <linux/module.h>
155a0e3ad6STejun Heo #include <linux/slab.h>
16df9ecabaSChristoph Lameter #include <linux/cpu.h>
17c748e134SAdrian Bunk #include <linux/vmstat.h>
18e8edc6e0SAlexey Dobriyan #include <linux/sched.h>
19f1a5ab12SMel Gorman #include <linux/math64.h>
2079da826aSMichael Rubin #include <linux/writeback.h>
2136deb0beSNamhyung Kim #include <linux/compaction.h>
22f6ac2354SChristoph Lameter 
23f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
24f8891e5eSChristoph Lameter DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
25f8891e5eSChristoph Lameter EXPORT_PER_CPU_SYMBOL(vm_event_states);
26f8891e5eSChristoph Lameter 
2731f961a8SMinchan Kim static void sum_vm_events(unsigned long *ret)
28f8891e5eSChristoph Lameter {
299eccf2a8SChristoph Lameter 	int cpu;
30f8891e5eSChristoph Lameter 	int i;
31f8891e5eSChristoph Lameter 
32f8891e5eSChristoph Lameter 	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
33f8891e5eSChristoph Lameter 
3431f961a8SMinchan Kim 	for_each_online_cpu(cpu) {
35f8891e5eSChristoph Lameter 		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
36f8891e5eSChristoph Lameter 
37f8891e5eSChristoph Lameter 		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
38f8891e5eSChristoph Lameter 			ret[i] += this->event[i];
39f8891e5eSChristoph Lameter 	}
40f8891e5eSChristoph Lameter }
41f8891e5eSChristoph Lameter 
42f8891e5eSChristoph Lameter /*
43f8891e5eSChristoph Lameter  * Accumulate the vm event counters across all CPUs.
44f8891e5eSChristoph Lameter  * The result is unavoidably approximate - it can change
45f8891e5eSChristoph Lameter  * during and after execution of this function.
46f8891e5eSChristoph Lameter  */
47f8891e5eSChristoph Lameter void all_vm_events(unsigned long *ret)
48f8891e5eSChristoph Lameter {
49b5be1132SKOSAKI Motohiro 	get_online_cpus();
5031f961a8SMinchan Kim 	sum_vm_events(ret);
51b5be1132SKOSAKI Motohiro 	put_online_cpus();
52f8891e5eSChristoph Lameter }
5332dd66fcSHeiko Carstens EXPORT_SYMBOL_GPL(all_vm_events);
54f8891e5eSChristoph Lameter 
55f8891e5eSChristoph Lameter #ifdef CONFIG_HOTPLUG
56f8891e5eSChristoph Lameter /*
57f8891e5eSChristoph Lameter  * Fold the foreign cpu events into our own.
58f8891e5eSChristoph Lameter  *
59f8891e5eSChristoph Lameter  * This adds to the events on one processor
60f8891e5eSChristoph Lameter  * but keeps the global counts constant.
61f8891e5eSChristoph Lameter  */
62f8891e5eSChristoph Lameter void vm_events_fold_cpu(int cpu)
63f8891e5eSChristoph Lameter {
64f8891e5eSChristoph Lameter 	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
65f8891e5eSChristoph Lameter 	int i;
66f8891e5eSChristoph Lameter 
67f8891e5eSChristoph Lameter 	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
68f8891e5eSChristoph Lameter 		count_vm_events(i, fold_state->event[i]);
69f8891e5eSChristoph Lameter 		fold_state->event[i] = 0;
70f8891e5eSChristoph Lameter 	}
71f8891e5eSChristoph Lameter }
72f8891e5eSChristoph Lameter #endif /* CONFIG_HOTPLUG */
73f8891e5eSChristoph Lameter 
74f8891e5eSChristoph Lameter #endif /* CONFIG_VM_EVENT_COUNTERS */
75f8891e5eSChristoph Lameter 
762244b95aSChristoph Lameter /*
772244b95aSChristoph Lameter  * Manage combined zone based / global counters
782244b95aSChristoph Lameter  *
792244b95aSChristoph Lameter  * vm_stat contains the global counters
802244b95aSChristoph Lameter  */
812244b95aSChristoph Lameter atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
822244b95aSChristoph Lameter EXPORT_SYMBOL(vm_stat);
832244b95aSChristoph Lameter 
842244b95aSChristoph Lameter #ifdef CONFIG_SMP
852244b95aSChristoph Lameter 
86*88f5acf8SMel Gorman static int calculate_pressure_threshold(struct zone *zone)
87*88f5acf8SMel Gorman {
88*88f5acf8SMel Gorman 	int threshold;
89*88f5acf8SMel Gorman 	int watermark_distance;
90*88f5acf8SMel Gorman 
91*88f5acf8SMel Gorman 	/*
92*88f5acf8SMel Gorman 	 * As vmstats are not up to date, there is drift between the estimated
93*88f5acf8SMel Gorman 	 * and real values. For high thresholds and a high number of CPUs, it
94*88f5acf8SMel Gorman 	 * is possible for the min watermark to be breached while the estimated
95*88f5acf8SMel Gorman 	 * value looks fine. The pressure threshold is a reduced value such
96*88f5acf8SMel Gorman 	 * that even the maximum amount of drift will not accidentally breach
97*88f5acf8SMel Gorman 	 * the min watermark
98*88f5acf8SMel Gorman 	 */
99*88f5acf8SMel Gorman 	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
100*88f5acf8SMel Gorman 	threshold = max(1, (int)(watermark_distance / num_online_cpus()));
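	/*
	 * For example (illustrative numbers): with a low-to-min watermark
	 * gap of 256 pages and 8 online CPUs, threshold = max(1, 256 / 8)
	 * = 32, so each CPU may hold at most 32 pages of unaccounted drift
	 * while the zone is under pressure.
	 */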
101*88f5acf8SMel Gorman 
102*88f5acf8SMel Gorman 	/*
103*88f5acf8SMel Gorman 	 * Maximum threshold is 125
104*88f5acf8SMel Gorman 	 */
105*88f5acf8SMel Gorman 	threshold = min(125, threshold);
106*88f5acf8SMel Gorman 
107*88f5acf8SMel Gorman 	return threshold;
108*88f5acf8SMel Gorman }
109*88f5acf8SMel Gorman 
110df9ecabaSChristoph Lameter static int calculate_threshold(struct zone *zone)
111df9ecabaSChristoph Lameter {
112df9ecabaSChristoph Lameter 	int threshold;
113df9ecabaSChristoph Lameter 	int mem;	/* memory in 128 MB units */
1142244b95aSChristoph Lameter 
1152244b95aSChristoph Lameter 	/*
116df9ecabaSChristoph Lameter 	 * The threshold scales with the number of processors and the amount
117df9ecabaSChristoph Lameter 	 * of memory per zone. More memory means that we can defer updates for
118df9ecabaSChristoph Lameter 	 * longer, but more processors could lead to more contention.
119df9ecabaSChristoph Lameter 	 * fls() is used as a cheap way of getting logarithmic scaling.
1202244b95aSChristoph Lameter 	 *
121df9ecabaSChristoph Lameter 	 * Some sample thresholds:
122df9ecabaSChristoph Lameter 	 *
123df9ecabaSChristoph Lameter 	 * Threshold	Processors	(fls)	Zonesize	fls(mem+1)
124df9ecabaSChristoph Lameter 	 * ------------------------------------------------------------------
125df9ecabaSChristoph Lameter 	 * 8		1		1	0.9-1 GB	4
126df9ecabaSChristoph Lameter 	 * 16		2		2	0.9-1 GB	4
127df9ecabaSChristoph Lameter 	 * 20 		2		2	1-2 GB		5
128df9ecabaSChristoph Lameter 	 * 24		2		2	2-4 GB		6
129df9ecabaSChristoph Lameter 	 * 28		2		2	4-8 GB		7
130df9ecabaSChristoph Lameter 	 * 32		2		2	8-16 GB		8
131df9ecabaSChristoph Lameter 	 * 4		2		2	<128M		1
132df9ecabaSChristoph Lameter 	 * 30		4		3	2-4 GB		5
133df9ecabaSChristoph Lameter 	 * 48		4		3	8-16 GB		8
134df9ecabaSChristoph Lameter 	 * 32		8		4	1-2 GB		4
135df9ecabaSChristoph Lameter 	 * 32		8		4	0.9-1GB		4
136df9ecabaSChristoph Lameter 	 * 10		16		5	<128M		1
137df9ecabaSChristoph Lameter 	 * 40		16		5	900M		4
138df9ecabaSChristoph Lameter 	 * 70		64		7	2-4 GB		5
139df9ecabaSChristoph Lameter 	 * 84		64		7	4-8 GB		6
140df9ecabaSChristoph Lameter 	 * 108		512		9	4-8 GB		6
141df9ecabaSChristoph Lameter 	 * 125		1024		10	8-16 GB		8
142df9ecabaSChristoph Lameter 	 * 125		1024		10	16-32 GB	9
1432244b95aSChristoph Lameter 	 */
144df9ecabaSChristoph Lameter 
145df9ecabaSChristoph Lameter 	mem = zone->present_pages >> (27 - PAGE_SHIFT);
146df9ecabaSChristoph Lameter 
147df9ecabaSChristoph Lameter 	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
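	/*
	 * For example, with 2 online CPUs and a 2-4 GB zone,
	 * mem = present_pages >> (27 - PAGE_SHIFT) falls in 16..31, so
	 * fls(num_online_cpus()) = 2 and fls(mem) = 5, giving
	 * threshold = 2 * 2 * (1 + 5) = 24, matching the 2-CPU 2-4 GB row
	 * of the table above.
	 */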
148df9ecabaSChristoph Lameter 
149df9ecabaSChristoph Lameter 	/*
150df9ecabaSChristoph Lameter 	 * Maximum threshold is 125
151df9ecabaSChristoph Lameter 	 */
152df9ecabaSChristoph Lameter 	threshold = min(125, threshold);
153df9ecabaSChristoph Lameter 
154df9ecabaSChristoph Lameter 	return threshold;
155df9ecabaSChristoph Lameter }
156df9ecabaSChristoph Lameter 
157df9ecabaSChristoph Lameter /*
158df9ecabaSChristoph Lameter  * Refresh the thresholds for each zone.
159df9ecabaSChristoph Lameter  */
160df9ecabaSChristoph Lameter static void refresh_zone_stat_thresholds(void)
1612244b95aSChristoph Lameter {
162df9ecabaSChristoph Lameter 	struct zone *zone;
163df9ecabaSChristoph Lameter 	int cpu;
164df9ecabaSChristoph Lameter 	int threshold;
165df9ecabaSChristoph Lameter 
166ee99c71cSKOSAKI Motohiro 	for_each_populated_zone(zone) {
167aa454840SChristoph Lameter 		unsigned long max_drift, tolerate_drift;
168aa454840SChristoph Lameter 
169df9ecabaSChristoph Lameter 		threshold = calculate_threshold(zone);
170df9ecabaSChristoph Lameter 
171df9ecabaSChristoph Lameter 		for_each_online_cpu(cpu)
17299dcc3e5SChristoph Lameter 			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
17399dcc3e5SChristoph Lameter 							= threshold;
174aa454840SChristoph Lameter 
175aa454840SChristoph Lameter 		/*
176aa454840SChristoph Lameter 		 * Only set percpu_drift_mark if there is a danger that
177aa454840SChristoph Lameter 		 * NR_FREE_PAGES reports that the low watermark is ok when in fact
178aa454840SChristoph Lameter 		 * the min watermark could be breached by an allocation
179aa454840SChristoph Lameter 		 */
180aa454840SChristoph Lameter 		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
181aa454840SChristoph Lameter 		max_drift = num_online_cpus() * threshold;
182aa454840SChristoph Lameter 		if (max_drift > tolerate_drift)
183aa454840SChristoph Lameter 			zone->percpu_drift_mark = high_wmark_pages(zone) +
184aa454840SChristoph Lameter 					max_drift;
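		/*
		 * For example, with 64 online CPUs and a threshold of 125,
		 * the NR_FREE_PAGES estimate can be off by up to
		 * 64 * 125 = 8000 pages, so percpu_drift_mark ends up
		 * 8000 pages above the high watermark.
		 */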
185df9ecabaSChristoph Lameter 	}
1862244b95aSChristoph Lameter }
1872244b95aSChristoph Lameter 
188*88f5acf8SMel Gorman void reduce_pgdat_percpu_threshold(pg_data_t *pgdat)
189*88f5acf8SMel Gorman {
190*88f5acf8SMel Gorman 	struct zone *zone;
191*88f5acf8SMel Gorman 	int cpu;
192*88f5acf8SMel Gorman 	int threshold;
193*88f5acf8SMel Gorman 	int i;
194*88f5acf8SMel Gorman 
195*88f5acf8SMel Gorman 	get_online_cpus();
196*88f5acf8SMel Gorman 	for (i = 0; i < pgdat->nr_zones; i++) {
197*88f5acf8SMel Gorman 		zone = &pgdat->node_zones[i];
198*88f5acf8SMel Gorman 		if (!zone->percpu_drift_mark)
199*88f5acf8SMel Gorman 			continue;
200*88f5acf8SMel Gorman 
201*88f5acf8SMel Gorman 		threshold = calculate_pressure_threshold(zone);
202*88f5acf8SMel Gorman 		for_each_online_cpu(cpu)
203*88f5acf8SMel Gorman 			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
204*88f5acf8SMel Gorman 							= threshold;
205*88f5acf8SMel Gorman 	}
206*88f5acf8SMel Gorman 	put_online_cpus();
207*88f5acf8SMel Gorman }
208*88f5acf8SMel Gorman 
209*88f5acf8SMel Gorman void restore_pgdat_percpu_threshold(pg_data_t *pgdat)
210*88f5acf8SMel Gorman {
211*88f5acf8SMel Gorman 	struct zone *zone;
212*88f5acf8SMel Gorman 	int cpu;
213*88f5acf8SMel Gorman 	int threshold;
214*88f5acf8SMel Gorman 	int i;
215*88f5acf8SMel Gorman 
216*88f5acf8SMel Gorman 	get_online_cpus();
217*88f5acf8SMel Gorman 	for (i = 0; i < pgdat->nr_zones; i++) {
218*88f5acf8SMel Gorman 		zone = &pgdat->node_zones[i];
219*88f5acf8SMel Gorman 		if (!zone->percpu_drift_mark)
220*88f5acf8SMel Gorman 			continue;
221*88f5acf8SMel Gorman 
222*88f5acf8SMel Gorman 		threshold = calculate_threshold(zone);
223*88f5acf8SMel Gorman 		for_each_online_cpu(cpu)
224*88f5acf8SMel Gorman 			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
225*88f5acf8SMel Gorman 							= threshold;
226*88f5acf8SMel Gorman 	}
227*88f5acf8SMel Gorman 	put_online_cpus();
228*88f5acf8SMel Gorman }
229*88f5acf8SMel Gorman 
2302244b95aSChristoph Lameter /*
2312244b95aSChristoph Lameter  * For use when we know that interrupts are disabled.
2322244b95aSChristoph Lameter  */
2332244b95aSChristoph Lameter void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
2342244b95aSChristoph Lameter 				int delta)
2352244b95aSChristoph Lameter {
23612938a92SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
23712938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
2382244b95aSChristoph Lameter 	long x;
23912938a92SChristoph Lameter 	long t;
2402244b95aSChristoph Lameter 
24112938a92SChristoph Lameter 	x = delta + __this_cpu_read(*p);
2422244b95aSChristoph Lameter 
24312938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
24412938a92SChristoph Lameter 
24512938a92SChristoph Lameter 	if (unlikely(x > t || x < -t)) {
2462244b95aSChristoph Lameter 		zone_page_state_add(x, zone, item);
2472244b95aSChristoph Lameter 		x = 0;
2482244b95aSChristoph Lameter 	}
24912938a92SChristoph Lameter 	__this_cpu_write(*p, x);
2502244b95aSChristoph Lameter }
2512244b95aSChristoph Lameter EXPORT_SYMBOL(__mod_zone_page_state);
2522244b95aSChristoph Lameter 
2532244b95aSChristoph Lameter /*
2542244b95aSChristoph Lameter  * Optimized increment and decrement functions.
2552244b95aSChristoph Lameter  *
2562244b95aSChristoph Lameter  * These are only for a single page and therefore can take a struct page *
2572244b95aSChristoph Lameter  * argument instead of struct zone *. This allows the inclusion of the code
2582244b95aSChristoph Lameter  * generated for page_zone(page) into the optimized functions.
2592244b95aSChristoph Lameter  *
2602244b95aSChristoph Lameter  * No overflow check is necessary and therefore the differential can be
2612244b95aSChristoph Lameter  * incremented or decremented in place which may allow the compilers to
2622244b95aSChristoph Lameter  * generate better code.
2632244b95aSChristoph Lameter  * The increment or decrement is known and therefore one boundary check can
2642244b95aSChristoph Lameter  * be omitted.
2652244b95aSChristoph Lameter  *
266df9ecabaSChristoph Lameter  * NOTE: These functions are very performance sensitive. Change only
267df9ecabaSChristoph Lameter  * with care.
268df9ecabaSChristoph Lameter  *
2692244b95aSChristoph Lameter  * Some processors have inc/dec instructions that are atomic vs an interrupt.
2702244b95aSChristoph Lameter  * However, the code must first determine the differential location in a zone
2712244b95aSChristoph Lameter  * based on the processor number and then inc/dec the counter. There is no
2722244b95aSChristoph Lameter  * guarantee without disabling preemption that the processor will not change
2732244b95aSChristoph Lameter  * in between and therefore the atomicity vs. interrupt cannot be exploited
2742244b95aSChristoph Lameter  * in a useful way here.
2752244b95aSChristoph Lameter  */
276c8785385SChristoph Lameter void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
2772244b95aSChristoph Lameter {
27812938a92SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
27912938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
28012938a92SChristoph Lameter 	s8 v, t;
2812244b95aSChristoph Lameter 
282908ee0f1SChristoph Lameter 	v = __this_cpu_inc_return(*p);
28312938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
28412938a92SChristoph Lameter 	if (unlikely(v > t)) {
28512938a92SChristoph Lameter 		s8 overstep = t >> 1;
2862244b95aSChristoph Lameter 
28712938a92SChristoph Lameter 		zone_page_state_add(v + overstep, zone, item);
28812938a92SChristoph Lameter 		__this_cpu_write(*p, -overstep);
2892244b95aSChristoph Lameter 	}
2902244b95aSChristoph Lameter }
291ca889e6cSChristoph Lameter 
292ca889e6cSChristoph Lameter void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
293ca889e6cSChristoph Lameter {
294ca889e6cSChristoph Lameter 	__inc_zone_state(page_zone(page), item);
295ca889e6cSChristoph Lameter }
2962244b95aSChristoph Lameter EXPORT_SYMBOL(__inc_zone_page_state);
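/*
 * Usage sketch (illustrative; NR_FILE_MAPPED is chosen here only as an
 * example item): a caller that already runs with interrupts disabled,
 * e.g. while updating reverse-map state, can simply do
 *
 *	__inc_zone_page_state(page, NR_FILE_MAPPED);
 *
 * and the update is absorbed by the per-cpu differential until it
 * crosses stat_threshold, at which point it is folded into the zone
 * and global counters.
 */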
2972244b95aSChristoph Lameter 
298c8785385SChristoph Lameter void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
2992244b95aSChristoph Lameter {
30012938a92SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
30112938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
30212938a92SChristoph Lameter 	s8 v, t;
3032244b95aSChristoph Lameter 
304908ee0f1SChristoph Lameter 	v = __this_cpu_dec_return(*p);
30512938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
30612938a92SChristoph Lameter 	if (unlikely(v < -t)) {
30712938a92SChristoph Lameter 		s8 overstep = t >> 1;
3082244b95aSChristoph Lameter 
30912938a92SChristoph Lameter 		zone_page_state_add(v - overstep, zone, item);
31012938a92SChristoph Lameter 		__this_cpu_write(*p, overstep);
3112244b95aSChristoph Lameter 	}
3122244b95aSChristoph Lameter }
313c8785385SChristoph Lameter 
314c8785385SChristoph Lameter void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
315c8785385SChristoph Lameter {
316c8785385SChristoph Lameter 	__dec_zone_state(page_zone(page), item);
317c8785385SChristoph Lameter }
3182244b95aSChristoph Lameter EXPORT_SYMBOL(__dec_zone_page_state);
3192244b95aSChristoph Lameter 
3207c839120SChristoph Lameter #ifdef CONFIG_CMPXCHG_LOCAL
3217c839120SChristoph Lameter /*
3227c839120SChristoph Lameter  * If we have cmpxchg_local support then we do not need to incur the overhead
3237c839120SChristoph Lameter  * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
3247c839120SChristoph Lameter  *
3257c839120SChristoph Lameter  * mod_state() modifies the zone counter state through atomic per cpu
3267c839120SChristoph Lameter  * operations.
3277c839120SChristoph Lameter  *
3287c839120SChristoph Lameter  * Overstep mode specifies how overstep should be handled:
3297c839120SChristoph Lameter  *     0       No overstepping
3307c839120SChristoph Lameter  *     1       Overstepping half of threshold
3317c839120SChristoph Lameter  *     -1      Overstepping minus half of threshold
3327c839120SChristoph Lameter  */
3337c839120SChristoph Lameter static inline void mod_state(struct zone *zone,
3347c839120SChristoph Lameter        enum zone_stat_item item, int delta, int overstep_mode)
3357c839120SChristoph Lameter {
3367c839120SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
3377c839120SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
3387c839120SChristoph Lameter 	long o, n, t, z;
3397c839120SChristoph Lameter 
3407c839120SChristoph Lameter 	do {
3417c839120SChristoph Lameter 		z = 0;  /* overflow to zone counters */
3427c839120SChristoph Lameter 
3437c839120SChristoph Lameter 		/*
3447c839120SChristoph Lameter 		 * The fetching of the stat_threshold is racy. We may apply
3457c839120SChristoph Lameter 		 * a counter threshold to the wrong cpu if we get
3467c839120SChristoph Lameter 		 * rescheduled while executing here. However, the following
3477c839120SChristoph Lameter 		 * will apply the threshold again and therefore bring the
3487c839120SChristoph Lameter 		 * counter under the threshold.
3497c839120SChristoph Lameter 		 */
3507c839120SChristoph Lameter 		t = this_cpu_read(pcp->stat_threshold);
3517c839120SChristoph Lameter 
3527c839120SChristoph Lameter 		o = this_cpu_read(*p);
3537c839120SChristoph Lameter 		n = delta + o;
3547c839120SChristoph Lameter 
3557c839120SChristoph Lameter 		if (n > t || n < -t) {
3567c839120SChristoph Lameter 			int os = overstep_mode * (t >> 1);
3577c839120SChristoph Lameter 
3587c839120SChristoph Lameter 			/* Overflow must be added to zone counters */
3597c839120SChristoph Lameter 			z = n + os;
3607c839120SChristoph Lameter 			n = -os;
3617c839120SChristoph Lameter 		}
3627c839120SChristoph Lameter 	} while (this_cpu_cmpxchg(*p, o, n) != o);
3637c839120SChristoph Lameter 
3647c839120SChristoph Lameter 	if (z)
3657c839120SChristoph Lameter 		zone_page_state_add(z, zone, item);
3667c839120SChristoph Lameter }
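/*
 * For example, with stat_threshold t = 32 and overstep mode 1: if the
 * per-cpu differential reaches n = 33, then os = 16, so z = 49 is folded
 * into the zone counter and the differential is reset to -16, leaving
 * headroom before the threshold is hit again.
 */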
3677c839120SChristoph Lameter 
3687c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
3697c839120SChristoph Lameter 					int delta)
3707c839120SChristoph Lameter {
3717c839120SChristoph Lameter 	mod_state(zone, item, delta, 0);
3727c839120SChristoph Lameter }
3737c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state);
3747c839120SChristoph Lameter 
3757c839120SChristoph Lameter void inc_zone_state(struct zone *zone, enum zone_stat_item item)
3767c839120SChristoph Lameter {
3777c839120SChristoph Lameter 	mod_state(zone, item, 1, 1);
3787c839120SChristoph Lameter }
3797c839120SChristoph Lameter 
3807c839120SChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item)
3817c839120SChristoph Lameter {
3827c839120SChristoph Lameter 	mod_state(page_zone(page), item, 1, 1);
3837c839120SChristoph Lameter }
3847c839120SChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state);
3857c839120SChristoph Lameter 
3867c839120SChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item)
3877c839120SChristoph Lameter {
3887c839120SChristoph Lameter 	mod_state(page_zone(page), item, -1, -1);
3897c839120SChristoph Lameter }
3907c839120SChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state);
3917c839120SChristoph Lameter #else
3927c839120SChristoph Lameter /*
3937c839120SChristoph Lameter  * Use interrupt disable to serialize counter updates
3947c839120SChristoph Lameter  */
3957c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
3967c839120SChristoph Lameter 					int delta)
3977c839120SChristoph Lameter {
3987c839120SChristoph Lameter 	unsigned long flags;
3997c839120SChristoph Lameter 
4007c839120SChristoph Lameter 	local_irq_save(flags);
4017c839120SChristoph Lameter 	__mod_zone_page_state(zone, item, delta);
4027c839120SChristoph Lameter 	local_irq_restore(flags);
4037c839120SChristoph Lameter }
4047c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state);
4057c839120SChristoph Lameter 
406ca889e6cSChristoph Lameter void inc_zone_state(struct zone *zone, enum zone_stat_item item)
407ca889e6cSChristoph Lameter {
408ca889e6cSChristoph Lameter 	unsigned long flags;
409ca889e6cSChristoph Lameter 
410ca889e6cSChristoph Lameter 	local_irq_save(flags);
411ca889e6cSChristoph Lameter 	__inc_zone_state(zone, item);
412ca889e6cSChristoph Lameter 	local_irq_restore(flags);
413ca889e6cSChristoph Lameter }
414ca889e6cSChristoph Lameter 
4152244b95aSChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item)
4162244b95aSChristoph Lameter {
4172244b95aSChristoph Lameter 	unsigned long flags;
4182244b95aSChristoph Lameter 	struct zone *zone;
4192244b95aSChristoph Lameter 
4202244b95aSChristoph Lameter 	zone = page_zone(page);
4212244b95aSChristoph Lameter 	local_irq_save(flags);
422ca889e6cSChristoph Lameter 	__inc_zone_state(zone, item);
4232244b95aSChristoph Lameter 	local_irq_restore(flags);
4242244b95aSChristoph Lameter }
4252244b95aSChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state);
4262244b95aSChristoph Lameter 
4272244b95aSChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item)
4282244b95aSChristoph Lameter {
4292244b95aSChristoph Lameter 	unsigned long flags;
4302244b95aSChristoph Lameter 
4312244b95aSChristoph Lameter 	local_irq_save(flags);
432a302eb4eSChristoph Lameter 	__dec_zone_page_state(page, item);
4332244b95aSChristoph Lameter 	local_irq_restore(flags);
4342244b95aSChristoph Lameter }
4352244b95aSChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state);
4367c839120SChristoph Lameter #endif
4372244b95aSChristoph Lameter 
4382244b95aSChristoph Lameter /*
4392244b95aSChristoph Lameter  * Update the zone counters for one cpu.
4404037d452SChristoph Lameter  *
441a7f75e25SChristoph Lameter  * The cpu specified must be either the current cpu or a processor that
442a7f75e25SChristoph Lameter  * is not online. If it is the current cpu then the execution thread must
443a7f75e25SChristoph Lameter  * be pinned to the current cpu.
444a7f75e25SChristoph Lameter  *
4454037d452SChristoph Lameter  * Note that refresh_cpu_vm_stats strives to only access
4464037d452SChristoph Lameter  * node local memory. The per cpu pagesets on remote zones are placed
4474037d452SChristoph Lameter  * in the memory local to the processor using that pageset. So the
4484037d452SChristoph Lameter  * loop over all zones will access a series of cachelines local to
4494037d452SChristoph Lameter  * the processor.
4504037d452SChristoph Lameter  *
4514037d452SChristoph Lameter  * The call to zone_page_state_add updates the cachelines with the
4524037d452SChristoph Lameter  * statistics in the remote zone struct as well as the global cachelines
4534037d452SChristoph Lameter  * with the global counters. These could cause remote node cache line
4544037d452SChristoph Lameter  * bouncing and will have to be only done when necessary.
4552244b95aSChristoph Lameter  */
4562244b95aSChristoph Lameter void refresh_cpu_vm_stats(int cpu)
4572244b95aSChristoph Lameter {
4582244b95aSChristoph Lameter 	struct zone *zone;
4592244b95aSChristoph Lameter 	int i;
460a7f75e25SChristoph Lameter 	int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
4612244b95aSChristoph Lameter 
462ee99c71cSKOSAKI Motohiro 	for_each_populated_zone(zone) {
4634037d452SChristoph Lameter 		struct per_cpu_pageset *p;
4642244b95aSChristoph Lameter 
46599dcc3e5SChristoph Lameter 		p = per_cpu_ptr(zone->pageset, cpu);
4662244b95aSChristoph Lameter 
4672244b95aSChristoph Lameter 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
4684037d452SChristoph Lameter 			if (p->vm_stat_diff[i]) {
469a7f75e25SChristoph Lameter 				unsigned long flags;
470a7f75e25SChristoph Lameter 				int v;
471a7f75e25SChristoph Lameter 
4722244b95aSChristoph Lameter 				local_irq_save(flags);
473a7f75e25SChristoph Lameter 				v = p->vm_stat_diff[i];
4744037d452SChristoph Lameter 				p->vm_stat_diff[i] = 0;
475a7f75e25SChristoph Lameter 				local_irq_restore(flags);
476a7f75e25SChristoph Lameter 				atomic_long_add(v, &zone->vm_stat[i]);
477a7f75e25SChristoph Lameter 				global_diff[i] += v;
4784037d452SChristoph Lameter #ifdef CONFIG_NUMA
4794037d452SChristoph Lameter 				/* 3 seconds idle till flush */
4804037d452SChristoph Lameter 				p->expire = 3;
4814037d452SChristoph Lameter #endif
4822244b95aSChristoph Lameter 			}
483468fd62eSDimitri Sivanich 		cond_resched();
4844037d452SChristoph Lameter #ifdef CONFIG_NUMA
4854037d452SChristoph Lameter 		/*
4864037d452SChristoph Lameter 		 * Deal with draining the remote pageset of this
4874037d452SChristoph Lameter 		 * processor
4884037d452SChristoph Lameter 		 *
4894037d452SChristoph Lameter 		 * Check if there are pages remaining in this pageset;
4904037d452SChristoph Lameter 		 * if not, then there is nothing to expire.
4914037d452SChristoph Lameter 		 */
4923dfa5721SChristoph Lameter 		if (!p->expire || !p->pcp.count)
4934037d452SChristoph Lameter 			continue;
4944037d452SChristoph Lameter 
4954037d452SChristoph Lameter 		/*
4964037d452SChristoph Lameter 		 * We never drain zones local to this processor.
4974037d452SChristoph Lameter 		 */
4984037d452SChristoph Lameter 		if (zone_to_nid(zone) == numa_node_id()) {
4994037d452SChristoph Lameter 			p->expire = 0;
5004037d452SChristoph Lameter 			continue;
5014037d452SChristoph Lameter 		}
5024037d452SChristoph Lameter 
5034037d452SChristoph Lameter 		p->expire--;
5044037d452SChristoph Lameter 		if (p->expire)
5054037d452SChristoph Lameter 			continue;
5064037d452SChristoph Lameter 
5073dfa5721SChristoph Lameter 		if (p->pcp.count)
5083dfa5721SChristoph Lameter 			drain_zone_pages(zone, &p->pcp);
5094037d452SChristoph Lameter #endif
5102244b95aSChristoph Lameter 	}
511a7f75e25SChristoph Lameter 
512a7f75e25SChristoph Lameter 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
513a7f75e25SChristoph Lameter 		if (global_diff[i])
514a7f75e25SChristoph Lameter 			atomic_long_add(global_diff[i], &vm_stat[i]);
5152244b95aSChristoph Lameter }
5162244b95aSChristoph Lameter 
5172244b95aSChristoph Lameter #endif
5182244b95aSChristoph Lameter 
519ca889e6cSChristoph Lameter #ifdef CONFIG_NUMA
520ca889e6cSChristoph Lameter /*
521ca889e6cSChristoph Lameter  * preferred_zone = the zone the caller would have preferred to allocate from
522ca889e6cSChristoph Lameter  * z 	    = the zone from which the allocation occurred.
523ca889e6cSChristoph Lameter  *
524ca889e6cSChristoph Lameter  * Must be called with interrupts disabled.
525ca889e6cSChristoph Lameter  */
52618ea7e71SMel Gorman void zone_statistics(struct zone *preferred_zone, struct zone *z)
527ca889e6cSChristoph Lameter {
52818ea7e71SMel Gorman 	if (z->zone_pgdat == preferred_zone->zone_pgdat) {
529ca889e6cSChristoph Lameter 		__inc_zone_state(z, NUMA_HIT);
530ca889e6cSChristoph Lameter 	} else {
531ca889e6cSChristoph Lameter 		__inc_zone_state(z, NUMA_MISS);
53218ea7e71SMel Gorman 		__inc_zone_state(preferred_zone, NUMA_FOREIGN);
533ca889e6cSChristoph Lameter 	}
5345d292343SChristoph Lameter 	if (z->node == numa_node_id())
535ca889e6cSChristoph Lameter 		__inc_zone_state(z, NUMA_LOCAL);
536ca889e6cSChristoph Lameter 	else
537ca889e6cSChristoph Lameter 		__inc_zone_state(z, NUMA_OTHER);
538ca889e6cSChristoph Lameter }
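/*
 * For example, if a task running on node 0 prefers node 0 but the page is
 * actually taken from a zone on node 1, then node 1 accounts a NUMA_MISS
 * and a NUMA_OTHER while node 0 accounts a NUMA_FOREIGN (its memory was
 * wanted but the allocation went elsewhere).
 */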
539ca889e6cSChristoph Lameter #endif
540ca889e6cSChristoph Lameter 
541d7a5752cSMel Gorman #ifdef CONFIG_COMPACTION
54236deb0beSNamhyung Kim 
543d7a5752cSMel Gorman struct contig_page_info {
544d7a5752cSMel Gorman 	unsigned long free_pages;
545d7a5752cSMel Gorman 	unsigned long free_blocks_total;
546d7a5752cSMel Gorman 	unsigned long free_blocks_suitable;
547d7a5752cSMel Gorman };
548d7a5752cSMel Gorman 
549d7a5752cSMel Gorman /*
550d7a5752cSMel Gorman  * Calculate the number of free pages in a zone, how many contiguous
551d7a5752cSMel Gorman  * pages are free and how many are large enough to satisfy an allocation of
552d7a5752cSMel Gorman  * the target size. Note that this function makes no attempt to estimate
553d7a5752cSMel Gorman  * how many suitable free blocks there *might* be if MOVABLE pages were
554d7a5752cSMel Gorman  * migrated. Calculating that is possible, but expensive and can be
555d7a5752cSMel Gorman  * figured out from userspace
556d7a5752cSMel Gorman  * figured out from userspace.
557d7a5752cSMel Gorman static void fill_contig_page_info(struct zone *zone,
558d7a5752cSMel Gorman 				unsigned int suitable_order,
559d7a5752cSMel Gorman 				struct contig_page_info *info)
560d7a5752cSMel Gorman {
561d7a5752cSMel Gorman 	unsigned int order;
562d7a5752cSMel Gorman 
563d7a5752cSMel Gorman 	info->free_pages = 0;
564d7a5752cSMel Gorman 	info->free_blocks_total = 0;
565d7a5752cSMel Gorman 	info->free_blocks_suitable = 0;
566d7a5752cSMel Gorman 
567d7a5752cSMel Gorman 	for (order = 0; order < MAX_ORDER; order++) {
568d7a5752cSMel Gorman 		unsigned long blocks;
569d7a5752cSMel Gorman 
570d7a5752cSMel Gorman 		/* Count number of free blocks */
571d7a5752cSMel Gorman 		blocks = zone->free_area[order].nr_free;
572d7a5752cSMel Gorman 		info->free_blocks_total += blocks;
573d7a5752cSMel Gorman 
574d7a5752cSMel Gorman 		/* Count free base pages */
575d7a5752cSMel Gorman 		info->free_pages += blocks << order;
576d7a5752cSMel Gorman 
577d7a5752cSMel Gorman 		/* Count the suitable free blocks */
578d7a5752cSMel Gorman 		if (order >= suitable_order)
579d7a5752cSMel Gorman 			info->free_blocks_suitable += blocks <<
580d7a5752cSMel Gorman 						(order - suitable_order);
581d7a5752cSMel Gorman 	}
582d7a5752cSMel Gorman }
583f1a5ab12SMel Gorman 
584f1a5ab12SMel Gorman /*
585f1a5ab12SMel Gorman  * A fragmentation index only makes sense if an allocation of a requested
586f1a5ab12SMel Gorman  * size would fail. If that is true, the fragmentation index indicates
587f1a5ab12SMel Gorman  * whether external fragmentation or a lack of memory was the problem.
588f1a5ab12SMel Gorman  * The value can be used to determine if page reclaim or compaction
589f1a5ab12SMel Gorman  * should be used
590f1a5ab12SMel Gorman  */
59156de7263SMel Gorman static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
592f1a5ab12SMel Gorman {
593f1a5ab12SMel Gorman 	unsigned long requested = 1UL << order;
594f1a5ab12SMel Gorman 
595f1a5ab12SMel Gorman 	if (!info->free_blocks_total)
596f1a5ab12SMel Gorman 		return 0;
597f1a5ab12SMel Gorman 
598f1a5ab12SMel Gorman 	/* Fragmentation index only makes sense when a request would fail */
599f1a5ab12SMel Gorman 	if (info->free_blocks_suitable)
600f1a5ab12SMel Gorman 		return -1000;
601f1a5ab12SMel Gorman 
602f1a5ab12SMel Gorman 	/*
603f1a5ab12SMel Gorman 	 * Index is between 0 and 1, so return it scaled to 3 decimal places
604f1a5ab12SMel Gorman 	 *
605f1a5ab12SMel Gorman 	 * 0 => allocation would fail due to lack of memory
606f1a5ab12SMel Gorman 	 * 1 => allocation would fail due to fragmentation
607f1a5ab12SMel Gorman 	 */
608f1a5ab12SMel Gorman 	return 1000 - div_u64((1000 + div_u64(info->free_pages * 1000ULL, requested)), info->free_blocks_total);
609f1a5ab12SMel Gorman }
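/*
 * For example, for an order-2 request (requested = 4) against a zone with
 * 10000 free pages, all of them in order-0 blocks (free_blocks_total =
 * 10000, free_blocks_suitable = 0):
 *	1000 - (1000 + 10000 * 1000 / 4) / 10000 = 1000 - 250 = 750
 * i.e. an index of 0.750, pointing at external fragmentation rather than
 * a lack of memory.
 */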
61056de7263SMel Gorman 
61156de7263SMel Gorman /* Same as __fragmentation_index but allocates contig_page_info on stack */
61256de7263SMel Gorman int fragmentation_index(struct zone *zone, unsigned int order)
61356de7263SMel Gorman {
61456de7263SMel Gorman 	struct contig_page_info info;
61556de7263SMel Gorman 
61656de7263SMel Gorman 	fill_contig_page_info(zone, order, &info);
61756de7263SMel Gorman 	return __fragmentation_index(order, &info);
61856de7263SMel Gorman }
619d7a5752cSMel Gorman #endif
620d7a5752cSMel Gorman 
621d7a5752cSMel Gorman #if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION)
6228f32f7e5SAlexey Dobriyan #include <linux/proc_fs.h>
623f6ac2354SChristoph Lameter #include <linux/seq_file.h>
624f6ac2354SChristoph Lameter 
625467c996cSMel Gorman static char * const migratetype_names[MIGRATE_TYPES] = {
626467c996cSMel Gorman 	"Unmovable",
627467c996cSMel Gorman 	"Reclaimable",
628467c996cSMel Gorman 	"Movable",
629467c996cSMel Gorman 	"Reserve",
63091446b06SKOSAKI Motohiro 	"Isolate",
631467c996cSMel Gorman };
632467c996cSMel Gorman 
633f6ac2354SChristoph Lameter static void *frag_start(struct seq_file *m, loff_t *pos)
634f6ac2354SChristoph Lameter {
635f6ac2354SChristoph Lameter 	pg_data_t *pgdat;
636f6ac2354SChristoph Lameter 	loff_t node = *pos;
637f6ac2354SChristoph Lameter 	for (pgdat = first_online_pgdat();
638f6ac2354SChristoph Lameter 	     pgdat && node;
639f6ac2354SChristoph Lameter 	     pgdat = next_online_pgdat(pgdat))
640f6ac2354SChristoph Lameter 		--node;
641f6ac2354SChristoph Lameter 
642f6ac2354SChristoph Lameter 	return pgdat;
643f6ac2354SChristoph Lameter }
644f6ac2354SChristoph Lameter 
645f6ac2354SChristoph Lameter static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
646f6ac2354SChristoph Lameter {
647f6ac2354SChristoph Lameter 	pg_data_t *pgdat = (pg_data_t *)arg;
648f6ac2354SChristoph Lameter 
649f6ac2354SChristoph Lameter 	(*pos)++;
650f6ac2354SChristoph Lameter 	return next_online_pgdat(pgdat);
651f6ac2354SChristoph Lameter }
652f6ac2354SChristoph Lameter 
653f6ac2354SChristoph Lameter static void frag_stop(struct seq_file *m, void *arg)
654f6ac2354SChristoph Lameter {
655f6ac2354SChristoph Lameter }
656f6ac2354SChristoph Lameter 
657467c996cSMel Gorman /* Walk all the zones in a node and print using a callback */
658467c996cSMel Gorman static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
659467c996cSMel Gorman 		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
660f6ac2354SChristoph Lameter {
661f6ac2354SChristoph Lameter 	struct zone *zone;
662f6ac2354SChristoph Lameter 	struct zone *node_zones = pgdat->node_zones;
663f6ac2354SChristoph Lameter 	unsigned long flags;
664f6ac2354SChristoph Lameter 
665f6ac2354SChristoph Lameter 	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
666f6ac2354SChristoph Lameter 		if (!populated_zone(zone))
667f6ac2354SChristoph Lameter 			continue;
668f6ac2354SChristoph Lameter 
669f6ac2354SChristoph Lameter 		spin_lock_irqsave(&zone->lock, flags);
670467c996cSMel Gorman 		print(m, pgdat, zone);
671467c996cSMel Gorman 		spin_unlock_irqrestore(&zone->lock, flags);
672467c996cSMel Gorman 	}
673467c996cSMel Gorman }
674d7a5752cSMel Gorman #endif
675467c996cSMel Gorman 
676d7a5752cSMel Gorman #ifdef CONFIG_PROC_FS
677467c996cSMel Gorman static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
678467c996cSMel Gorman 						struct zone *zone)
679467c996cSMel Gorman {
680467c996cSMel Gorman 	int order;
681467c996cSMel Gorman 
682f6ac2354SChristoph Lameter 	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
683f6ac2354SChristoph Lameter 	for (order = 0; order < MAX_ORDER; ++order)
684f6ac2354SChristoph Lameter 		seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
685f6ac2354SChristoph Lameter 	seq_putc(m, '\n');
686f6ac2354SChristoph Lameter }
687467c996cSMel Gorman 
688467c996cSMel Gorman /*
689467c996cSMel Gorman  * This walks the free areas for each zone.
690467c996cSMel Gorman  */
691467c996cSMel Gorman static int frag_show(struct seq_file *m, void *arg)
692467c996cSMel Gorman {
693467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
694467c996cSMel Gorman 	walk_zones_in_node(m, pgdat, frag_show_print);
695467c996cSMel Gorman 	return 0;
696467c996cSMel Gorman }
697467c996cSMel Gorman 
698467c996cSMel Gorman static void pagetypeinfo_showfree_print(struct seq_file *m,
699467c996cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
700467c996cSMel Gorman {
701467c996cSMel Gorman 	int order, mtype;
702467c996cSMel Gorman 
703467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
704467c996cSMel Gorman 		seq_printf(m, "Node %4d, zone %8s, type %12s ",
705467c996cSMel Gorman 					pgdat->node_id,
706467c996cSMel Gorman 					zone->name,
707467c996cSMel Gorman 					migratetype_names[mtype]);
708467c996cSMel Gorman 		for (order = 0; order < MAX_ORDER; ++order) {
709467c996cSMel Gorman 			unsigned long freecount = 0;
710467c996cSMel Gorman 			struct free_area *area;
711467c996cSMel Gorman 			struct list_head *curr;
712467c996cSMel Gorman 
713467c996cSMel Gorman 			area = &(zone->free_area[order]);
714467c996cSMel Gorman 
715467c996cSMel Gorman 			list_for_each(curr, &area->free_list[mtype])
716467c996cSMel Gorman 				freecount++;
717467c996cSMel Gorman 			seq_printf(m, "%6lu ", freecount);
718467c996cSMel Gorman 		}
719467c996cSMel Gorman 		seq_putc(m, '\n');
720467c996cSMel Gorman 	}
721467c996cSMel Gorman }
722467c996cSMel Gorman 
723467c996cSMel Gorman /* Print out the free pages at each order for each migratetype */
724467c996cSMel Gorman static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
725467c996cSMel Gorman {
726467c996cSMel Gorman 	int order;
727467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
728467c996cSMel Gorman 
729467c996cSMel Gorman 	/* Print header */
730467c996cSMel Gorman 	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
731467c996cSMel Gorman 	for (order = 0; order < MAX_ORDER; ++order)
732467c996cSMel Gorman 		seq_printf(m, "%6d ", order);
733467c996cSMel Gorman 	seq_putc(m, '\n');
734467c996cSMel Gorman 
735467c996cSMel Gorman 	walk_zones_in_node(m, pgdat, pagetypeinfo_showfree_print);
736467c996cSMel Gorman 
737467c996cSMel Gorman 	return 0;
738467c996cSMel Gorman }
739467c996cSMel Gorman 
740467c996cSMel Gorman static void pagetypeinfo_showblockcount_print(struct seq_file *m,
741467c996cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
742467c996cSMel Gorman {
743467c996cSMel Gorman 	int mtype;
744467c996cSMel Gorman 	unsigned long pfn;
745467c996cSMel Gorman 	unsigned long start_pfn = zone->zone_start_pfn;
746467c996cSMel Gorman 	unsigned long end_pfn = start_pfn + zone->spanned_pages;
747467c996cSMel Gorman 	unsigned long count[MIGRATE_TYPES] = { 0, };
748467c996cSMel Gorman 
749467c996cSMel Gorman 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
750467c996cSMel Gorman 		struct page *page;
751467c996cSMel Gorman 
752467c996cSMel Gorman 		if (!pfn_valid(pfn))
753467c996cSMel Gorman 			continue;
754467c996cSMel Gorman 
755467c996cSMel Gorman 		page = pfn_to_page(pfn);
756eb33575cSMel Gorman 
757eb33575cSMel Gorman 		/* Watch for unexpected holes punched in the memmap */
758eb33575cSMel Gorman 		if (!memmap_valid_within(pfn, page, zone))
759e80d6a24SMel Gorman 			continue;
760eb33575cSMel Gorman 
761467c996cSMel Gorman 		mtype = get_pageblock_migratetype(page);
762467c996cSMel Gorman 
763e80d6a24SMel Gorman 		if (mtype < MIGRATE_TYPES)
764467c996cSMel Gorman 			count[mtype]++;
765467c996cSMel Gorman 	}
766467c996cSMel Gorman 
767467c996cSMel Gorman 	/* Print counts */
768467c996cSMel Gorman 	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
769467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
770467c996cSMel Gorman 		seq_printf(m, "%12lu ", count[mtype]);
771467c996cSMel Gorman 	seq_putc(m, '\n');
772467c996cSMel Gorman }
773467c996cSMel Gorman 
774467c996cSMel Gorman /* Print out the number of pageblocks for each migratetype */
775467c996cSMel Gorman static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
776467c996cSMel Gorman {
777467c996cSMel Gorman 	int mtype;
778467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
779467c996cSMel Gorman 
780467c996cSMel Gorman 	seq_printf(m, "\n%-23s", "Number of blocks type ");
781467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
782467c996cSMel Gorman 		seq_printf(m, "%12s ", migratetype_names[mtype]);
783467c996cSMel Gorman 	seq_putc(m, '\n');
784467c996cSMel Gorman 	walk_zones_in_node(m, pgdat, pagetypeinfo_showblockcount_print);
785467c996cSMel Gorman 
786467c996cSMel Gorman 	return 0;
787467c996cSMel Gorman }
788467c996cSMel Gorman 
789467c996cSMel Gorman /*
790467c996cSMel Gorman  * This prints out statistics in relation to grouping pages by mobility.
791467c996cSMel Gorman  * It is expensive to collect so do not constantly read the file.
792467c996cSMel Gorman  */
793467c996cSMel Gorman static int pagetypeinfo_show(struct seq_file *m, void *arg)
794467c996cSMel Gorman {
795467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
796467c996cSMel Gorman 
79741b25a37SKOSAKI Motohiro 	/* check memoryless node */
79841b25a37SKOSAKI Motohiro 	if (!node_state(pgdat->node_id, N_HIGH_MEMORY))
79941b25a37SKOSAKI Motohiro 		return 0;
80041b25a37SKOSAKI Motohiro 
801467c996cSMel Gorman 	seq_printf(m, "Page block order: %d\n", pageblock_order);
802467c996cSMel Gorman 	seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
803467c996cSMel Gorman 	seq_putc(m, '\n');
804467c996cSMel Gorman 	pagetypeinfo_showfree(m, pgdat);
805467c996cSMel Gorman 	pagetypeinfo_showblockcount(m, pgdat);
806467c996cSMel Gorman 
807f6ac2354SChristoph Lameter 	return 0;
808f6ac2354SChristoph Lameter }
809f6ac2354SChristoph Lameter 
8108f32f7e5SAlexey Dobriyan static const struct seq_operations fragmentation_op = {
811f6ac2354SChristoph Lameter 	.start	= frag_start,
812f6ac2354SChristoph Lameter 	.next	= frag_next,
813f6ac2354SChristoph Lameter 	.stop	= frag_stop,
814f6ac2354SChristoph Lameter 	.show	= frag_show,
815f6ac2354SChristoph Lameter };
816f6ac2354SChristoph Lameter 
8178f32f7e5SAlexey Dobriyan static int fragmentation_open(struct inode *inode, struct file *file)
8188f32f7e5SAlexey Dobriyan {
8198f32f7e5SAlexey Dobriyan 	return seq_open(file, &fragmentation_op);
8208f32f7e5SAlexey Dobriyan }
8218f32f7e5SAlexey Dobriyan 
8228f32f7e5SAlexey Dobriyan static const struct file_operations fragmentation_file_operations = {
8238f32f7e5SAlexey Dobriyan 	.open		= fragmentation_open,
8248f32f7e5SAlexey Dobriyan 	.read		= seq_read,
8258f32f7e5SAlexey Dobriyan 	.llseek		= seq_lseek,
8268f32f7e5SAlexey Dobriyan 	.release	= seq_release,
8278f32f7e5SAlexey Dobriyan };
8288f32f7e5SAlexey Dobriyan 
82974e2e8e8SAlexey Dobriyan static const struct seq_operations pagetypeinfo_op = {
830467c996cSMel Gorman 	.start	= frag_start,
831467c996cSMel Gorman 	.next	= frag_next,
832467c996cSMel Gorman 	.stop	= frag_stop,
833467c996cSMel Gorman 	.show	= pagetypeinfo_show,
834467c996cSMel Gorman };
835467c996cSMel Gorman 
83674e2e8e8SAlexey Dobriyan static int pagetypeinfo_open(struct inode *inode, struct file *file)
83774e2e8e8SAlexey Dobriyan {
83874e2e8e8SAlexey Dobriyan 	return seq_open(file, &pagetypeinfo_op);
83974e2e8e8SAlexey Dobriyan }
84074e2e8e8SAlexey Dobriyan 
84174e2e8e8SAlexey Dobriyan static const struct file_operations pagetypeinfo_file_ops = {
84274e2e8e8SAlexey Dobriyan 	.open		= pagetypeinfo_open,
84374e2e8e8SAlexey Dobriyan 	.read		= seq_read,
84474e2e8e8SAlexey Dobriyan 	.llseek		= seq_lseek,
84574e2e8e8SAlexey Dobriyan 	.release	= seq_release,
84674e2e8e8SAlexey Dobriyan };
84774e2e8e8SAlexey Dobriyan 
8484b51d669SChristoph Lameter #ifdef CONFIG_ZONE_DMA
8494b51d669SChristoph Lameter #define TEXT_FOR_DMA(xx) xx "_dma",
8504b51d669SChristoph Lameter #else
8514b51d669SChristoph Lameter #define TEXT_FOR_DMA(xx)
8524b51d669SChristoph Lameter #endif
8534b51d669SChristoph Lameter 
85427bf71c2SChristoph Lameter #ifdef CONFIG_ZONE_DMA32
85527bf71c2SChristoph Lameter #define TEXT_FOR_DMA32(xx) xx "_dma32",
85627bf71c2SChristoph Lameter #else
85727bf71c2SChristoph Lameter #define TEXT_FOR_DMA32(xx)
85827bf71c2SChristoph Lameter #endif
85927bf71c2SChristoph Lameter 
86027bf71c2SChristoph Lameter #ifdef CONFIG_HIGHMEM
86127bf71c2SChristoph Lameter #define TEXT_FOR_HIGHMEM(xx) xx "_high",
86227bf71c2SChristoph Lameter #else
86327bf71c2SChristoph Lameter #define TEXT_FOR_HIGHMEM(xx)
86427bf71c2SChristoph Lameter #endif
86527bf71c2SChristoph Lameter 
8664b51d669SChristoph Lameter #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
8672a1e274aSMel Gorman 					TEXT_FOR_HIGHMEM(xx) xx "_movable",
86827bf71c2SChristoph Lameter 
86915ad7cdcSHelge Deller static const char * const vmstat_text[] = {
8702244b95aSChristoph Lameter 	/* Zoned VM counters */
871d23ad423SChristoph Lameter 	"nr_free_pages",
8724f98a2feSRik van Riel 	"nr_inactive_anon",
8734f98a2feSRik van Riel 	"nr_active_anon",
8744f98a2feSRik van Riel 	"nr_inactive_file",
8754f98a2feSRik van Riel 	"nr_active_file",
8767b854121SLee Schermerhorn 	"nr_unevictable",
8775344b7e6SNick Piggin 	"nr_mlock",
878f3dbd344SChristoph Lameter 	"nr_anon_pages",
87965ba55f5SChristoph Lameter 	"nr_mapped",
880347ce434SChristoph Lameter 	"nr_file_pages",
88151ed4491SChristoph Lameter 	"nr_dirty",
88251ed4491SChristoph Lameter 	"nr_writeback",
883972d1a7bSChristoph Lameter 	"nr_slab_reclaimable",
884972d1a7bSChristoph Lameter 	"nr_slab_unreclaimable",
885df849a15SChristoph Lameter 	"nr_page_table_pages",
886c6a7f572SKOSAKI Motohiro 	"nr_kernel_stack",
887f6ac2354SChristoph Lameter 	"nr_unstable",
888d2c5e30cSChristoph Lameter 	"nr_bounce",
889e129b5c2SAndrew Morton 	"nr_vmscan_write",
890fc3ba692SMiklos Szeredi 	"nr_writeback_temp",
891a731286dSKOSAKI Motohiro 	"nr_isolated_anon",
892a731286dSKOSAKI Motohiro 	"nr_isolated_file",
8934b02108aSKOSAKI Motohiro 	"nr_shmem",
894ea941f0eSMichael Rubin 	"nr_dirtied",
895ea941f0eSMichael Rubin 	"nr_written",
896ea941f0eSMichael Rubin 
897ca889e6cSChristoph Lameter #ifdef CONFIG_NUMA
898ca889e6cSChristoph Lameter 	"numa_hit",
899ca889e6cSChristoph Lameter 	"numa_miss",
900ca889e6cSChristoph Lameter 	"numa_foreign",
901ca889e6cSChristoph Lameter 	"numa_interleave",
902ca889e6cSChristoph Lameter 	"numa_local",
903ca889e6cSChristoph Lameter 	"numa_other",
904ca889e6cSChristoph Lameter #endif
905e172662dSWu Fengguang 	"nr_dirty_threshold",
906e172662dSWu Fengguang 	"nr_dirty_background_threshold",
907ca889e6cSChristoph Lameter 
908f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
909f6ac2354SChristoph Lameter 	"pgpgin",
910f6ac2354SChristoph Lameter 	"pgpgout",
911f6ac2354SChristoph Lameter 	"pswpin",
912f6ac2354SChristoph Lameter 	"pswpout",
913f6ac2354SChristoph Lameter 
91427bf71c2SChristoph Lameter 	TEXTS_FOR_ZONES("pgalloc")
915f6ac2354SChristoph Lameter 
916f6ac2354SChristoph Lameter 	"pgfree",
917f6ac2354SChristoph Lameter 	"pgactivate",
918f6ac2354SChristoph Lameter 	"pgdeactivate",
919f6ac2354SChristoph Lameter 
920f6ac2354SChristoph Lameter 	"pgfault",
921f6ac2354SChristoph Lameter 	"pgmajfault",
922f6ac2354SChristoph Lameter 
92327bf71c2SChristoph Lameter 	TEXTS_FOR_ZONES("pgrefill")
92427bf71c2SChristoph Lameter 	TEXTS_FOR_ZONES("pgsteal")
92527bf71c2SChristoph Lameter 	TEXTS_FOR_ZONES("pgscan_kswapd")
92627bf71c2SChristoph Lameter 	TEXTS_FOR_ZONES("pgscan_direct")
927f6ac2354SChristoph Lameter 
92824cf7251SMel Gorman #ifdef CONFIG_NUMA
92924cf7251SMel Gorman 	"zone_reclaim_failed",
93024cf7251SMel Gorman #endif
931f6ac2354SChristoph Lameter 	"pginodesteal",
932f6ac2354SChristoph Lameter 	"slabs_scanned",
933f6ac2354SChristoph Lameter 	"kswapd_steal",
934f6ac2354SChristoph Lameter 	"kswapd_inodesteal",
935bb3ab596SKOSAKI Motohiro 	"kswapd_low_wmark_hit_quickly",
936bb3ab596SKOSAKI Motohiro 	"kswapd_high_wmark_hit_quickly",
937bb3ab596SKOSAKI Motohiro 	"kswapd_skip_congestion_wait",
938f6ac2354SChristoph Lameter 	"pageoutrun",
939f6ac2354SChristoph Lameter 	"allocstall",
940f6ac2354SChristoph Lameter 
941f6ac2354SChristoph Lameter 	"pgrotated",
942748446bbSMel Gorman 
943748446bbSMel Gorman #ifdef CONFIG_COMPACTION
944748446bbSMel Gorman 	"compact_blocks_moved",
945748446bbSMel Gorman 	"compact_pages_moved",
946748446bbSMel Gorman 	"compact_pagemigrate_failed",
94756de7263SMel Gorman 	"compact_stall",
94856de7263SMel Gorman 	"compact_fail",
94956de7263SMel Gorman 	"compact_success",
950748446bbSMel Gorman #endif
951748446bbSMel Gorman 
9523b116300SAdam Litke #ifdef CONFIG_HUGETLB_PAGE
9533b116300SAdam Litke 	"htlb_buddy_alloc_success",
9543b116300SAdam Litke 	"htlb_buddy_alloc_fail",
9553b116300SAdam Litke #endif
956bbfd28eeSLee Schermerhorn 	"unevictable_pgs_culled",
957bbfd28eeSLee Schermerhorn 	"unevictable_pgs_scanned",
958bbfd28eeSLee Schermerhorn 	"unevictable_pgs_rescued",
9595344b7e6SNick Piggin 	"unevictable_pgs_mlocked",
9605344b7e6SNick Piggin 	"unevictable_pgs_munlocked",
9615344b7e6SNick Piggin 	"unevictable_pgs_cleared",
9625344b7e6SNick Piggin 	"unevictable_pgs_stranded",
963985737cfSLee Schermerhorn 	"unevictable_pgs_mlockfreed",
964bbfd28eeSLee Schermerhorn #endif
965f6ac2354SChristoph Lameter };
966f6ac2354SChristoph Lameter 
967467c996cSMel Gorman static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
968467c996cSMel Gorman 							struct zone *zone)
969f6ac2354SChristoph Lameter {
970f6ac2354SChristoph Lameter 	int i;
971f6ac2354SChristoph Lameter 	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
972f6ac2354SChristoph Lameter 	seq_printf(m,
973f6ac2354SChristoph Lameter 		   "\n  pages free     %lu"
974f6ac2354SChristoph Lameter 		   "\n        min      %lu"
975f6ac2354SChristoph Lameter 		   "\n        low      %lu"
976f6ac2354SChristoph Lameter 		   "\n        high     %lu"
97708d9ae7cSWu Fengguang 		   "\n        scanned  %lu"
978f6ac2354SChristoph Lameter 		   "\n        spanned  %lu"
979f6ac2354SChristoph Lameter 		   "\n        present  %lu",
980*88f5acf8SMel Gorman 		   zone_page_state(zone, NR_FREE_PAGES),
98141858966SMel Gorman 		   min_wmark_pages(zone),
98241858966SMel Gorman 		   low_wmark_pages(zone),
98341858966SMel Gorman 		   high_wmark_pages(zone),
984f6ac2354SChristoph Lameter 		   zone->pages_scanned,
985f6ac2354SChristoph Lameter 		   zone->spanned_pages,
986f6ac2354SChristoph Lameter 		   zone->present_pages);
9872244b95aSChristoph Lameter 
9882244b95aSChristoph Lameter 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
9892244b95aSChristoph Lameter 		seq_printf(m, "\n    %-12s %lu", vmstat_text[i],
9902244b95aSChristoph Lameter 				zone_page_state(zone, i));
9912244b95aSChristoph Lameter 
992f6ac2354SChristoph Lameter 	seq_printf(m,
993f6ac2354SChristoph Lameter 		   "\n        protection: (%lu",
994f6ac2354SChristoph Lameter 		   zone->lowmem_reserve[0]);
995f6ac2354SChristoph Lameter 	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
996f6ac2354SChristoph Lameter 		seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
997f6ac2354SChristoph Lameter 	seq_printf(m,
998f6ac2354SChristoph Lameter 		   ")"
999f6ac2354SChristoph Lameter 		   "\n  pagesets");
1000f6ac2354SChristoph Lameter 	for_each_online_cpu(i) {
1001f6ac2354SChristoph Lameter 		struct per_cpu_pageset *pageset;
1002f6ac2354SChristoph Lameter 
100399dcc3e5SChristoph Lameter 		pageset = per_cpu_ptr(zone->pageset, i);
1004f6ac2354SChristoph Lameter 		seq_printf(m,
10053dfa5721SChristoph Lameter 			   "\n    cpu: %i"
1006f6ac2354SChristoph Lameter 			   "\n              count: %i"
1007f6ac2354SChristoph Lameter 			   "\n              high:  %i"
1008f6ac2354SChristoph Lameter 			   "\n              batch: %i",
10093dfa5721SChristoph Lameter 			   i,
10103dfa5721SChristoph Lameter 			   pageset->pcp.count,
10113dfa5721SChristoph Lameter 			   pageset->pcp.high,
10123dfa5721SChristoph Lameter 			   pageset->pcp.batch);
1013df9ecabaSChristoph Lameter #ifdef CONFIG_SMP
1014df9ecabaSChristoph Lameter 		seq_printf(m, "\n  vm stats threshold: %d",
1015df9ecabaSChristoph Lameter 				pageset->stat_threshold);
1016df9ecabaSChristoph Lameter #endif
1017f6ac2354SChristoph Lameter 	}
1018f6ac2354SChristoph Lameter 	seq_printf(m,
1019f6ac2354SChristoph Lameter 		   "\n  all_unreclaimable: %u"
1020556adecbSRik van Riel 		   "\n  start_pfn:         %lu"
1021556adecbSRik van Riel 		   "\n  inactive_ratio:    %u",
102293e4a89aSKOSAKI Motohiro 		   zone->all_unreclaimable,
1023556adecbSRik van Riel 		   zone->zone_start_pfn,
1024556adecbSRik van Riel 		   zone->inactive_ratio);
1025f6ac2354SChristoph Lameter 	seq_putc(m, '\n');
1026f6ac2354SChristoph Lameter }
1027467c996cSMel Gorman 
1028467c996cSMel Gorman /*
1029467c996cSMel Gorman  * Output information about zones in @pgdat.
1030467c996cSMel Gorman  */
1031467c996cSMel Gorman static int zoneinfo_show(struct seq_file *m, void *arg)
1032467c996cSMel Gorman {
1033467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1034467c996cSMel Gorman 	walk_zones_in_node(m, pgdat, zoneinfo_show_print);
1035f6ac2354SChristoph Lameter 	return 0;
1036f6ac2354SChristoph Lameter }
1037f6ac2354SChristoph Lameter 
10385c9fe628SAlexey Dobriyan static const struct seq_operations zoneinfo_op = {
1039f6ac2354SChristoph Lameter 	.start	= frag_start, /* iterate over all zones. The same as in
1040f6ac2354SChristoph Lameter 			       * fragmentation. */
1041f6ac2354SChristoph Lameter 	.next	= frag_next,
1042f6ac2354SChristoph Lameter 	.stop	= frag_stop,
1043f6ac2354SChristoph Lameter 	.show	= zoneinfo_show,
1044f6ac2354SChristoph Lameter };
1045f6ac2354SChristoph Lameter 
10465c9fe628SAlexey Dobriyan static int zoneinfo_open(struct inode *inode, struct file *file)
10475c9fe628SAlexey Dobriyan {
10485c9fe628SAlexey Dobriyan 	return seq_open(file, &zoneinfo_op);
10495c9fe628SAlexey Dobriyan }
10505c9fe628SAlexey Dobriyan 
10515c9fe628SAlexey Dobriyan static const struct file_operations proc_zoneinfo_file_operations = {
10525c9fe628SAlexey Dobriyan 	.open		= zoneinfo_open,
10535c9fe628SAlexey Dobriyan 	.read		= seq_read,
10545c9fe628SAlexey Dobriyan 	.llseek		= seq_lseek,
10555c9fe628SAlexey Dobriyan 	.release	= seq_release,
10565c9fe628SAlexey Dobriyan };
10575c9fe628SAlexey Dobriyan 
105879da826aSMichael Rubin enum writeback_stat_item {
105979da826aSMichael Rubin 	NR_DIRTY_THRESHOLD,
106079da826aSMichael Rubin 	NR_DIRTY_BG_THRESHOLD,
106179da826aSMichael Rubin 	NR_VM_WRITEBACK_STAT_ITEMS,
106279da826aSMichael Rubin };
106379da826aSMichael Rubin 
1064f6ac2354SChristoph Lameter static void *vmstat_start(struct seq_file *m, loff_t *pos)
1065f6ac2354SChristoph Lameter {
10662244b95aSChristoph Lameter 	unsigned long *v;
106779da826aSMichael Rubin 	int i, stat_items_size;
1068f6ac2354SChristoph Lameter 
1069f6ac2354SChristoph Lameter 	if (*pos >= ARRAY_SIZE(vmstat_text))
1070f6ac2354SChristoph Lameter 		return NULL;
107179da826aSMichael Rubin 	stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
107279da826aSMichael Rubin 			  NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
1073f6ac2354SChristoph Lameter 
1074f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
107579da826aSMichael Rubin 	stat_items_size += sizeof(struct vm_event_state);
1076f8891e5eSChristoph Lameter #endif
107779da826aSMichael Rubin 
107879da826aSMichael Rubin 	v = kmalloc(stat_items_size, GFP_KERNEL);
10792244b95aSChristoph Lameter 	m->private = v;
10802244b95aSChristoph Lameter 	if (!v)
1081f6ac2354SChristoph Lameter 		return ERR_PTR(-ENOMEM);
10822244b95aSChristoph Lameter 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
10832244b95aSChristoph Lameter 		v[i] = global_page_state(i);
108479da826aSMichael Rubin 	v += NR_VM_ZONE_STAT_ITEMS;
108579da826aSMichael Rubin 
108679da826aSMichael Rubin 	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
108779da826aSMichael Rubin 			    v + NR_DIRTY_THRESHOLD);
108879da826aSMichael Rubin 	v += NR_VM_WRITEBACK_STAT_ITEMS;
108979da826aSMichael Rubin 
1090f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
109179da826aSMichael Rubin 	all_vm_events(v);
109279da826aSMichael Rubin 	v[PGPGIN] /= 2;		/* sectors -> kbytes */
109379da826aSMichael Rubin 	v[PGPGOUT] /= 2;
1094f8891e5eSChristoph Lameter #endif
1095ff8b16d7SWu Fengguang 	return (unsigned long *)m->private + *pos;
1096f6ac2354SChristoph Lameter }
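/*
 * The snapshot built above is laid out to match vmstat_text[]: the
 * NR_VM_ZONE_STAT_ITEMS global zone counters come first, then the two
 * writeback thresholds filled in by global_dirty_limits() and, when
 * CONFIG_VM_EVENT_COUNTERS is set, the summed VM event counters.
 * vmstat_next()/vmstat_show() simply index this buffer with *pos, so each
 * /proc/vmstat line pairs vmstat_text[off] with v[off].  PGPGIN/PGPGOUT
 * are accumulated in 512-byte sectors, hence the divide by two to report
 * kilobytes (e.g. 8192 sectors -> 4096 kB).
 */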
1097f6ac2354SChristoph Lameter 
1098f6ac2354SChristoph Lameter static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
1099f6ac2354SChristoph Lameter {
1100f6ac2354SChristoph Lameter 	(*pos)++;
1101f6ac2354SChristoph Lameter 	if (*pos >= ARRAY_SIZE(vmstat_text))
1102f6ac2354SChristoph Lameter 		return NULL;
1103f6ac2354SChristoph Lameter 	return (unsigned long *)m->private + *pos;
1104f6ac2354SChristoph Lameter }
1105f6ac2354SChristoph Lameter 
1106f6ac2354SChristoph Lameter static int vmstat_show(struct seq_file *m, void *arg)
1107f6ac2354SChristoph Lameter {
1108f6ac2354SChristoph Lameter 	unsigned long *l = arg;
1109f6ac2354SChristoph Lameter 	unsigned long off = l - (unsigned long *)m->private;
1110f6ac2354SChristoph Lameter 
1111f6ac2354SChristoph Lameter 	seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
1112f6ac2354SChristoph Lameter 	return 0;
1113f6ac2354SChristoph Lameter }
1114f6ac2354SChristoph Lameter 
1115f6ac2354SChristoph Lameter static void vmstat_stop(struct seq_file *m, void *arg)
1116f6ac2354SChristoph Lameter {
1117f6ac2354SChristoph Lameter 	kfree(m->private);
1118f6ac2354SChristoph Lameter 	m->private = NULL;
1119f6ac2354SChristoph Lameter }
1120f6ac2354SChristoph Lameter 
1121b6aa44abSAlexey Dobriyan static const struct seq_operations vmstat_op = {
1122f6ac2354SChristoph Lameter 	.start	= vmstat_start,
1123f6ac2354SChristoph Lameter 	.next	= vmstat_next,
1124f6ac2354SChristoph Lameter 	.stop	= vmstat_stop,
1125f6ac2354SChristoph Lameter 	.show	= vmstat_show,
1126f6ac2354SChristoph Lameter };
1127f6ac2354SChristoph Lameter 
1128b6aa44abSAlexey Dobriyan static int vmstat_open(struct inode *inode, struct file *file)
1129b6aa44abSAlexey Dobriyan {
1130b6aa44abSAlexey Dobriyan 	return seq_open(file, &vmstat_op);
1131b6aa44abSAlexey Dobriyan }
1132b6aa44abSAlexey Dobriyan 
1133b6aa44abSAlexey Dobriyan static const struct file_operations proc_vmstat_file_operations = {
1134b6aa44abSAlexey Dobriyan 	.open		= vmstat_open,
1135b6aa44abSAlexey Dobriyan 	.read		= seq_read,
1136b6aa44abSAlexey Dobriyan 	.llseek		= seq_lseek,
1137b6aa44abSAlexey Dobriyan 	.release	= seq_release,
1138b6aa44abSAlexey Dobriyan };
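/*
 * Each /proc/vmstat line is emitted by vmstat_show() as "<name> <value>\n",
 * e.g. "pgpgin 12345" (illustrative value only).  A minimal user-space
 * reader - sketched here purely for illustration, not part of this file -
 * only has to split each line at the first space:
 *
 *	FILE *f = fopen("/proc/vmstat", "r");
 *	char name[64];
 *	unsigned long long val;
 *
 *	while (f && fscanf(f, "%63s %llu", name, &val) == 2)
 *		printf("%s = %llu\n", name, val);
 *	if (f)
 *		fclose(f);
 */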
1139f6ac2354SChristoph Lameter #endif /* CONFIG_PROC_FS */
1140f6ac2354SChristoph Lameter 
1141df9ecabaSChristoph Lameter #ifdef CONFIG_SMP
1142d1187ed2SChristoph Lameter static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
114377461ab3SChristoph Lameter int sysctl_stat_interval __read_mostly = HZ;
1144d1187ed2SChristoph Lameter 
1145d1187ed2SChristoph Lameter static void vmstat_update(struct work_struct *w)
1146d1187ed2SChristoph Lameter {
1147d1187ed2SChristoph Lameter 	refresh_cpu_vm_stats(smp_processor_id());
114877461ab3SChristoph Lameter 	schedule_delayed_work(&__get_cpu_var(vmstat_work),
114998f4ebb2SAnton Blanchard 		round_jiffies_relative(sysctl_stat_interval));
1150d1187ed2SChristoph Lameter }
1151d1187ed2SChristoph Lameter 
115242614fcdSRandy Dunlap static void __cpuinit start_cpu_timer(int cpu)
1153d1187ed2SChristoph Lameter {
11541871e52cSTejun Heo 	struct delayed_work *work = &per_cpu(vmstat_work, cpu);
1155d1187ed2SChristoph Lameter 
11561871e52cSTejun Heo 	INIT_DELAYED_WORK_DEFERRABLE(work, vmstat_update);
11571871e52cSTejun Heo 	schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
1158d1187ed2SChristoph Lameter }
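/*
 * Each online CPU gets its own deferrable delayed work item, so an idle
 * CPU is not woken up just to refresh its counters.  Once started here,
 * vmstat_update() requeues itself every sysctl_stat_interval jiffies
 * (HZ by default, i.e. roughly once per second); the round_jiffies
 * helpers align the expiry times so that periodic timers can fire
 * together and cause fewer wakeups.
 */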
1159d1187ed2SChristoph Lameter 
1160df9ecabaSChristoph Lameter /*
1161df9ecabaSChristoph Lameter  * Use the cpu notifier to ensure that the thresholds are recalculated
1162df9ecabaSChristoph Lameter  * when necessary.
1163df9ecabaSChristoph Lameter  */
1164df9ecabaSChristoph Lameter static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
1165df9ecabaSChristoph Lameter 		unsigned long action,
1166df9ecabaSChristoph Lameter 		void *hcpu)
1167df9ecabaSChristoph Lameter {
1168d1187ed2SChristoph Lameter 	long cpu = (long)hcpu;
1169d1187ed2SChristoph Lameter 
1170df9ecabaSChristoph Lameter 	switch (action) {
1171d1187ed2SChristoph Lameter 	case CPU_ONLINE:
1172d1187ed2SChristoph Lameter 	case CPU_ONLINE_FROZEN:
11735ee28a44SKAMEZAWA Hiroyuki 		refresh_zone_stat_thresholds();
1174d1187ed2SChristoph Lameter 		start_cpu_timer(cpu);
1175ad596925SChristoph Lameter 		node_set_state(cpu_to_node(cpu), N_CPU);
1176d1187ed2SChristoph Lameter 		break;
1177d1187ed2SChristoph Lameter 	case CPU_DOWN_PREPARE:
1178d1187ed2SChristoph Lameter 	case CPU_DOWN_PREPARE_FROZEN:
1179afe2c511STejun Heo 		cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
1180d1187ed2SChristoph Lameter 		per_cpu(vmstat_work, cpu).work.func = NULL;
1181d1187ed2SChristoph Lameter 		break;
1182d1187ed2SChristoph Lameter 	case CPU_DOWN_FAILED:
1183d1187ed2SChristoph Lameter 	case CPU_DOWN_FAILED_FROZEN:
1184d1187ed2SChristoph Lameter 		start_cpu_timer(cpu);
1185d1187ed2SChristoph Lameter 		break;
1186df9ecabaSChristoph Lameter 	case CPU_DEAD:
11878bb78442SRafael J. Wysocki 	case CPU_DEAD_FROZEN:
1188df9ecabaSChristoph Lameter 		refresh_zone_stat_thresholds();
1189df9ecabaSChristoph Lameter 		break;
1190df9ecabaSChristoph Lameter 	default:
1191df9ecabaSChristoph Lameter 		break;
1192df9ecabaSChristoph Lameter 	}
1193df9ecabaSChristoph Lameter 	return NOTIFY_OK;
1194df9ecabaSChristoph Lameter }
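/*
 * In short: when a CPU comes online the per-zone thresholds are
 * recomputed, its vmstat timer is started and its node is marked as
 * having a CPU; before a CPU goes down its pending work is cancelled
 * (and restarted if the offlining fails); once the CPU is dead the
 * thresholds are recomputed for the remaining CPUs.
 */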
1195df9ecabaSChristoph Lameter 
1196df9ecabaSChristoph Lameter static struct notifier_block __cpuinitdata vmstat_notifier =
1197df9ecabaSChristoph Lameter 	{ &vmstat_cpuup_callback, NULL, 0 };
11988f32f7e5SAlexey Dobriyan #endif
1199df9ecabaSChristoph Lameter 
1200e2fc88d0SAdrian Bunk static int __init setup_vmstat(void)
1201df9ecabaSChristoph Lameter {
12028f32f7e5SAlexey Dobriyan #ifdef CONFIG_SMP
1203d1187ed2SChristoph Lameter 	int cpu;
1204d1187ed2SChristoph Lameter 
1205df9ecabaSChristoph Lameter 	refresh_zone_stat_thresholds();
1206df9ecabaSChristoph Lameter 	register_cpu_notifier(&vmstat_notifier);
1207d1187ed2SChristoph Lameter 
1208d1187ed2SChristoph Lameter 	for_each_online_cpu(cpu)
1209d1187ed2SChristoph Lameter 		start_cpu_timer(cpu);
12108f32f7e5SAlexey Dobriyan #endif
12118f32f7e5SAlexey Dobriyan #ifdef CONFIG_PROC_FS
12128f32f7e5SAlexey Dobriyan 	proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
121374e2e8e8SAlexey Dobriyan 	proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
1214b6aa44abSAlexey Dobriyan 	proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
12155c9fe628SAlexey Dobriyan 	proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
12168f32f7e5SAlexey Dobriyan #endif
1217df9ecabaSChristoph Lameter 	return 0;
1218df9ecabaSChristoph Lameter }
1219df9ecabaSChristoph Lameter module_init(setup_vmstat)
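/*
 * setup_vmstat() therefore both arms the per-CPU statistics machinery on
 * SMP kernels and, with CONFIG_PROC_FS, publishes four world-readable
 * files: /proc/buddyinfo, /proc/pagetypeinfo, /proc/vmstat and
 * /proc/zoneinfo.
 */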
1220d7a5752cSMel Gorman 
1221d7a5752cSMel Gorman #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
1222d7a5752cSMel Gorman #include <linux/debugfs.h>
1223d7a5752cSMel Gorman 
1224d7a5752cSMel Gorman static struct dentry *extfrag_debug_root;
1225d7a5752cSMel Gorman 
1226d7a5752cSMel Gorman /*
1227d7a5752cSMel Gorman  * Return an index indicating how much of the available free memory is
1228d7a5752cSMel Gorman  * unusable for an allocation of the requested size.
1229d7a5752cSMel Gorman  */
1230d7a5752cSMel Gorman static int unusable_free_index(unsigned int order,
1231d7a5752cSMel Gorman 				struct contig_page_info *info)
1232d7a5752cSMel Gorman {
1233d7a5752cSMel Gorman 	/* No free memory is interpreted as all free memory is unusable */
1234d7a5752cSMel Gorman 	if (info->free_pages == 0)
1235d7a5752cSMel Gorman 		return 1000;
1236d7a5752cSMel Gorman 
1237d7a5752cSMel Gorman 	/*
1238d7a5752cSMel Gorman 	 * The index is conceptually a value between 0 and 1; return it as a
1239d7a5752cSMel Gorman 	 * fixed-point value to 3 decimal places (0 to 1000).
1240d7a5752cSMel Gorman 	 *
1241d7a5752cSMel Gorman 	 * 0 => no fragmentation
1242d7a5752cSMel Gorman 	 * 1 => high fragmentation
1243d7a5752cSMel Gorman 	 */
1244d7a5752cSMel Gorman 	return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
1245d7a5752cSMel Gorman 
1246d7a5752cSMel Gorman }
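/*
 * Worked example with illustrative numbers: for order = 2,
 * info->free_pages = 1024 and info->free_blocks_suitable = 200, the
 * suitable blocks cover 200 << 2 = 800 pages, so the index is
 * (1024 - 800) * 1000 / 1024 = 218, which unusable_show_print() below
 * renders as "0.218", i.e. roughly 21.8% of the free memory is unusable
 * for an order-2 allocation.
 */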
1247d7a5752cSMel Gorman 
1248d7a5752cSMel Gorman static void unusable_show_print(struct seq_file *m,
1249d7a5752cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1250d7a5752cSMel Gorman {
1251d7a5752cSMel Gorman 	unsigned int order;
1252d7a5752cSMel Gorman 	int index;
1253d7a5752cSMel Gorman 	struct contig_page_info info;
1254d7a5752cSMel Gorman 
1255d7a5752cSMel Gorman 	seq_printf(m, "Node %d, zone %8s ",
1256d7a5752cSMel Gorman 				pgdat->node_id,
1257d7a5752cSMel Gorman 				zone->name);
1258d7a5752cSMel Gorman 	for (order = 0; order < MAX_ORDER; ++order) {
1259d7a5752cSMel Gorman 		fill_contig_page_info(zone, order, &info);
1260d7a5752cSMel Gorman 		index = unusable_free_index(order, &info);
1261d7a5752cSMel Gorman 		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
1262d7a5752cSMel Gorman 	}
1263d7a5752cSMel Gorman 
1264d7a5752cSMel Gorman 	seq_putc(m, '\n');
1265d7a5752cSMel Gorman }
1266d7a5752cSMel Gorman 
1267d7a5752cSMel Gorman /*
1268d7a5752cSMel Gorman  * Display unusable free space index
1269d7a5752cSMel Gorman  *
1270d7a5752cSMel Gorman  * The unusable free space index measures how much of the available free
1271d7a5752cSMel Gorman  * memory cannot be used to satisfy an allocation of a given size and is a
1272d7a5752cSMel Gorman  * value between 0 and 1. The higher the value, the more of the free memory
1273d7a5752cSMel Gorman  * is unusable and, by implication, the worse the external fragmentation. This
1274d7a5752cSMel Gorman  * can be expressed as a percentage by multiplying by 100.
1275d7a5752cSMel Gorman  */
1276d7a5752cSMel Gorman static int unusable_show(struct seq_file *m, void *arg)
1277d7a5752cSMel Gorman {
1278d7a5752cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1279d7a5752cSMel Gorman 
1280d7a5752cSMel Gorman 	/* skip memoryless nodes */
1281d7a5752cSMel Gorman 	if (!node_state(pgdat->node_id, N_HIGH_MEMORY))
1282d7a5752cSMel Gorman 		return 0;
1283d7a5752cSMel Gorman 
1284d7a5752cSMel Gorman 	walk_zones_in_node(m, pgdat, unusable_show_print);
1285d7a5752cSMel Gorman 
1286d7a5752cSMel Gorman 	return 0;
1287d7a5752cSMel Gorman }
1288d7a5752cSMel Gorman 
1289d7a5752cSMel Gorman static const struct seq_operations unusable_op = {
1290d7a5752cSMel Gorman 	.start	= frag_start,
1291d7a5752cSMel Gorman 	.next	= frag_next,
1292d7a5752cSMel Gorman 	.stop	= frag_stop,
1293d7a5752cSMel Gorman 	.show	= unusable_show,
1294d7a5752cSMel Gorman };
1295d7a5752cSMel Gorman 
1296d7a5752cSMel Gorman static int unusable_open(struct inode *inode, struct file *file)
1297d7a5752cSMel Gorman {
1298d7a5752cSMel Gorman 	return seq_open(file, &unusable_op);
1299d7a5752cSMel Gorman }
1300d7a5752cSMel Gorman 
1301d7a5752cSMel Gorman static const struct file_operations unusable_file_ops = {
1302d7a5752cSMel Gorman 	.open		= unusable_open,
1303d7a5752cSMel Gorman 	.read		= seq_read,
1304d7a5752cSMel Gorman 	.llseek		= seq_lseek,
1305d7a5752cSMel Gorman 	.release	= seq_release,
1306d7a5752cSMel Gorman };
1307d7a5752cSMel Gorman 
1308f1a5ab12SMel Gorman static void extfrag_show_print(struct seq_file *m,
1309f1a5ab12SMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1310f1a5ab12SMel Gorman {
1311f1a5ab12SMel Gorman 	unsigned int order;
1312f1a5ab12SMel Gorman 	int index;
1313f1a5ab12SMel Gorman 
1314f1a5ab12SMel Gorman 	/* Alloc on stack as interrupts are disabled for zone walk */
1315f1a5ab12SMel Gorman 	struct contig_page_info info;
1316f1a5ab12SMel Gorman 
1317f1a5ab12SMel Gorman 	seq_printf(m, "Node %d, zone %8s ",
1318f1a5ab12SMel Gorman 				pgdat->node_id,
1319f1a5ab12SMel Gorman 				zone->name);
1320f1a5ab12SMel Gorman 	for (order = 0; order < MAX_ORDER; ++order) {
1321f1a5ab12SMel Gorman 		fill_contig_page_info(zone, order, &info);
132256de7263SMel Gorman 		index = __fragmentation_index(order, &info);
1323f1a5ab12SMel Gorman 		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
1324f1a5ab12SMel Gorman 	}
1325f1a5ab12SMel Gorman 
1326f1a5ab12SMel Gorman 	seq_putc(m, '\n');
1327f1a5ab12SMel Gorman }
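/*
 * __fragmentation_index() (defined earlier in this file) uses the same
 * fixed-point convention: values near 0 mean an allocation of that order
 * would fail mainly for lack of free memory, values near 1000 (printed as
 * close to 1.000) mean it would fail because of external fragmentation,
 * and a negative index means an allocation of that order would currently
 * succeed, so no fragmentation index is meaningful.
 */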
1328f1a5ab12SMel Gorman 
1329f1a5ab12SMel Gorman /*
1330f1a5ab12SMel Gorman  * Display the fragmentation index for each order at which allocations would fail
1331f1a5ab12SMel Gorman  */
1332f1a5ab12SMel Gorman static int extfrag_show(struct seq_file *m, void *arg)
1333f1a5ab12SMel Gorman {
1334f1a5ab12SMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1335f1a5ab12SMel Gorman 
1336f1a5ab12SMel Gorman 	walk_zones_in_node(m, pgdat, extfrag_show_print);
1337f1a5ab12SMel Gorman 
1338f1a5ab12SMel Gorman 	return 0;
1339f1a5ab12SMel Gorman }
1340f1a5ab12SMel Gorman 
1341f1a5ab12SMel Gorman static const struct seq_operations extfrag_op = {
1342f1a5ab12SMel Gorman 	.start	= frag_start,
1343f1a5ab12SMel Gorman 	.next	= frag_next,
1344f1a5ab12SMel Gorman 	.stop	= frag_stop,
1345f1a5ab12SMel Gorman 	.show	= extfrag_show,
1346f1a5ab12SMel Gorman };
1347f1a5ab12SMel Gorman 
1348f1a5ab12SMel Gorman static int extfrag_open(struct inode *inode, struct file *file)
1349f1a5ab12SMel Gorman {
1350f1a5ab12SMel Gorman 	return seq_open(file, &extfrag_op);
1351f1a5ab12SMel Gorman }
1352f1a5ab12SMel Gorman 
1353f1a5ab12SMel Gorman static const struct file_operations extfrag_file_ops = {
1354f1a5ab12SMel Gorman 	.open		= extfrag_open,
1355f1a5ab12SMel Gorman 	.read		= seq_read,
1356f1a5ab12SMel Gorman 	.llseek		= seq_lseek,
1357f1a5ab12SMel Gorman 	.release	= seq_release,
1358f1a5ab12SMel Gorman };
1359f1a5ab12SMel Gorman 
1360d7a5752cSMel Gorman static int __init extfrag_debug_init(void)
1361d7a5752cSMel Gorman {
1362d7a5752cSMel Gorman 	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
1363d7a5752cSMel Gorman 	if (!extfrag_debug_root)
1364d7a5752cSMel Gorman 		return -ENOMEM;
1365d7a5752cSMel Gorman 
1366d7a5752cSMel Gorman 	if (!debugfs_create_file("unusable_index", 0444,
1367d7a5752cSMel Gorman 			extfrag_debug_root, NULL, &unusable_file_ops))
1368d7a5752cSMel Gorman 		return -ENOMEM;
1369d7a5752cSMel Gorman 
1370f1a5ab12SMel Gorman 	if (!debugfs_create_file("extfrag_index", 0444,
1371f1a5ab12SMel Gorman 			extfrag_debug_root, NULL, &extfrag_file_ops))
1372f1a5ab12SMel Gorman 		return -ENOMEM;
1373f1a5ab12SMel Gorman 
1374d7a5752cSMel Gorman 	return 0;
1375d7a5752cSMel Gorman }
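/*
 * With debugfs mounted in the usual place, the files created above appear
 * as /sys/kernel/debug/extfrag/unusable_index and
 * /sys/kernel/debug/extfrag/extfrag_index.  Both are read-only (0444) and
 * print one line per zone in the "Node N, zone NAME v.vvv ..." format
 * produced by the show functions above.
 */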
1376d7a5752cSMel Gorman 
1377d7a5752cSMel Gorman module_init(extfrag_debug_init);
1378d7a5752cSMel Gorman #endif
1379