xref: /linux/mm/vmstat.c (revision ccde8bd4014eb2f01102f7a64f0fad3df193b758)
1f6ac2354SChristoph Lameter /*
2f6ac2354SChristoph Lameter  *  linux/mm/vmstat.c
3f6ac2354SChristoph Lameter  *
4f6ac2354SChristoph Lameter  *  Manages VM statistics
5f6ac2354SChristoph Lameter  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
62244b95aSChristoph Lameter  *
72244b95aSChristoph Lameter  *  zoned VM statistics
82244b95aSChristoph Lameter  *  Copyright (C) 2006 Silicon Graphics, Inc.,
92244b95aSChristoph Lameter  *		Christoph Lameter <christoph@lameter.com>
107cc36bbdSChristoph Lameter  *  Copyright (C) 2008-2014 Christoph Lameter
11f6ac2354SChristoph Lameter  */
128f32f7e5SAlexey Dobriyan #include <linux/fs.h>
13f6ac2354SChristoph Lameter #include <linux/mm.h>
144e950f6fSAlexey Dobriyan #include <linux/err.h>
152244b95aSChristoph Lameter #include <linux/module.h>
165a0e3ad6STejun Heo #include <linux/slab.h>
17df9ecabaSChristoph Lameter #include <linux/cpu.h>
187cc36bbdSChristoph Lameter #include <linux/cpumask.h>
19c748e134SAdrian Bunk #include <linux/vmstat.h>
203c486871SAndrew Morton #include <linux/proc_fs.h>
213c486871SAndrew Morton #include <linux/seq_file.h>
223c486871SAndrew Morton #include <linux/debugfs.h>
23e8edc6e0SAlexey Dobriyan #include <linux/sched.h>
24f1a5ab12SMel Gorman #include <linux/math64.h>
2579da826aSMichael Rubin #include <linux/writeback.h>
2636deb0beSNamhyung Kim #include <linux/compaction.h>
276e543d57SLisa Du #include <linux/mm_inline.h>
2848c96a36SJoonsoo Kim #include <linux/page_ext.h>
2948c96a36SJoonsoo Kim #include <linux/page_owner.h>
306e543d57SLisa Du 
316e543d57SLisa Du #include "internal.h"
32f6ac2354SChristoph Lameter 
33f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
34f8891e5eSChristoph Lameter DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
35f8891e5eSChristoph Lameter EXPORT_PER_CPU_SYMBOL(vm_event_states);
36f8891e5eSChristoph Lameter 
3731f961a8SMinchan Kim static void sum_vm_events(unsigned long *ret)
38f8891e5eSChristoph Lameter {
399eccf2a8SChristoph Lameter 	int cpu;
40f8891e5eSChristoph Lameter 	int i;
41f8891e5eSChristoph Lameter 
42f8891e5eSChristoph Lameter 	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
43f8891e5eSChristoph Lameter 
4431f961a8SMinchan Kim 	for_each_online_cpu(cpu) {
45f8891e5eSChristoph Lameter 		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
46f8891e5eSChristoph Lameter 
47f8891e5eSChristoph Lameter 		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
48f8891e5eSChristoph Lameter 			ret[i] += this->event[i];
49f8891e5eSChristoph Lameter 	}
50f8891e5eSChristoph Lameter }
51f8891e5eSChristoph Lameter 
52f8891e5eSChristoph Lameter /*
53f8891e5eSChristoph Lameter  * Accumulate the vm event counters across all CPUs.
54f8891e5eSChristoph Lameter  * The result is unavoidably approximate - it can change
55f8891e5eSChristoph Lameter  * during and after execution of this function.
56f8891e5eSChristoph Lameter */
57f8891e5eSChristoph Lameter void all_vm_events(unsigned long *ret)
58f8891e5eSChristoph Lameter {
59b5be1132SKOSAKI Motohiro 	get_online_cpus();
6031f961a8SMinchan Kim 	sum_vm_events(ret);
61b5be1132SKOSAKI Motohiro 	put_online_cpus();
62f8891e5eSChristoph Lameter }
6332dd66fcSHeiko Carstens EXPORT_SYMBOL_GPL(all_vm_events);
64f8891e5eSChristoph Lameter 
65f8891e5eSChristoph Lameter /*
66f8891e5eSChristoph Lameter  * Fold the foreign cpu events into our own.
67f8891e5eSChristoph Lameter  *
68f8891e5eSChristoph Lameter  * This is adding to the events on one processor
69f8891e5eSChristoph Lameter  * but keeps the global counts constant.
70f8891e5eSChristoph Lameter  */
71f8891e5eSChristoph Lameter void vm_events_fold_cpu(int cpu)
72f8891e5eSChristoph Lameter {
73f8891e5eSChristoph Lameter 	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
74f8891e5eSChristoph Lameter 	int i;
75f8891e5eSChristoph Lameter 
76f8891e5eSChristoph Lameter 	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
77f8891e5eSChristoph Lameter 		count_vm_events(i, fold_state->event[i]);
78f8891e5eSChristoph Lameter 		fold_state->event[i] = 0;
79f8891e5eSChristoph Lameter 	}
80f8891e5eSChristoph Lameter }
81f8891e5eSChristoph Lameter 
82f8891e5eSChristoph Lameter #endif /* CONFIG_VM_EVENT_COUNTERS */
83f8891e5eSChristoph Lameter 
842244b95aSChristoph Lameter /*
852244b95aSChristoph Lameter  * Manage combined zone based / global counters
862244b95aSChristoph Lameter  *
872244b95aSChristoph Lameter  * vm_stat contains the global counters
882244b95aSChristoph Lameter  */
89a1cb2c60SDimitri Sivanich atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
902244b95aSChristoph Lameter EXPORT_SYMBOL(vm_stat);
912244b95aSChristoph Lameter 
922244b95aSChristoph Lameter #ifdef CONFIG_SMP
932244b95aSChristoph Lameter 
94b44129b3SMel Gorman int calculate_pressure_threshold(struct zone *zone)
9588f5acf8SMel Gorman {
9688f5acf8SMel Gorman 	int threshold;
9788f5acf8SMel Gorman 	int watermark_distance;
9888f5acf8SMel Gorman 
9988f5acf8SMel Gorman 	/*
10088f5acf8SMel Gorman 	 * As vmstats are not up to date, there is drift between the estimated
10188f5acf8SMel Gorman 	 * and real values. For high thresholds and a high number of CPUs, it
10288f5acf8SMel Gorman 	 * is possible for the min watermark to be breached while the estimated
10388f5acf8SMel Gorman 	 * value looks fine. The pressure threshold is a reduced value such
10488f5acf8SMel Gorman 	 * that even the maximum amount of drift will not accidentally breach
10588f5acf8SMel Gorman 	 * the min watermark
10688f5acf8SMel Gorman 	 */
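	/*
	 * Worked example (illustrative numbers): with 512 pages between the
	 * low and min watermarks and 8 online cpus the threshold becomes
	 * max(1, 512 / 8) = 64, so the worst case combined per-cpu drift
	 * (8 * 64) still stays within the watermark gap.
	 */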
10788f5acf8SMel Gorman 	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
10888f5acf8SMel Gorman 	threshold = max(1, (int)(watermark_distance / num_online_cpus()));
10988f5acf8SMel Gorman 
11088f5acf8SMel Gorman 	/*
11188f5acf8SMel Gorman 	 * Maximum threshold is 125
11288f5acf8SMel Gorman 	 */
11388f5acf8SMel Gorman 	threshold = min(125, threshold);
11488f5acf8SMel Gorman 
11588f5acf8SMel Gorman 	return threshold;
11688f5acf8SMel Gorman }
11788f5acf8SMel Gorman 
118b44129b3SMel Gorman int calculate_normal_threshold(struct zone *zone)
119df9ecabaSChristoph Lameter {
120df9ecabaSChristoph Lameter 	int threshold;
121df9ecabaSChristoph Lameter 	int mem;	/* memory in 128 MB units */
1222244b95aSChristoph Lameter 
1232244b95aSChristoph Lameter 	/*
124df9ecabaSChristoph Lameter 	 * The threshold scales with the number of processors and the amount
125df9ecabaSChristoph Lameter 	 * of memory per zone. More memory means that we can defer updates for
126df9ecabaSChristoph Lameter 	 * longer, more processors could lead to more contention.
127df9ecabaSChristoph Lameter  	 * fls() is used to have a cheap way of logarithmic scaling.
1282244b95aSChristoph Lameter 	 *
129df9ecabaSChristoph Lameter 	 * Some sample thresholds:
130df9ecabaSChristoph Lameter 	 *
131df9ecabaSChristoph Lameter 	 * Threshold	Processors	(fls)	Zonesize	fls(mem+1)
132df9ecabaSChristoph Lameter 	 * ------------------------------------------------------------------
133df9ecabaSChristoph Lameter 	 * 8		1		1	0.9-1 GB	4
134df9ecabaSChristoph Lameter 	 * 16		2		2	0.9-1 GB	4
135df9ecabaSChristoph Lameter 	 * 20 		2		2	1-2 GB		5
136df9ecabaSChristoph Lameter 	 * 24		2		2	2-4 GB		6
137df9ecabaSChristoph Lameter 	 * 28		2		2	4-8 GB		7
138df9ecabaSChristoph Lameter 	 * 32		2		2	8-16 GB		8
139df9ecabaSChristoph Lameter 	 * 4		2		2	<128M		1
140df9ecabaSChristoph Lameter 	 * 30		4		3	2-4 GB		5
141df9ecabaSChristoph Lameter 	 * 48		4		3	8-16 GB		8
142df9ecabaSChristoph Lameter 	 * 32		8		4	1-2 GB		4
143df9ecabaSChristoph Lameter 	 * 32		8		4	0.9-1GB		4
144df9ecabaSChristoph Lameter 	 * 10		16		5	<128M		1
145df9ecabaSChristoph Lameter 	 * 40		16		5	900M		4
146df9ecabaSChristoph Lameter 	 * 70		64		7	2-4 GB		5
147df9ecabaSChristoph Lameter 	 * 84		64		7	4-8 GB		6
148df9ecabaSChristoph Lameter 	 * 108		512		9	4-8 GB		6
149df9ecabaSChristoph Lameter 	 * 125		1024		10	8-16 GB		8
150df9ecabaSChristoph Lameter 	 * 125		1024		10	16-32 GB	9
1512244b95aSChristoph Lameter 	 */
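	/*
	 * Worked example (the "30 / 4 cpus / 2-4 GB" row above): 4 online
	 * cpus give fls(4) = 3, a 2 GB zone gives mem = 16 (128 MB units)
	 * and fls(16 + 1) = 5, so threshold = 2 * 3 * 5 = 30.
	 */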
152df9ecabaSChristoph Lameter 
153b40da049SJiang Liu 	mem = zone->managed_pages >> (27 - PAGE_SHIFT);
154df9ecabaSChristoph Lameter 
155df9ecabaSChristoph Lameter 	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
156df9ecabaSChristoph Lameter 
157df9ecabaSChristoph Lameter 	/*
158df9ecabaSChristoph Lameter 	 * Maximum threshold is 125
159df9ecabaSChristoph Lameter 	 */
160df9ecabaSChristoph Lameter 	threshold = min(125, threshold);
161df9ecabaSChristoph Lameter 
162df9ecabaSChristoph Lameter 	return threshold;
163df9ecabaSChristoph Lameter }
164df9ecabaSChristoph Lameter 
165df9ecabaSChristoph Lameter /*
166df9ecabaSChristoph Lameter  * Refresh the thresholds for each zone.
167df9ecabaSChristoph Lameter  */
168a6cccdc3SKOSAKI Motohiro void refresh_zone_stat_thresholds(void)
1692244b95aSChristoph Lameter {
170df9ecabaSChristoph Lameter 	struct zone *zone;
171df9ecabaSChristoph Lameter 	int cpu;
172df9ecabaSChristoph Lameter 	int threshold;
173df9ecabaSChristoph Lameter 
174ee99c71cSKOSAKI Motohiro 	for_each_populated_zone(zone) {
175aa454840SChristoph Lameter 		unsigned long max_drift, tolerate_drift;
176aa454840SChristoph Lameter 
177b44129b3SMel Gorman 		threshold = calculate_normal_threshold(zone);
178df9ecabaSChristoph Lameter 
179df9ecabaSChristoph Lameter 		for_each_online_cpu(cpu)
18099dcc3e5SChristoph Lameter 			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
18199dcc3e5SChristoph Lameter 							= threshold;
182aa454840SChristoph Lameter 
183aa454840SChristoph Lameter 		/*
184aa454840SChristoph Lameter 		 * Only set percpu_drift_mark if there is a danger that
185aa454840SChristoph Lameter 		 * NR_FREE_PAGES reports the low watermark is ok when in fact
186aa454840SChristoph Lameter 		 * the min watermark could be breached by an allocation
187aa454840SChristoph Lameter 		 */
188aa454840SChristoph Lameter 		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
189aa454840SChristoph Lameter 		max_drift = num_online_cpus() * threshold;
190aa454840SChristoph Lameter 		if (max_drift > tolerate_drift)
191aa454840SChristoph Lameter 			zone->percpu_drift_mark = high_wmark_pages(zone) +
192aa454840SChristoph Lameter 					max_drift;
193df9ecabaSChristoph Lameter 	}
1942244b95aSChristoph Lameter }
1952244b95aSChristoph Lameter 
196b44129b3SMel Gorman void set_pgdat_percpu_threshold(pg_data_t *pgdat,
197b44129b3SMel Gorman 				int (*calculate_pressure)(struct zone *))
19888f5acf8SMel Gorman {
19988f5acf8SMel Gorman 	struct zone *zone;
20088f5acf8SMel Gorman 	int cpu;
20188f5acf8SMel Gorman 	int threshold;
20288f5acf8SMel Gorman 	int i;
20388f5acf8SMel Gorman 
20488f5acf8SMel Gorman 	for (i = 0; i < pgdat->nr_zones; i++) {
20588f5acf8SMel Gorman 		zone = &pgdat->node_zones[i];
20688f5acf8SMel Gorman 		if (!zone->percpu_drift_mark)
20788f5acf8SMel Gorman 			continue;
20888f5acf8SMel Gorman 
209b44129b3SMel Gorman 		threshold = (*calculate_pressure)(zone);
210bb0b6dffSMel Gorman 		for_each_online_cpu(cpu)
21188f5acf8SMel Gorman 			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
21288f5acf8SMel Gorman 							= threshold;
21388f5acf8SMel Gorman 	}
21488f5acf8SMel Gorman }
21588f5acf8SMel Gorman 
2162244b95aSChristoph Lameter /*
217bea04b07SJianyu Zhan  * For use when we know that interrupts are disabled,
218bea04b07SJianyu Zhan  * or when we know that preemption is disabled and that
219bea04b07SJianyu Zhan  * particular counter cannot be updated from interrupt context.
2202244b95aSChristoph Lameter  */
2212244b95aSChristoph Lameter void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
2226cdb18adSHeiko Carstens 			   long delta)
2232244b95aSChristoph Lameter {
22412938a92SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
22512938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
2262244b95aSChristoph Lameter 	long x;
22712938a92SChristoph Lameter 	long t;
2282244b95aSChristoph Lameter 
22912938a92SChristoph Lameter 	x = delta + __this_cpu_read(*p);
2302244b95aSChristoph Lameter 
23112938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
23212938a92SChristoph Lameter 
23312938a92SChristoph Lameter 	if (unlikely(x > t || x < -t)) {
2342244b95aSChristoph Lameter 		zone_page_state_add(x, zone, item);
2352244b95aSChristoph Lameter 		x = 0;
2362244b95aSChristoph Lameter 	}
23712938a92SChristoph Lameter 	__this_cpu_write(*p, x);
2382244b95aSChristoph Lameter }
2392244b95aSChristoph Lameter EXPORT_SYMBOL(__mod_zone_page_state);
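/*
 * Illustrative use (a sketch, see the actual callers for exact context):
 * code that already runs with interrupts disabled, e.g. mlock accounting,
 * can update a counter directly:
 *
 *	__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
 */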
2402244b95aSChristoph Lameter 
2412244b95aSChristoph Lameter /*
2422244b95aSChristoph Lameter  * Optimized increment and decrement functions.
2432244b95aSChristoph Lameter  *
2442244b95aSChristoph Lameter  * These are only for a single page and therefore can take a struct page *
2452244b95aSChristoph Lameter  * argument instead of struct zone *. This allows the inclusion of the code
2462244b95aSChristoph Lameter  * generated for page_zone(page) into the optimized functions.
2472244b95aSChristoph Lameter  *
2482244b95aSChristoph Lameter  * No overflow check is necessary and therefore the differential can be
2492244b95aSChristoph Lameter  * incremented or decremented in place which may allow the compilers to
2502244b95aSChristoph Lameter  * generate better code.
2512244b95aSChristoph Lameter  * The increment or decrement is known and therefore one boundary check can
2522244b95aSChristoph Lameter  * be omitted.
2532244b95aSChristoph Lameter  *
254df9ecabaSChristoph Lameter  * NOTE: These functions are very performance sensitive. Change only
255df9ecabaSChristoph Lameter  * with care.
256df9ecabaSChristoph Lameter  *
2572244b95aSChristoph Lameter  * Some processors have inc/dec instructions that are atomic vs an interrupt.
2582244b95aSChristoph Lameter  * However, the code must first determine the differential location in a zone
2592244b95aSChristoph Lameter  * based on the processor number and then inc/dec the counter. There is no
2602244b95aSChristoph Lameter  * guarantee without disabling preemption that the processor will not change
2612244b95aSChristoph Lameter  * in between and therefore the atomicity vs. interrupt cannot be exploited
2622244b95aSChristoph Lameter  * in a useful way here.
2632244b95aSChristoph Lameter  */
264c8785385SChristoph Lameter void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
2652244b95aSChristoph Lameter {
26612938a92SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
26712938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
26812938a92SChristoph Lameter 	s8 v, t;
2692244b95aSChristoph Lameter 
270908ee0f1SChristoph Lameter 	v = __this_cpu_inc_return(*p);
27112938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
27212938a92SChristoph Lameter 	if (unlikely(v > t)) {
27312938a92SChristoph Lameter 		s8 overstep = t >> 1;
2742244b95aSChristoph Lameter 
27512938a92SChristoph Lameter 		zone_page_state_add(v + overstep, zone, item);
27612938a92SChristoph Lameter 		__this_cpu_write(*p, -overstep);
2772244b95aSChristoph Lameter 	}
2782244b95aSChristoph Lameter }
279ca889e6cSChristoph Lameter 
280ca889e6cSChristoph Lameter void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
281ca889e6cSChristoph Lameter {
282ca889e6cSChristoph Lameter 	__inc_zone_state(page_zone(page), item);
283ca889e6cSChristoph Lameter }
2842244b95aSChristoph Lameter EXPORT_SYMBOL(__inc_zone_page_state);
2852244b95aSChristoph Lameter 
286c8785385SChristoph Lameter void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
2872244b95aSChristoph Lameter {
28812938a92SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
28912938a92SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
29012938a92SChristoph Lameter 	s8 v, t;
2912244b95aSChristoph Lameter 
292908ee0f1SChristoph Lameter 	v = __this_cpu_dec_return(*p);
29312938a92SChristoph Lameter 	t = __this_cpu_read(pcp->stat_threshold);
29412938a92SChristoph Lameter 	if (unlikely(v < -t)) {
29512938a92SChristoph Lameter 		s8 overstep = t >> 1;
2962244b95aSChristoph Lameter 
29712938a92SChristoph Lameter 		zone_page_state_add(v - overstep, zone, item);
29812938a92SChristoph Lameter 		__this_cpu_write(*p, overstep);
2992244b95aSChristoph Lameter 	}
3002244b95aSChristoph Lameter }
301c8785385SChristoph Lameter 
302c8785385SChristoph Lameter void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
303c8785385SChristoph Lameter {
304c8785385SChristoph Lameter 	__dec_zone_state(page_zone(page), item);
305c8785385SChristoph Lameter }
3062244b95aSChristoph Lameter EXPORT_SYMBOL(__dec_zone_page_state);
3072244b95aSChristoph Lameter 
3084156153cSHeiko Carstens #ifdef CONFIG_HAVE_CMPXCHG_LOCAL
3097c839120SChristoph Lameter /*
3107c839120SChristoph Lameter  * If we have cmpxchg_local support then we do not need to incur the overhead
3117c839120SChristoph Lameter  * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
3127c839120SChristoph Lameter  *
3137c839120SChristoph Lameter  * mod_state() modifies the zone counter state through atomic per cpu
3147c839120SChristoph Lameter  * operations.
3157c839120SChristoph Lameter  *
3167c839120SChristoph Lameter  * Overstep mode specifies how overstep should be handled:
3177c839120SChristoph Lameter  *     0       No overstepping
3187c839120SChristoph Lameter  *     1       Overstepping half of threshold
3197c839120SChristoph Lameter  *     -1      Overstepping minus half of threshold
3207c839120SChristoph Lameter */
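/*
 * Example with a stat_threshold of 32 and overstep mode 1: once the per-cpu
 * delta passes 32, the accumulated total plus 16 is folded into the zone
 * counter and the per-cpu delta restarts at -16, so roughly another 48
 * increments pass before the next fold.
 */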
3216cdb18adSHeiko Carstens static inline void mod_state(struct zone *zone, enum zone_stat_item item,
3226cdb18adSHeiko Carstens 			     long delta, int overstep_mode)
3237c839120SChristoph Lameter {
3247c839120SChristoph Lameter 	struct per_cpu_pageset __percpu *pcp = zone->pageset;
3257c839120SChristoph Lameter 	s8 __percpu *p = pcp->vm_stat_diff + item;
3267c839120SChristoph Lameter 	long o, n, t, z;
3277c839120SChristoph Lameter 
3287c839120SChristoph Lameter 	do {
3297c839120SChristoph Lameter 		z = 0;  /* overflow to zone counters */
3307c839120SChristoph Lameter 
3317c839120SChristoph Lameter 		/*
3327c839120SChristoph Lameter 		 * The fetching of the stat_threshold is racy. We may apply
3337c839120SChristoph Lameter 		 * a counter threshold to the wrong cpu if we get
334d3bc2367SChristoph Lameter 		 * rescheduled while executing here. However, the next
335d3bc2367SChristoph Lameter 		 * counter update will apply the threshold again and
336d3bc2367SChristoph Lameter 		 * therefore bring the counter under the threshold again.
337d3bc2367SChristoph Lameter 		 *
338d3bc2367SChristoph Lameter 		 * Most of the time the thresholds are the same anyway
339d3bc2367SChristoph Lameter 		 * for all cpus in a zone.
3407c839120SChristoph Lameter 		 */
3417c839120SChristoph Lameter 		t = this_cpu_read(pcp->stat_threshold);
3427c839120SChristoph Lameter 
3437c839120SChristoph Lameter 		o = this_cpu_read(*p);
3447c839120SChristoph Lameter 		n = delta + o;
3457c839120SChristoph Lameter 
3467c839120SChristoph Lameter 		if (n > t || n < -t) {
3477c839120SChristoph Lameter 			int os = overstep_mode * (t >> 1);
3487c839120SChristoph Lameter 
3497c839120SChristoph Lameter 			/* Overflow must be added to zone counters */
3507c839120SChristoph Lameter 			z = n + os;
3517c839120SChristoph Lameter 			n = -os;
3527c839120SChristoph Lameter 		}
3537c839120SChristoph Lameter 	} while (this_cpu_cmpxchg(*p, o, n) != o);
3547c839120SChristoph Lameter 
3557c839120SChristoph Lameter 	if (z)
3567c839120SChristoph Lameter 		zone_page_state_add(z, zone, item);
3577c839120SChristoph Lameter }
3587c839120SChristoph Lameter 
3597c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
3606cdb18adSHeiko Carstens 			 long delta)
3617c839120SChristoph Lameter {
3627c839120SChristoph Lameter 	mod_state(zone, item, delta, 0);
3637c839120SChristoph Lameter }
3647c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state);
3657c839120SChristoph Lameter 
3667c839120SChristoph Lameter void inc_zone_state(struct zone *zone, enum zone_stat_item item)
3677c839120SChristoph Lameter {
3687c839120SChristoph Lameter 	mod_state(zone, item, 1, 1);
3697c839120SChristoph Lameter }
3707c839120SChristoph Lameter 
3717c839120SChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item)
3727c839120SChristoph Lameter {
3737c839120SChristoph Lameter 	mod_state(page_zone(page), item, 1, 1);
3747c839120SChristoph Lameter }
3757c839120SChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state);
3767c839120SChristoph Lameter 
3777c839120SChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item)
3787c839120SChristoph Lameter {
3797c839120SChristoph Lameter 	mod_state(page_zone(page), item, -1, -1);
3807c839120SChristoph Lameter }
3817c839120SChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state);
3827c839120SChristoph Lameter #else
3837c839120SChristoph Lameter /*
3847c839120SChristoph Lameter  * Use interrupt disable to serialize counter updates
3857c839120SChristoph Lameter  */
3867c839120SChristoph Lameter void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
3876cdb18adSHeiko Carstens 			 long delta)
3887c839120SChristoph Lameter {
3897c839120SChristoph Lameter 	unsigned long flags;
3907c839120SChristoph Lameter 
3917c839120SChristoph Lameter 	local_irq_save(flags);
3927c839120SChristoph Lameter 	__mod_zone_page_state(zone, item, delta);
3937c839120SChristoph Lameter 	local_irq_restore(flags);
3947c839120SChristoph Lameter }
3957c839120SChristoph Lameter EXPORT_SYMBOL(mod_zone_page_state);
3967c839120SChristoph Lameter 
397ca889e6cSChristoph Lameter void inc_zone_state(struct zone *zone, enum zone_stat_item item)
398ca889e6cSChristoph Lameter {
399ca889e6cSChristoph Lameter 	unsigned long flags;
400ca889e6cSChristoph Lameter 
401ca889e6cSChristoph Lameter 	local_irq_save(flags);
402ca889e6cSChristoph Lameter 	__inc_zone_state(zone, item);
403ca889e6cSChristoph Lameter 	local_irq_restore(flags);
404ca889e6cSChristoph Lameter }
405ca889e6cSChristoph Lameter 
4062244b95aSChristoph Lameter void inc_zone_page_state(struct page *page, enum zone_stat_item item)
4072244b95aSChristoph Lameter {
4082244b95aSChristoph Lameter 	unsigned long flags;
4092244b95aSChristoph Lameter 	struct zone *zone;
4102244b95aSChristoph Lameter 
4112244b95aSChristoph Lameter 	zone = page_zone(page);
4122244b95aSChristoph Lameter 	local_irq_save(flags);
413ca889e6cSChristoph Lameter 	__inc_zone_state(zone, item);
4142244b95aSChristoph Lameter 	local_irq_restore(flags);
4152244b95aSChristoph Lameter }
4162244b95aSChristoph Lameter EXPORT_SYMBOL(inc_zone_page_state);
4172244b95aSChristoph Lameter 
4182244b95aSChristoph Lameter void dec_zone_page_state(struct page *page, enum zone_stat_item item)
4192244b95aSChristoph Lameter {
4202244b95aSChristoph Lameter 	unsigned long flags;
4212244b95aSChristoph Lameter 
4222244b95aSChristoph Lameter 	local_irq_save(flags);
423a302eb4eSChristoph Lameter 	__dec_zone_page_state(page, item);
4242244b95aSChristoph Lameter 	local_irq_restore(flags);
4252244b95aSChristoph Lameter }
4262244b95aSChristoph Lameter EXPORT_SYMBOL(dec_zone_page_state);
4277c839120SChristoph Lameter #endif
4282244b95aSChristoph Lameter 
4297cc36bbdSChristoph Lameter 
4307cc36bbdSChristoph Lameter /*
4317cc36bbdSChristoph Lameter  * Fold a differential into the global counters.
4327cc36bbdSChristoph Lameter  * Returns the number of counters updated.
4337cc36bbdSChristoph Lameter  */
4347cc36bbdSChristoph Lameter static int fold_diff(int *diff)
4354edb0748SChristoph Lameter {
4364edb0748SChristoph Lameter 	int i;
4377cc36bbdSChristoph Lameter 	int changes = 0;
4384edb0748SChristoph Lameter 
4394edb0748SChristoph Lameter 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
4407cc36bbdSChristoph Lameter 		if (diff[i]) {
4414edb0748SChristoph Lameter 			atomic_long_add(diff[i], &vm_stat[i]);
4427cc36bbdSChristoph Lameter 			changes++;
4437cc36bbdSChristoph Lameter 	}
4447cc36bbdSChristoph Lameter 	return changes;
4454edb0748SChristoph Lameter }
4464edb0748SChristoph Lameter 
4472244b95aSChristoph Lameter /*
4482bb921e5SChristoph Lameter  * Update the zone counters for the current cpu.
449a7f75e25SChristoph Lameter  *
4504037d452SChristoph Lameter  * Note that refresh_cpu_vm_stats strives to only access
4514037d452SChristoph Lameter  * node local memory. The per cpu pagesets on remote zones are placed
4524037d452SChristoph Lameter  * in the memory local to the processor using that pageset. So the
4534037d452SChristoph Lameter  * loop over all zones will access a series of cachelines local to
4544037d452SChristoph Lameter  * the processor.
4554037d452SChristoph Lameter  *
4564037d452SChristoph Lameter  * The call to zone_page_state_add updates the cachelines with the
4574037d452SChristoph Lameter  * statistics in the remote zone struct as well as the global cachelines
4584037d452SChristoph Lameter  * with the global counters. These could cause remote node cache line
4594037d452SChristoph Lameter  * bouncing, so they should only be done when necessary.
4607cc36bbdSChristoph Lameter  *
4617cc36bbdSChristoph Lameter  * The function returns the number of global counters updated.
4622244b95aSChristoph Lameter  */
4630eb77e98SChristoph Lameter static int refresh_cpu_vm_stats(bool do_pagesets)
4642244b95aSChristoph Lameter {
4652244b95aSChristoph Lameter 	struct zone *zone;
4662244b95aSChristoph Lameter 	int i;
467a7f75e25SChristoph Lameter 	int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
4687cc36bbdSChristoph Lameter 	int changes = 0;
4692244b95aSChristoph Lameter 
470ee99c71cSKOSAKI Motohiro 	for_each_populated_zone(zone) {
471fbc2edb0SChristoph Lameter 		struct per_cpu_pageset __percpu *p = zone->pageset;
4722244b95aSChristoph Lameter 
473fbc2edb0SChristoph Lameter 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
474a7f75e25SChristoph Lameter 			int v;
475a7f75e25SChristoph Lameter 
476fbc2edb0SChristoph Lameter 			v = this_cpu_xchg(p->vm_stat_diff[i], 0);
477fbc2edb0SChristoph Lameter 			if (v) {
478fbc2edb0SChristoph Lameter 
479a7f75e25SChristoph Lameter 				atomic_long_add(v, &zone->vm_stat[i]);
480a7f75e25SChristoph Lameter 				global_diff[i] += v;
4814037d452SChristoph Lameter #ifdef CONFIG_NUMA
4824037d452SChristoph Lameter 				/* 3 seconds idle till flush */
483fbc2edb0SChristoph Lameter 				__this_cpu_write(p->expire, 3);
4844037d452SChristoph Lameter #endif
4852244b95aSChristoph Lameter 			}
486fbc2edb0SChristoph Lameter 		}
4874037d452SChristoph Lameter #ifdef CONFIG_NUMA
4880eb77e98SChristoph Lameter 		if (do_pagesets) {
4890eb77e98SChristoph Lameter 			cond_resched();
4904037d452SChristoph Lameter 			/*
4914037d452SChristoph Lameter 			 * Deal with draining the remote pageset of this
4924037d452SChristoph Lameter 			 * processor
4934037d452SChristoph Lameter 			 *
4944037d452SChristoph Lameter 			 * Check if there are pages remaining in this pageset;
4954037d452SChristoph Lameter 			 * if not then there is nothing to expire.
4964037d452SChristoph Lameter 			 */
497fbc2edb0SChristoph Lameter 			if (!__this_cpu_read(p->expire) ||
498fbc2edb0SChristoph Lameter 			       !__this_cpu_read(p->pcp.count))
4994037d452SChristoph Lameter 				continue;
5004037d452SChristoph Lameter 
5014037d452SChristoph Lameter 			/*
5024037d452SChristoph Lameter 			 * We never drain zones local to this processor.
5034037d452SChristoph Lameter 			 */
5044037d452SChristoph Lameter 			if (zone_to_nid(zone) == numa_node_id()) {
505fbc2edb0SChristoph Lameter 				__this_cpu_write(p->expire, 0);
5064037d452SChristoph Lameter 				continue;
5074037d452SChristoph Lameter 			}
5084037d452SChristoph Lameter 
509fbc2edb0SChristoph Lameter 			if (__this_cpu_dec_return(p->expire))
5104037d452SChristoph Lameter 				continue;
5114037d452SChristoph Lameter 
5127cc36bbdSChristoph Lameter 			if (__this_cpu_read(p->pcp.count)) {
5137c8e0181SChristoph Lameter 				drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
5147cc36bbdSChristoph Lameter 				changes++;
5157cc36bbdSChristoph Lameter 			}
5160eb77e98SChristoph Lameter 		}
5174037d452SChristoph Lameter #endif
5182244b95aSChristoph Lameter 	}
5197cc36bbdSChristoph Lameter 	changes += fold_diff(global_diff);
5207cc36bbdSChristoph Lameter 	return changes;
5212244b95aSChristoph Lameter }
5222244b95aSChristoph Lameter 
52340f4b1eaSCody P Schafer /*
5242bb921e5SChristoph Lameter  * Fold the data for an offline cpu into the global array.
5252bb921e5SChristoph Lameter  * There cannot be any access by the offline cpu and therefore
5262bb921e5SChristoph Lameter  * synchronization is simplified.
5272bb921e5SChristoph Lameter  */
5282bb921e5SChristoph Lameter void cpu_vm_stats_fold(int cpu)
5292bb921e5SChristoph Lameter {
5302bb921e5SChristoph Lameter 	struct zone *zone;
5312bb921e5SChristoph Lameter 	int i;
5322bb921e5SChristoph Lameter 	int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
5332bb921e5SChristoph Lameter 
5342bb921e5SChristoph Lameter 	for_each_populated_zone(zone) {
5352bb921e5SChristoph Lameter 		struct per_cpu_pageset *p;
5362bb921e5SChristoph Lameter 
5372bb921e5SChristoph Lameter 		p = per_cpu_ptr(zone->pageset, cpu);
5382bb921e5SChristoph Lameter 
5392bb921e5SChristoph Lameter 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
5402bb921e5SChristoph Lameter 			if (p->vm_stat_diff[i]) {
5412bb921e5SChristoph Lameter 				int v;
5422bb921e5SChristoph Lameter 
5432bb921e5SChristoph Lameter 				v = p->vm_stat_diff[i];
5442bb921e5SChristoph Lameter 				p->vm_stat_diff[i] = 0;
5452bb921e5SChristoph Lameter 				atomic_long_add(v, &zone->vm_stat[i]);
5462bb921e5SChristoph Lameter 				global_diff[i] += v;
5472bb921e5SChristoph Lameter 			}
5482bb921e5SChristoph Lameter 	}
5492bb921e5SChristoph Lameter 
5504edb0748SChristoph Lameter 	fold_diff(global_diff);
5512bb921e5SChristoph Lameter }
5522bb921e5SChristoph Lameter 
5532bb921e5SChristoph Lameter /*
55440f4b1eaSCody P Schafer  * this is only called if !populated_zone(zone), which implies no other users of
55540f4b1eaSCody P Schafer  * pset->vm_stat_diff[] exist.
55640f4b1eaSCody P Schafer  */
5575a883813SMinchan Kim void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
5585a883813SMinchan Kim {
5595a883813SMinchan Kim 	int i;
5605a883813SMinchan Kim 
5615a883813SMinchan Kim 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
5625a883813SMinchan Kim 		if (pset->vm_stat_diff[i]) {
5635a883813SMinchan Kim 			int v = pset->vm_stat_diff[i];
5645a883813SMinchan Kim 			pset->vm_stat_diff[i] = 0;
5655a883813SMinchan Kim 			atomic_long_add(v, &zone->vm_stat[i]);
5665a883813SMinchan Kim 			atomic_long_add(v, &vm_stat[i]);
5675a883813SMinchan Kim 		}
5685a883813SMinchan Kim }
5692244b95aSChristoph Lameter #endif
5702244b95aSChristoph Lameter 
571ca889e6cSChristoph Lameter #ifdef CONFIG_NUMA
572ca889e6cSChristoph Lameter /*
573ca889e6cSChristoph Lameter  * zonelist = the list of zones passed to the allocator
574ca889e6cSChristoph Lameter  * z 	    = the zone from which the allocation occurred.
575ca889e6cSChristoph Lameter  *
576ca889e6cSChristoph Lameter  * Must be called with interrupts disabled.
57778afd561SAndi Kleen  *
57878afd561SAndi Kleen  * When __GFP_OTHER_NODE is set assume the node of the preferred
57978afd561SAndi Kleen  * zone is the local node. This is useful for daemons that allocate
58078afd561SAndi Kleen  * memory on behalf of other processes.
581ca889e6cSChristoph Lameter  */
58278afd561SAndi Kleen void zone_statistics(struct zone *preferred_zone, struct zone *z, gfp_t flags)
583ca889e6cSChristoph Lameter {
58418ea7e71SMel Gorman 	if (z->zone_pgdat == preferred_zone->zone_pgdat) {
585ca889e6cSChristoph Lameter 		__inc_zone_state(z, NUMA_HIT);
586ca889e6cSChristoph Lameter 	} else {
587ca889e6cSChristoph Lameter 		__inc_zone_state(z, NUMA_MISS);
58818ea7e71SMel Gorman 		__inc_zone_state(preferred_zone, NUMA_FOREIGN);
589ca889e6cSChristoph Lameter 	}
59078afd561SAndi Kleen 	if (z->node == ((flags & __GFP_OTHER_NODE) ?
59178afd561SAndi Kleen 			preferred_zone->node : numa_node_id()))
592ca889e6cSChristoph Lameter 		__inc_zone_state(z, NUMA_LOCAL);
593ca889e6cSChristoph Lameter 	else
594ca889e6cSChristoph Lameter 		__inc_zone_state(z, NUMA_OTHER);
595ca889e6cSChristoph Lameter }
596c2d42c16SAndrew Morton 
597c2d42c16SAndrew Morton /*
598c2d42c16SAndrew Morton  * Determine the per node value of a stat item.
599c2d42c16SAndrew Morton  */
600c2d42c16SAndrew Morton unsigned long node_page_state(int node, enum zone_stat_item item)
601c2d42c16SAndrew Morton {
602c2d42c16SAndrew Morton 	struct zone *zones = NODE_DATA(node)->node_zones;
603c2d42c16SAndrew Morton 
604c2d42c16SAndrew Morton 	return
605c2d42c16SAndrew Morton #ifdef CONFIG_ZONE_DMA
606c2d42c16SAndrew Morton 		zone_page_state(&zones[ZONE_DMA], item) +
607c2d42c16SAndrew Morton #endif
608c2d42c16SAndrew Morton #ifdef CONFIG_ZONE_DMA32
609c2d42c16SAndrew Morton 		zone_page_state(&zones[ZONE_DMA32], item) +
610c2d42c16SAndrew Morton #endif
611c2d42c16SAndrew Morton #ifdef CONFIG_HIGHMEM
612c2d42c16SAndrew Morton 		zone_page_state(&zones[ZONE_HIGHMEM], item) +
613c2d42c16SAndrew Morton #endif
614c2d42c16SAndrew Morton 		zone_page_state(&zones[ZONE_NORMAL], item) +
615c2d42c16SAndrew Morton 		zone_page_state(&zones[ZONE_MOVABLE], item);
616c2d42c16SAndrew Morton }
617c2d42c16SAndrew Morton 
618ca889e6cSChristoph Lameter #endif
619ca889e6cSChristoph Lameter 
620d7a5752cSMel Gorman #ifdef CONFIG_COMPACTION
62136deb0beSNamhyung Kim 
622d7a5752cSMel Gorman struct contig_page_info {
623d7a5752cSMel Gorman 	unsigned long free_pages;
624d7a5752cSMel Gorman 	unsigned long free_blocks_total;
625d7a5752cSMel Gorman 	unsigned long free_blocks_suitable;
626d7a5752cSMel Gorman };
627d7a5752cSMel Gorman 
628d7a5752cSMel Gorman /*
629d7a5752cSMel Gorman  * Calculate the number of free pages in a zone, how many contiguous
630d7a5752cSMel Gorman  * pages are free and how many are large enough to satisfy an allocation of
631d7a5752cSMel Gorman  * the target size. Note that this function makes no attempt to estimate
632d7a5752cSMel Gorman  * how many suitable free blocks there *might* be if MOVABLE pages were
633d7a5752cSMel Gorman  * migrated. Calculating that is possible, but expensive and can be
634d7a5752cSMel Gorman  * figured out from userspace
635d7a5752cSMel Gorman  */
636d7a5752cSMel Gorman static void fill_contig_page_info(struct zone *zone,
637d7a5752cSMel Gorman 				unsigned int suitable_order,
638d7a5752cSMel Gorman 				struct contig_page_info *info)
639d7a5752cSMel Gorman {
640d7a5752cSMel Gorman 	unsigned int order;
641d7a5752cSMel Gorman 
642d7a5752cSMel Gorman 	info->free_pages = 0;
643d7a5752cSMel Gorman 	info->free_blocks_total = 0;
644d7a5752cSMel Gorman 	info->free_blocks_suitable = 0;
645d7a5752cSMel Gorman 
646d7a5752cSMel Gorman 	for (order = 0; order < MAX_ORDER; order++) {
647d7a5752cSMel Gorman 		unsigned long blocks;
648d7a5752cSMel Gorman 
649d7a5752cSMel Gorman 		/* Count number of free blocks */
650d7a5752cSMel Gorman 		blocks = zone->free_area[order].nr_free;
651d7a5752cSMel Gorman 		info->free_blocks_total += blocks;
652d7a5752cSMel Gorman 
653d7a5752cSMel Gorman 		/* Count free base pages */
654d7a5752cSMel Gorman 		info->free_pages += blocks << order;
655d7a5752cSMel Gorman 
656d7a5752cSMel Gorman 		/* Count the suitable free blocks */
657d7a5752cSMel Gorman 		if (order >= suitable_order)
658d7a5752cSMel Gorman 			info->free_blocks_suitable += blocks <<
659d7a5752cSMel Gorman 						(order - suitable_order);
660d7a5752cSMel Gorman 	}
661d7a5752cSMel Gorman }
662f1a5ab12SMel Gorman 
663f1a5ab12SMel Gorman /*
664f1a5ab12SMel Gorman  * A fragmentation index only makes sense if an allocation of a requested
665f1a5ab12SMel Gorman  * size would fail. If that is true, the fragmentation index indicates
666f1a5ab12SMel Gorman  * whether external fragmentation or a lack of memory was the problem.
667f1a5ab12SMel Gorman  * The value can be used to determine if page reclaim or compaction
668f1a5ab12SMel Gorman  * should be used
669f1a5ab12SMel Gorman  */
67056de7263SMel Gorman static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
671f1a5ab12SMel Gorman {
672f1a5ab12SMel Gorman 	unsigned long requested = 1UL << order;
673f1a5ab12SMel Gorman 
674f1a5ab12SMel Gorman 	if (!info->free_blocks_total)
675f1a5ab12SMel Gorman 		return 0;
676f1a5ab12SMel Gorman 
677f1a5ab12SMel Gorman 	/* Fragmentation index only makes sense when a request would fail */
678f1a5ab12SMel Gorman 	if (info->free_blocks_suitable)
679f1a5ab12SMel Gorman 		return -1000;
680f1a5ab12SMel Gorman 
681f1a5ab12SMel Gorman 	/*
682f1a5ab12SMel Gorman 	 * Index is between 0 and 1 so return within 3 decimal places
683f1a5ab12SMel Gorman 	 *
684f1a5ab12SMel Gorman 	 * 0 => allocation would fail due to lack of memory
685f1a5ab12SMel Gorman 	 * 1 => allocation would fail due to fragmentation
686f1a5ab12SMel Gorman 	 */
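	/*
	 * Worked example (illustrative numbers): a failed order-3 request
	 * (requested = 8) with 800 free pages spread over 800 order-0 blocks
	 * gives 1000 - (1000 + 800 * 1000 / 8) / 800 = 874, i.e. an index of
	 * 0.874, so fragmentation rather than lack of memory is the problem.
	 */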
687f1a5ab12SMel Gorman 	return 1000 - div_u64((1000 + div_u64(info->free_pages * 1000ULL, requested)), info->free_blocks_total);
688f1a5ab12SMel Gorman }
68956de7263SMel Gorman 
69056de7263SMel Gorman /* Same as __fragmentation index but allocs contig_page_info on stack */
69156de7263SMel Gorman int fragmentation_index(struct zone *zone, unsigned int order)
69256de7263SMel Gorman {
69356de7263SMel Gorman 	struct contig_page_info info;
69456de7263SMel Gorman 
69556de7263SMel Gorman 	fill_contig_page_info(zone, order, &info);
69656de7263SMel Gorman 	return __fragmentation_index(order, &info);
69756de7263SMel Gorman }
698d7a5752cSMel Gorman #endif
699d7a5752cSMel Gorman 
7000d6617c7SDavid Rientjes #if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
701fa25c503SKOSAKI Motohiro #ifdef CONFIG_ZONE_DMA
702fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA(xx) xx "_dma",
703fa25c503SKOSAKI Motohiro #else
704fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA(xx)
705fa25c503SKOSAKI Motohiro #endif
706fa25c503SKOSAKI Motohiro 
707fa25c503SKOSAKI Motohiro #ifdef CONFIG_ZONE_DMA32
708fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA32(xx) xx "_dma32",
709fa25c503SKOSAKI Motohiro #else
710fa25c503SKOSAKI Motohiro #define TEXT_FOR_DMA32(xx)
711fa25c503SKOSAKI Motohiro #endif
712fa25c503SKOSAKI Motohiro 
713fa25c503SKOSAKI Motohiro #ifdef CONFIG_HIGHMEM
714fa25c503SKOSAKI Motohiro #define TEXT_FOR_HIGHMEM(xx) xx "_high",
715fa25c503SKOSAKI Motohiro #else
716fa25c503SKOSAKI Motohiro #define TEXT_FOR_HIGHMEM(xx)
717fa25c503SKOSAKI Motohiro #endif
718fa25c503SKOSAKI Motohiro 
719fa25c503SKOSAKI Motohiro #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
720fa25c503SKOSAKI Motohiro 					TEXT_FOR_HIGHMEM(xx) xx "_movable",
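/*
 * With CONFIG_ZONE_DMA, CONFIG_ZONE_DMA32 and CONFIG_HIGHMEM all enabled,
 * TEXTS_FOR_ZONES("pgalloc") expands to "pgalloc_dma", "pgalloc_dma32",
 * "pgalloc_normal", "pgalloc_high", "pgalloc_movable".
 */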
721fa25c503SKOSAKI Motohiro 
722fa25c503SKOSAKI Motohiro const char * const vmstat_text[] = {
72309316c09SKonstantin Khlebnikov 	/* enum zone_stat_item counters */
724fa25c503SKOSAKI Motohiro 	"nr_free_pages",
72581c0a2bbSJohannes Weiner 	"nr_alloc_batch",
726fa25c503SKOSAKI Motohiro 	"nr_inactive_anon",
727fa25c503SKOSAKI Motohiro 	"nr_active_anon",
728fa25c503SKOSAKI Motohiro 	"nr_inactive_file",
729fa25c503SKOSAKI Motohiro 	"nr_active_file",
730fa25c503SKOSAKI Motohiro 	"nr_unevictable",
731fa25c503SKOSAKI Motohiro 	"nr_mlock",
732fa25c503SKOSAKI Motohiro 	"nr_anon_pages",
733fa25c503SKOSAKI Motohiro 	"nr_mapped",
734fa25c503SKOSAKI Motohiro 	"nr_file_pages",
735fa25c503SKOSAKI Motohiro 	"nr_dirty",
736fa25c503SKOSAKI Motohiro 	"nr_writeback",
737fa25c503SKOSAKI Motohiro 	"nr_slab_reclaimable",
738fa25c503SKOSAKI Motohiro 	"nr_slab_unreclaimable",
739fa25c503SKOSAKI Motohiro 	"nr_page_table_pages",
740fa25c503SKOSAKI Motohiro 	"nr_kernel_stack",
741fa25c503SKOSAKI Motohiro 	"nr_unstable",
742fa25c503SKOSAKI Motohiro 	"nr_bounce",
743fa25c503SKOSAKI Motohiro 	"nr_vmscan_write",
74449ea7eb6SMel Gorman 	"nr_vmscan_immediate_reclaim",
745fa25c503SKOSAKI Motohiro 	"nr_writeback_temp",
746fa25c503SKOSAKI Motohiro 	"nr_isolated_anon",
747fa25c503SKOSAKI Motohiro 	"nr_isolated_file",
748fa25c503SKOSAKI Motohiro 	"nr_shmem",
749fa25c503SKOSAKI Motohiro 	"nr_dirtied",
750fa25c503SKOSAKI Motohiro 	"nr_written",
7510d5d823aSMel Gorman 	"nr_pages_scanned",
752fa25c503SKOSAKI Motohiro 
753fa25c503SKOSAKI Motohiro #ifdef CONFIG_NUMA
754fa25c503SKOSAKI Motohiro 	"numa_hit",
755fa25c503SKOSAKI Motohiro 	"numa_miss",
756fa25c503SKOSAKI Motohiro 	"numa_foreign",
757fa25c503SKOSAKI Motohiro 	"numa_interleave",
758fa25c503SKOSAKI Motohiro 	"numa_local",
759fa25c503SKOSAKI Motohiro 	"numa_other",
760fa25c503SKOSAKI Motohiro #endif
761a528910eSJohannes Weiner 	"workingset_refault",
762a528910eSJohannes Weiner 	"workingset_activate",
763449dd698SJohannes Weiner 	"workingset_nodereclaim",
764fa25c503SKOSAKI Motohiro 	"nr_anon_transparent_hugepages",
765d1ce749aSBartlomiej Zolnierkiewicz 	"nr_free_cma",
76609316c09SKonstantin Khlebnikov 
76709316c09SKonstantin Khlebnikov 	/* enum writeback_stat_item counters */
768fa25c503SKOSAKI Motohiro 	"nr_dirty_threshold",
769fa25c503SKOSAKI Motohiro 	"nr_dirty_background_threshold",
770fa25c503SKOSAKI Motohiro 
771fa25c503SKOSAKI Motohiro #ifdef CONFIG_VM_EVENT_COUNTERS
77209316c09SKonstantin Khlebnikov 	/* enum vm_event_item counters */
773fa25c503SKOSAKI Motohiro 	"pgpgin",
774fa25c503SKOSAKI Motohiro 	"pgpgout",
775fa25c503SKOSAKI Motohiro 	"pswpin",
776fa25c503SKOSAKI Motohiro 	"pswpout",
777fa25c503SKOSAKI Motohiro 
778fa25c503SKOSAKI Motohiro 	TEXTS_FOR_ZONES("pgalloc")
779fa25c503SKOSAKI Motohiro 
780fa25c503SKOSAKI Motohiro 	"pgfree",
781fa25c503SKOSAKI Motohiro 	"pgactivate",
782fa25c503SKOSAKI Motohiro 	"pgdeactivate",
783fa25c503SKOSAKI Motohiro 
784fa25c503SKOSAKI Motohiro 	"pgfault",
785fa25c503SKOSAKI Motohiro 	"pgmajfault",
786854e9ed0SMinchan Kim 	"pglazyfreed",
787fa25c503SKOSAKI Motohiro 
788fa25c503SKOSAKI Motohiro 	TEXTS_FOR_ZONES("pgrefill")
789904249aaSYing Han 	TEXTS_FOR_ZONES("pgsteal_kswapd")
790904249aaSYing Han 	TEXTS_FOR_ZONES("pgsteal_direct")
791fa25c503SKOSAKI Motohiro 	TEXTS_FOR_ZONES("pgscan_kswapd")
792fa25c503SKOSAKI Motohiro 	TEXTS_FOR_ZONES("pgscan_direct")
79368243e76SMel Gorman 	"pgscan_direct_throttle",
794fa25c503SKOSAKI Motohiro 
795fa25c503SKOSAKI Motohiro #ifdef CONFIG_NUMA
796fa25c503SKOSAKI Motohiro 	"zone_reclaim_failed",
797fa25c503SKOSAKI Motohiro #endif
798fa25c503SKOSAKI Motohiro 	"pginodesteal",
799fa25c503SKOSAKI Motohiro 	"slabs_scanned",
800fa25c503SKOSAKI Motohiro 	"kswapd_inodesteal",
801fa25c503SKOSAKI Motohiro 	"kswapd_low_wmark_hit_quickly",
802fa25c503SKOSAKI Motohiro 	"kswapd_high_wmark_hit_quickly",
803fa25c503SKOSAKI Motohiro 	"pageoutrun",
804fa25c503SKOSAKI Motohiro 	"allocstall",
805fa25c503SKOSAKI Motohiro 
806fa25c503SKOSAKI Motohiro 	"pgrotated",
807fa25c503SKOSAKI Motohiro 
8085509a5d2SDave Hansen 	"drop_pagecache",
8095509a5d2SDave Hansen 	"drop_slab",
8105509a5d2SDave Hansen 
81103c5a6e1SMel Gorman #ifdef CONFIG_NUMA_BALANCING
81203c5a6e1SMel Gorman 	"numa_pte_updates",
81372403b4aSMel Gorman 	"numa_huge_pte_updates",
81403c5a6e1SMel Gorman 	"numa_hint_faults",
81503c5a6e1SMel Gorman 	"numa_hint_faults_local",
81603c5a6e1SMel Gorman 	"numa_pages_migrated",
81703c5a6e1SMel Gorman #endif
8185647bc29SMel Gorman #ifdef CONFIG_MIGRATION
8195647bc29SMel Gorman 	"pgmigrate_success",
8205647bc29SMel Gorman 	"pgmigrate_fail",
8215647bc29SMel Gorman #endif
822fa25c503SKOSAKI Motohiro #ifdef CONFIG_COMPACTION
823397487dbSMel Gorman 	"compact_migrate_scanned",
824397487dbSMel Gorman 	"compact_free_scanned",
825397487dbSMel Gorman 	"compact_isolated",
826fa25c503SKOSAKI Motohiro 	"compact_stall",
827fa25c503SKOSAKI Motohiro 	"compact_fail",
828fa25c503SKOSAKI Motohiro 	"compact_success",
829fa25c503SKOSAKI Motohiro #endif
830fa25c503SKOSAKI Motohiro 
831fa25c503SKOSAKI Motohiro #ifdef CONFIG_HUGETLB_PAGE
832fa25c503SKOSAKI Motohiro 	"htlb_buddy_alloc_success",
833fa25c503SKOSAKI Motohiro 	"htlb_buddy_alloc_fail",
834fa25c503SKOSAKI Motohiro #endif
835fa25c503SKOSAKI Motohiro 	"unevictable_pgs_culled",
836fa25c503SKOSAKI Motohiro 	"unevictable_pgs_scanned",
837fa25c503SKOSAKI Motohiro 	"unevictable_pgs_rescued",
838fa25c503SKOSAKI Motohiro 	"unevictable_pgs_mlocked",
839fa25c503SKOSAKI Motohiro 	"unevictable_pgs_munlocked",
840fa25c503SKOSAKI Motohiro 	"unevictable_pgs_cleared",
841fa25c503SKOSAKI Motohiro 	"unevictable_pgs_stranded",
842fa25c503SKOSAKI Motohiro 
843fa25c503SKOSAKI Motohiro #ifdef CONFIG_TRANSPARENT_HUGEPAGE
844fa25c503SKOSAKI Motohiro 	"thp_fault_alloc",
845fa25c503SKOSAKI Motohiro 	"thp_fault_fallback",
846fa25c503SKOSAKI Motohiro 	"thp_collapse_alloc",
847fa25c503SKOSAKI Motohiro 	"thp_collapse_alloc_failed",
848122afea9SKirill A. Shutemov 	"thp_split_page",
849122afea9SKirill A. Shutemov 	"thp_split_page_failed",
850122afea9SKirill A. Shutemov 	"thp_split_pmd",
851d8a8e1f0SKirill A. Shutemov 	"thp_zero_page_alloc",
852d8a8e1f0SKirill A. Shutemov 	"thp_zero_page_alloc_failed",
853fa25c503SKOSAKI Motohiro #endif
85409316c09SKonstantin Khlebnikov #ifdef CONFIG_MEMORY_BALLOON
85509316c09SKonstantin Khlebnikov 	"balloon_inflate",
85609316c09SKonstantin Khlebnikov 	"balloon_deflate",
85709316c09SKonstantin Khlebnikov #ifdef CONFIG_BALLOON_COMPACTION
85809316c09SKonstantin Khlebnikov 	"balloon_migrate",
85909316c09SKonstantin Khlebnikov #endif
86009316c09SKonstantin Khlebnikov #endif /* CONFIG_MEMORY_BALLOON */
861ec659934SMel Gorman #ifdef CONFIG_DEBUG_TLBFLUSH
8626df46865SDave Hansen #ifdef CONFIG_SMP
8639824cf97SDave Hansen 	"nr_tlb_remote_flush",
8649824cf97SDave Hansen 	"nr_tlb_remote_flush_received",
865ec659934SMel Gorman #endif /* CONFIG_SMP */
8669824cf97SDave Hansen 	"nr_tlb_local_flush_all",
8679824cf97SDave Hansen 	"nr_tlb_local_flush_one",
868ec659934SMel Gorman #endif /* CONFIG_DEBUG_TLBFLUSH */
869fa25c503SKOSAKI Motohiro 
8704f115147SDavidlohr Bueso #ifdef CONFIG_DEBUG_VM_VMACACHE
8714f115147SDavidlohr Bueso 	"vmacache_find_calls",
8724f115147SDavidlohr Bueso 	"vmacache_find_hits",
873f5f302e2SDavidlohr Bueso 	"vmacache_full_flushes",
8744f115147SDavidlohr Bueso #endif
875fa25c503SKOSAKI Motohiro #endif /* CONFIG_VM_EVENTS_COUNTERS */
876fa25c503SKOSAKI Motohiro };
8770d6617c7SDavid Rientjes #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
878fa25c503SKOSAKI Motohiro 
879fa25c503SKOSAKI Motohiro 
8803c486871SAndrew Morton #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
8813c486871SAndrew Morton      defined(CONFIG_PROC_FS)
8823c486871SAndrew Morton static void *frag_start(struct seq_file *m, loff_t *pos)
8833c486871SAndrew Morton {
8843c486871SAndrew Morton 	pg_data_t *pgdat;
8853c486871SAndrew Morton 	loff_t node = *pos;
8863c486871SAndrew Morton 
8873c486871SAndrew Morton 	for (pgdat = first_online_pgdat();
8883c486871SAndrew Morton 	     pgdat && node;
8893c486871SAndrew Morton 	     pgdat = next_online_pgdat(pgdat))
8903c486871SAndrew Morton 		--node;
8913c486871SAndrew Morton 
8923c486871SAndrew Morton 	return pgdat;
8933c486871SAndrew Morton }
8943c486871SAndrew Morton 
8953c486871SAndrew Morton static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
8963c486871SAndrew Morton {
8973c486871SAndrew Morton 	pg_data_t *pgdat = (pg_data_t *)arg;
8983c486871SAndrew Morton 
8993c486871SAndrew Morton 	(*pos)++;
9003c486871SAndrew Morton 	return next_online_pgdat(pgdat);
9013c486871SAndrew Morton }
9023c486871SAndrew Morton 
9033c486871SAndrew Morton static void frag_stop(struct seq_file *m, void *arg)
9043c486871SAndrew Morton {
9053c486871SAndrew Morton }
9063c486871SAndrew Morton 
9073c486871SAndrew Morton /* Walk all the zones in a node and print using a callback */
9083c486871SAndrew Morton static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
9093c486871SAndrew Morton 		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
9103c486871SAndrew Morton {
9113c486871SAndrew Morton 	struct zone *zone;
9123c486871SAndrew Morton 	struct zone *node_zones = pgdat->node_zones;
9133c486871SAndrew Morton 	unsigned long flags;
9143c486871SAndrew Morton 
9153c486871SAndrew Morton 	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
9163c486871SAndrew Morton 		if (!populated_zone(zone))
9173c486871SAndrew Morton 			continue;
9183c486871SAndrew Morton 
9193c486871SAndrew Morton 		spin_lock_irqsave(&zone->lock, flags);
9203c486871SAndrew Morton 		print(m, pgdat, zone);
9213c486871SAndrew Morton 		spin_unlock_irqrestore(&zone->lock, flags);
9223c486871SAndrew Morton 	}
9233c486871SAndrew Morton }
9243c486871SAndrew Morton #endif
9253c486871SAndrew Morton 
926d7a5752cSMel Gorman #ifdef CONFIG_PROC_FS
9273c486871SAndrew Morton static char * const migratetype_names[MIGRATE_TYPES] = {
9283c486871SAndrew Morton 	"Unmovable",
9293c486871SAndrew Morton 	"Movable",
930475a2f90SVlastimil Babka 	"Reclaimable",
9310aaa29a5SMel Gorman 	"HighAtomic",
9323c486871SAndrew Morton #ifdef CONFIG_CMA
9333c486871SAndrew Morton 	"CMA",
9343c486871SAndrew Morton #endif
9353c486871SAndrew Morton #ifdef CONFIG_MEMORY_ISOLATION
9363c486871SAndrew Morton 	"Isolate",
9373c486871SAndrew Morton #endif
9383c486871SAndrew Morton };
9393c486871SAndrew Morton 
940467c996cSMel Gorman static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
941467c996cSMel Gorman 						struct zone *zone)
942467c996cSMel Gorman {
943467c996cSMel Gorman 	int order;
944467c996cSMel Gorman 
945f6ac2354SChristoph Lameter 	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
946f6ac2354SChristoph Lameter 	for (order = 0; order < MAX_ORDER; ++order)
947f6ac2354SChristoph Lameter 		seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
948f6ac2354SChristoph Lameter 	seq_putc(m, '\n');
949f6ac2354SChristoph Lameter }
950467c996cSMel Gorman 
951467c996cSMel Gorman /*
952467c996cSMel Gorman  * This walks the free areas for each zone.
953467c996cSMel Gorman  */
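/*
 * This typically backs /proc/buddyinfo; a line looks like (sample values):
 *	Node 0, zone   Normal    216     55     12      4      1      0 ...
 */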
954467c996cSMel Gorman static int frag_show(struct seq_file *m, void *arg)
955467c996cSMel Gorman {
956467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
957467c996cSMel Gorman 	walk_zones_in_node(m, pgdat, frag_show_print);
958467c996cSMel Gorman 	return 0;
959467c996cSMel Gorman }
960467c996cSMel Gorman 
961467c996cSMel Gorman static void pagetypeinfo_showfree_print(struct seq_file *m,
962467c996cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
963467c996cSMel Gorman {
964467c996cSMel Gorman 	int order, mtype;
965467c996cSMel Gorman 
966467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
967467c996cSMel Gorman 		seq_printf(m, "Node %4d, zone %8s, type %12s ",
968467c996cSMel Gorman 					pgdat->node_id,
969467c996cSMel Gorman 					zone->name,
970467c996cSMel Gorman 					migratetype_names[mtype]);
971467c996cSMel Gorman 		for (order = 0; order < MAX_ORDER; ++order) {
972467c996cSMel Gorman 			unsigned long freecount = 0;
973467c996cSMel Gorman 			struct free_area *area;
974467c996cSMel Gorman 			struct list_head *curr;
975467c996cSMel Gorman 
976467c996cSMel Gorman 			area = &(zone->free_area[order]);
977467c996cSMel Gorman 
978467c996cSMel Gorman 			list_for_each(curr, &area->free_list[mtype])
979467c996cSMel Gorman 				freecount++;
980467c996cSMel Gorman 			seq_printf(m, "%6lu ", freecount);
981467c996cSMel Gorman 		}
982467c996cSMel Gorman 		seq_putc(m, '\n');
983467c996cSMel Gorman 	}
984467c996cSMel Gorman }
985467c996cSMel Gorman 
986467c996cSMel Gorman /* Print out the free pages at each order for each migratetype */
987467c996cSMel Gorman static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
988467c996cSMel Gorman {
989467c996cSMel Gorman 	int order;
990467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
991467c996cSMel Gorman 
992467c996cSMel Gorman 	/* Print header */
993467c996cSMel Gorman 	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
994467c996cSMel Gorman 	for (order = 0; order < MAX_ORDER; ++order)
995467c996cSMel Gorman 		seq_printf(m, "%6d ", order);
996467c996cSMel Gorman 	seq_putc(m, '\n');
997467c996cSMel Gorman 
998467c996cSMel Gorman 	walk_zones_in_node(m, pgdat, pagetypeinfo_showfree_print);
999467c996cSMel Gorman 
1000467c996cSMel Gorman 	return 0;
1001467c996cSMel Gorman }
1002467c996cSMel Gorman 
1003467c996cSMel Gorman static void pagetypeinfo_showblockcount_print(struct seq_file *m,
1004467c996cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1005467c996cSMel Gorman {
1006467c996cSMel Gorman 	int mtype;
1007467c996cSMel Gorman 	unsigned long pfn;
1008467c996cSMel Gorman 	unsigned long start_pfn = zone->zone_start_pfn;
1009108bcc96SCody P Schafer 	unsigned long end_pfn = zone_end_pfn(zone);
1010467c996cSMel Gorman 	unsigned long count[MIGRATE_TYPES] = { 0, };
1011467c996cSMel Gorman 
1012467c996cSMel Gorman 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
1013467c996cSMel Gorman 		struct page *page;
1014467c996cSMel Gorman 
1015467c996cSMel Gorman 		if (!pfn_valid(pfn))
1016467c996cSMel Gorman 			continue;
1017467c996cSMel Gorman 
1018467c996cSMel Gorman 		page = pfn_to_page(pfn);
1019eb33575cSMel Gorman 
1020eb33575cSMel Gorman 		/* Watch for unexpected holes punched in the memmap */
1021eb33575cSMel Gorman 		if (!memmap_valid_within(pfn, page, zone))
1022e80d6a24SMel Gorman 			continue;
1023eb33575cSMel Gorman 
1024467c996cSMel Gorman 		mtype = get_pageblock_migratetype(page);
1025467c996cSMel Gorman 
1026e80d6a24SMel Gorman 		if (mtype < MIGRATE_TYPES)
1027467c996cSMel Gorman 			count[mtype]++;
1028467c996cSMel Gorman 	}
1029467c996cSMel Gorman 
1030467c996cSMel Gorman 	/* Print counts */
1031467c996cSMel Gorman 	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1032467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1033467c996cSMel Gorman 		seq_printf(m, "%12lu ", count[mtype]);
1034467c996cSMel Gorman 	seq_putc(m, '\n');
1035467c996cSMel Gorman }
1036467c996cSMel Gorman 
1037467c996cSMel Gorman /* Print out the number of pageblocks for each migratetype */
1038467c996cSMel Gorman static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
1039467c996cSMel Gorman {
1040467c996cSMel Gorman 	int mtype;
1041467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1042467c996cSMel Gorman 
1043467c996cSMel Gorman 	seq_printf(m, "\n%-23s", "Number of blocks type ");
1044467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1045467c996cSMel Gorman 		seq_printf(m, "%12s ", migratetype_names[mtype]);
1046467c996cSMel Gorman 	seq_putc(m, '\n');
1047467c996cSMel Gorman 	walk_zones_in_node(m, pgdat, pagetypeinfo_showblockcount_print);
1048467c996cSMel Gorman 
1049467c996cSMel Gorman 	return 0;
1050467c996cSMel Gorman }
1051467c996cSMel Gorman 
105248c96a36SJoonsoo Kim #ifdef CONFIG_PAGE_OWNER
105348c96a36SJoonsoo Kim static void pagetypeinfo_showmixedcount_print(struct seq_file *m,
105448c96a36SJoonsoo Kim 							pg_data_t *pgdat,
105548c96a36SJoonsoo Kim 							struct zone *zone)
105648c96a36SJoonsoo Kim {
105748c96a36SJoonsoo Kim 	struct page *page;
105848c96a36SJoonsoo Kim 	struct page_ext *page_ext;
105948c96a36SJoonsoo Kim 	unsigned long pfn = zone->zone_start_pfn, block_end_pfn;
106048c96a36SJoonsoo Kim 	unsigned long end_pfn = pfn + zone->spanned_pages;
106148c96a36SJoonsoo Kim 	unsigned long count[MIGRATE_TYPES] = { 0, };
106248c96a36SJoonsoo Kim 	int pageblock_mt, page_mt;
106348c96a36SJoonsoo Kim 	int i;
106448c96a36SJoonsoo Kim 
106548c96a36SJoonsoo Kim 	/* Scan block by block. First and last block may be incomplete */
106648c96a36SJoonsoo Kim 	pfn = zone->zone_start_pfn;
106748c96a36SJoonsoo Kim 
106848c96a36SJoonsoo Kim 	/*
106948c96a36SJoonsoo Kim 	 * Walk the zone in pageblock_nr_pages steps. If a page block spans
107048c96a36SJoonsoo Kim 	 * a zone boundary, it will be double counted between zones. This does
107148c96a36SJoonsoo Kim 	 * not matter as the mixed block count will still be correct
107248c96a36SJoonsoo Kim 	 */
107348c96a36SJoonsoo Kim 	for (; pfn < end_pfn; ) {
107448c96a36SJoonsoo Kim 		if (!pfn_valid(pfn)) {
107548c96a36SJoonsoo Kim 			pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
107648c96a36SJoonsoo Kim 			continue;
107748c96a36SJoonsoo Kim 		}
107848c96a36SJoonsoo Kim 
107948c96a36SJoonsoo Kim 		block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
108048c96a36SJoonsoo Kim 		block_end_pfn = min(block_end_pfn, end_pfn);
108148c96a36SJoonsoo Kim 
108248c96a36SJoonsoo Kim 		page = pfn_to_page(pfn);
108348c96a36SJoonsoo Kim 		pageblock_mt = get_pfnblock_migratetype(page, pfn);
108448c96a36SJoonsoo Kim 
108548c96a36SJoonsoo Kim 		for (; pfn < block_end_pfn; pfn++) {
108648c96a36SJoonsoo Kim 			if (!pfn_valid_within(pfn))
108748c96a36SJoonsoo Kim 				continue;
108848c96a36SJoonsoo Kim 
108948c96a36SJoonsoo Kim 			page = pfn_to_page(pfn);
109048c96a36SJoonsoo Kim 			if (PageBuddy(page)) {
109148c96a36SJoonsoo Kim 				pfn += (1UL << page_order(page)) - 1;
109248c96a36SJoonsoo Kim 				continue;
109348c96a36SJoonsoo Kim 			}
109448c96a36SJoonsoo Kim 
109548c96a36SJoonsoo Kim 			if (PageReserved(page))
109648c96a36SJoonsoo Kim 				continue;
109748c96a36SJoonsoo Kim 
109848c96a36SJoonsoo Kim 			page_ext = lookup_page_ext(page);
			/*
			 * lookup_page_ext() may return NULL, e.g. when the
			 * extension area could not be allocated; skip the page.
			 */
			if (unlikely(!page_ext))
				continue;
109948c96a36SJoonsoo Kim 
110048c96a36SJoonsoo Kim 			if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
110148c96a36SJoonsoo Kim 				continue;
110248c96a36SJoonsoo Kim 
110348c96a36SJoonsoo Kim 			page_mt = gfpflags_to_migratetype(page_ext->gfp_mask);
110448c96a36SJoonsoo Kim 			if (pageblock_mt != page_mt) {
110548c96a36SJoonsoo Kim 				if (is_migrate_cma(pageblock_mt))
110648c96a36SJoonsoo Kim 					count[MIGRATE_MOVABLE]++;
110748c96a36SJoonsoo Kim 				else
110848c96a36SJoonsoo Kim 					count[pageblock_mt]++;
110948c96a36SJoonsoo Kim 
111048c96a36SJoonsoo Kim 				pfn = block_end_pfn;
111148c96a36SJoonsoo Kim 				break;
111248c96a36SJoonsoo Kim 			}
111348c96a36SJoonsoo Kim 			pfn += (1UL << page_ext->order) - 1;
111448c96a36SJoonsoo Kim 		}
111548c96a36SJoonsoo Kim 	}
111648c96a36SJoonsoo Kim 
111748c96a36SJoonsoo Kim 	/* Print counts */
111848c96a36SJoonsoo Kim 	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
111948c96a36SJoonsoo Kim 	for (i = 0; i < MIGRATE_TYPES; i++)
112048c96a36SJoonsoo Kim 		seq_printf(m, "%12lu ", count[i]);
112148c96a36SJoonsoo Kim 	seq_putc(m, '\n');
112248c96a36SJoonsoo Kim }
112348c96a36SJoonsoo Kim #endif /* CONFIG_PAGE_OWNER */
112448c96a36SJoonsoo Kim 
112548c96a36SJoonsoo Kim /*
112648c96a36SJoonsoo Kim  * Print out the number of pageblocks for each migratetype that contain pages
112748c96a36SJoonsoo Kim  * of other types. This gives an indication of how well fallbacks are being
112848c96a36SJoonsoo Kim  * contained by __rmqueue_fallback(). It requires information from PAGE_OWNER
112948c96a36SJoonsoo Kim  * to determine what is going on.
113048c96a36SJoonsoo Kim  */
113148c96a36SJoonsoo Kim static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
113248c96a36SJoonsoo Kim {
113348c96a36SJoonsoo Kim #ifdef CONFIG_PAGE_OWNER
113448c96a36SJoonsoo Kim 	int mtype;
113548c96a36SJoonsoo Kim 
113648c96a36SJoonsoo Kim 	if (!page_owner_inited)
113748c96a36SJoonsoo Kim 		return;
113848c96a36SJoonsoo Kim 
113948c96a36SJoonsoo Kim 	drain_all_pages(NULL);
114048c96a36SJoonsoo Kim 
114148c96a36SJoonsoo Kim 	seq_printf(m, "\n%-23s", "Number of mixed blocks ");
114248c96a36SJoonsoo Kim 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
114348c96a36SJoonsoo Kim 		seq_printf(m, "%12s ", migratetype_names[mtype]);
114448c96a36SJoonsoo Kim 	seq_putc(m, '\n');
114548c96a36SJoonsoo Kim 
114648c96a36SJoonsoo Kim 	walk_zones_in_node(m, pgdat, pagetypeinfo_showmixedcount_print);
114748c96a36SJoonsoo Kim #endif /* CONFIG_PAGE_OWNER */
114848c96a36SJoonsoo Kim }
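/*
 * Editorial note: when CONFIG_PAGE_OWNER is enabled and page owner tracking
 * is active (page_owner_inited, typically via the page_owner=on boot option),
 * the function above appends one more table to /proc/pagetypeinfo, e.g. with
 * hypothetical values:
 *
 *   Number of mixed blocks    Unmovable      Movable  Reclaimable ...
 *   Node 0, zone   Normal             3            0            1 ...
 */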
114948c96a36SJoonsoo Kim 
1150467c996cSMel Gorman /*
1151467c996cSMel Gorman  * This prints out statistics in relation to grouping pages by mobility.
1152467c996cSMel Gorman  * It is expensive to collect so do not constantly read the file.
1153467c996cSMel Gorman  */
1154467c996cSMel Gorman static int pagetypeinfo_show(struct seq_file *m, void *arg)
1155467c996cSMel Gorman {
1156467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1157467c996cSMel Gorman 
115841b25a37SKOSAKI Motohiro 	/* check memoryless node */
1159a47b53c5SLai Jiangshan 	if (!node_state(pgdat->node_id, N_MEMORY))
116041b25a37SKOSAKI Motohiro 		return 0;
116141b25a37SKOSAKI Motohiro 
1162467c996cSMel Gorman 	seq_printf(m, "Page block order: %d\n", pageblock_order);
1163467c996cSMel Gorman 	seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
1164467c996cSMel Gorman 	seq_putc(m, '\n');
1165467c996cSMel Gorman 	pagetypeinfo_showfree(m, pgdat);
1166467c996cSMel Gorman 	pagetypeinfo_showblockcount(m, pgdat);
116748c96a36SJoonsoo Kim 	pagetypeinfo_showmixedcount(m, pgdat);
1168467c996cSMel Gorman 
1169f6ac2354SChristoph Lameter 	return 0;
1170f6ac2354SChristoph Lameter }
1171f6ac2354SChristoph Lameter 
11728f32f7e5SAlexey Dobriyan static const struct seq_operations fragmentation_op = {
1173f6ac2354SChristoph Lameter 	.start	= frag_start,
1174f6ac2354SChristoph Lameter 	.next	= frag_next,
1175f6ac2354SChristoph Lameter 	.stop	= frag_stop,
1176f6ac2354SChristoph Lameter 	.show	= frag_show,
1177f6ac2354SChristoph Lameter };
1178f6ac2354SChristoph Lameter 
11798f32f7e5SAlexey Dobriyan static int fragmentation_open(struct inode *inode, struct file *file)
11808f32f7e5SAlexey Dobriyan {
11818f32f7e5SAlexey Dobriyan 	return seq_open(file, &fragmentation_op);
11828f32f7e5SAlexey Dobriyan }
11838f32f7e5SAlexey Dobriyan 
11848f32f7e5SAlexey Dobriyan static const struct file_operations fragmentation_file_operations = {
11858f32f7e5SAlexey Dobriyan 	.open		= fragmentation_open,
11868f32f7e5SAlexey Dobriyan 	.read		= seq_read,
11878f32f7e5SAlexey Dobriyan 	.llseek		= seq_lseek,
11888f32f7e5SAlexey Dobriyan 	.release	= seq_release,
11898f32f7e5SAlexey Dobriyan };
11908f32f7e5SAlexey Dobriyan 
119174e2e8e8SAlexey Dobriyan static const struct seq_operations pagetypeinfo_op = {
1192467c996cSMel Gorman 	.start	= frag_start,
1193467c996cSMel Gorman 	.next	= frag_next,
1194467c996cSMel Gorman 	.stop	= frag_stop,
1195467c996cSMel Gorman 	.show	= pagetypeinfo_show,
1196467c996cSMel Gorman };
1197467c996cSMel Gorman 
119874e2e8e8SAlexey Dobriyan static int pagetypeinfo_open(struct inode *inode, struct file *file)
119974e2e8e8SAlexey Dobriyan {
120074e2e8e8SAlexey Dobriyan 	return seq_open(file, &pagetypeinfo_op);
120174e2e8e8SAlexey Dobriyan }
120274e2e8e8SAlexey Dobriyan 
120374e2e8e8SAlexey Dobriyan static const struct file_operations pagetypeinfo_file_ops = {
120474e2e8e8SAlexey Dobriyan 	.open		= pagetypeinfo_open,
120574e2e8e8SAlexey Dobriyan 	.read		= seq_read,
120674e2e8e8SAlexey Dobriyan 	.llseek		= seq_lseek,
120774e2e8e8SAlexey Dobriyan 	.release	= seq_release,
120874e2e8e8SAlexey Dobriyan };
120974e2e8e8SAlexey Dobriyan 
1210467c996cSMel Gorman static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1211467c996cSMel Gorman 							struct zone *zone)
1212f6ac2354SChristoph Lameter {
1213f6ac2354SChristoph Lameter 	int i;
1214f6ac2354SChristoph Lameter 	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
1215f6ac2354SChristoph Lameter 	seq_printf(m,
1216f6ac2354SChristoph Lameter 		   "\n  pages free     %lu"
1217f6ac2354SChristoph Lameter 		   "\n        min      %lu"
1218f6ac2354SChristoph Lameter 		   "\n        low      %lu"
1219f6ac2354SChristoph Lameter 		   "\n        high     %lu"
122008d9ae7cSWu Fengguang 		   "\n        scanned  %lu"
1221f6ac2354SChristoph Lameter 		   "\n        spanned  %lu"
12229feedc9dSJiang Liu 		   "\n        present  %lu"
12239feedc9dSJiang Liu 		   "\n        managed  %lu",
122488f5acf8SMel Gorman 		   zone_page_state(zone, NR_FREE_PAGES),
122541858966SMel Gorman 		   min_wmark_pages(zone),
122641858966SMel Gorman 		   low_wmark_pages(zone),
122741858966SMel Gorman 		   high_wmark_pages(zone),
12280d5d823aSMel Gorman 		   zone_page_state(zone, NR_PAGES_SCANNED),
1229f6ac2354SChristoph Lameter 		   zone->spanned_pages,
12309feedc9dSJiang Liu 		   zone->present_pages,
12319feedc9dSJiang Liu 		   zone->managed_pages);
12322244b95aSChristoph Lameter 
12332244b95aSChristoph Lameter 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
12342244b95aSChristoph Lameter 		seq_printf(m, "\n    %-12s %lu", vmstat_text[i],
12352244b95aSChristoph Lameter 				zone_page_state(zone, i));
12362244b95aSChristoph Lameter 
1237f6ac2354SChristoph Lameter 	seq_printf(m,
12383484b2deSMel Gorman 		   "\n        protection: (%ld",
1239f6ac2354SChristoph Lameter 		   zone->lowmem_reserve[0]);
1240f6ac2354SChristoph Lameter 	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
12413484b2deSMel Gorman 		seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
1242f6ac2354SChristoph Lameter 	seq_printf(m,
1243f6ac2354SChristoph Lameter 		   ")"
1244f6ac2354SChristoph Lameter 		   "\n  pagesets");
1245f6ac2354SChristoph Lameter 	for_each_online_cpu(i) {
1246f6ac2354SChristoph Lameter 		struct per_cpu_pageset *pageset;
1247f6ac2354SChristoph Lameter 
124899dcc3e5SChristoph Lameter 		pageset = per_cpu_ptr(zone->pageset, i);
1249f6ac2354SChristoph Lameter 		seq_printf(m,
12503dfa5721SChristoph Lameter 			   "\n    cpu: %i"
1251f6ac2354SChristoph Lameter 			   "\n              count: %i"
1252f6ac2354SChristoph Lameter 			   "\n              high:  %i"
1253f6ac2354SChristoph Lameter 			   "\n              batch: %i",
12543dfa5721SChristoph Lameter 			   i,
12553dfa5721SChristoph Lameter 			   pageset->pcp.count,
12563dfa5721SChristoph Lameter 			   pageset->pcp.high,
12573dfa5721SChristoph Lameter 			   pageset->pcp.batch);
1258df9ecabaSChristoph Lameter #ifdef CONFIG_SMP
1259df9ecabaSChristoph Lameter 		seq_printf(m, "\n  vm stats threshold: %d",
1260df9ecabaSChristoph Lameter 				pageset->stat_threshold);
1261df9ecabaSChristoph Lameter #endif
1262f6ac2354SChristoph Lameter 	}
1263f6ac2354SChristoph Lameter 	seq_printf(m,
1264f6ac2354SChristoph Lameter 		   "\n  all_unreclaimable: %u"
1265556adecbSRik van Riel 		   "\n  start_pfn:         %lu"
1266556adecbSRik van Riel 		   "\n  inactive_ratio:    %u",
12676e543d57SLisa Du 		   !zone_reclaimable(zone),
1268556adecbSRik van Riel 		   zone->zone_start_pfn,
1269556adecbSRik van Riel 		   zone->inactive_ratio);
1270f6ac2354SChristoph Lameter 	seq_putc(m, '\n');
1271f6ac2354SChristoph Lameter }
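/*
 * Abridged, illustrative example of the per-zone stanza emitted above into
 * /proc/zoneinfo (all values are hypothetical):
 *
 *   Node 0, zone   Normal
 *     pages free     81925
 *           min      11284
 *           low      14105
 *           high     16926
 *           scanned  0
 *           spanned  1310720
 *           present  1310720
 *           managed  1285514
 *       nr_free_pages 81925
 *       ...
 *           protection: (0, 0, 0, 0)
 *     pagesets
 *       cpu: 0
 *                 count: 161
 *                 high:  186
 *                 batch: 31
 *     vm stats threshold: 24
 *     all_unreclaimable: 0
 *     start_pfn:         1048576
 *     inactive_ratio:    4
 */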
1272467c996cSMel Gorman 
1273467c996cSMel Gorman /*
1274467c996cSMel Gorman  * Output information about zones in @pgdat.
1275467c996cSMel Gorman  */
1276467c996cSMel Gorman static int zoneinfo_show(struct seq_file *m, void *arg)
1277467c996cSMel Gorman {
1278467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1279467c996cSMel Gorman 	walk_zones_in_node(m, pgdat, zoneinfo_show_print);
1280f6ac2354SChristoph Lameter 	return 0;
1281f6ac2354SChristoph Lameter }
1282f6ac2354SChristoph Lameter 
12835c9fe628SAlexey Dobriyan static const struct seq_operations zoneinfo_op = {
1284f6ac2354SChristoph Lameter 	.start	= frag_start, /* iterate over all zones. The same as in
1285f6ac2354SChristoph Lameter 			       * fragmentation. */
1286f6ac2354SChristoph Lameter 	.next	= frag_next,
1287f6ac2354SChristoph Lameter 	.stop	= frag_stop,
1288f6ac2354SChristoph Lameter 	.show	= zoneinfo_show,
1289f6ac2354SChristoph Lameter };
1290f6ac2354SChristoph Lameter 
12915c9fe628SAlexey Dobriyan static int zoneinfo_open(struct inode *inode, struct file *file)
12925c9fe628SAlexey Dobriyan {
12935c9fe628SAlexey Dobriyan 	return seq_open(file, &zoneinfo_op);
12945c9fe628SAlexey Dobriyan }
12955c9fe628SAlexey Dobriyan 
12965c9fe628SAlexey Dobriyan static const struct file_operations proc_zoneinfo_file_operations = {
12975c9fe628SAlexey Dobriyan 	.open		= zoneinfo_open,
12985c9fe628SAlexey Dobriyan 	.read		= seq_read,
12995c9fe628SAlexey Dobriyan 	.llseek		= seq_lseek,
13005c9fe628SAlexey Dobriyan 	.release	= seq_release,
13015c9fe628SAlexey Dobriyan };
13025c9fe628SAlexey Dobriyan 
130379da826aSMichael Rubin enum writeback_stat_item {
130479da826aSMichael Rubin 	NR_DIRTY_THRESHOLD,
130579da826aSMichael Rubin 	NR_DIRTY_BG_THRESHOLD,
130679da826aSMichael Rubin 	NR_VM_WRITEBACK_STAT_ITEMS,
130779da826aSMichael Rubin };
130879da826aSMichael Rubin 
1309f6ac2354SChristoph Lameter static void *vmstat_start(struct seq_file *m, loff_t *pos)
1310f6ac2354SChristoph Lameter {
13112244b95aSChristoph Lameter 	unsigned long *v;
131279da826aSMichael Rubin 	int i, stat_items_size;
1313f6ac2354SChristoph Lameter 
1314f6ac2354SChristoph Lameter 	if (*pos >= ARRAY_SIZE(vmstat_text))
1315f6ac2354SChristoph Lameter 		return NULL;
131679da826aSMichael Rubin 	stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
131779da826aSMichael Rubin 			  NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
1318f6ac2354SChristoph Lameter 
1319f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
132079da826aSMichael Rubin 	stat_items_size += sizeof(struct vm_event_state);
1321f8891e5eSChristoph Lameter #endif
132279da826aSMichael Rubin 
132379da826aSMichael Rubin 	v = kmalloc(stat_items_size, GFP_KERNEL);
13242244b95aSChristoph Lameter 	m->private = v;
13252244b95aSChristoph Lameter 	if (!v)
1326f6ac2354SChristoph Lameter 		return ERR_PTR(-ENOMEM);
13272244b95aSChristoph Lameter 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
13282244b95aSChristoph Lameter 		v[i] = global_page_state(i);
132979da826aSMichael Rubin 	v += NR_VM_ZONE_STAT_ITEMS;
133079da826aSMichael Rubin 
133179da826aSMichael Rubin 	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
133279da826aSMichael Rubin 			    v + NR_DIRTY_THRESHOLD);
133379da826aSMichael Rubin 	v += NR_VM_WRITEBACK_STAT_ITEMS;
133479da826aSMichael Rubin 
1335f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
133679da826aSMichael Rubin 	all_vm_events(v);
133779da826aSMichael Rubin 	v[PGPGIN] /= 2;		/* sectors -> kbytes */
133879da826aSMichael Rubin 	v[PGPGOUT] /= 2;
1339f8891e5eSChristoph Lameter #endif
1340ff8b16d7SWu Fengguang 	return (unsigned long *)m->private + *pos;
1341f6ac2354SChristoph Lameter }
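/*
 * Editorial sketch of the snapshot buffer built by vmstat_start() above.
 * The exact item counts are config dependent, but the ordering matches
 * vmstat_text[]:
 *
 *   v[0 .. NR_VM_ZONE_STAT_ITEMS-1]                   global zone counters
 *   v[NR_VM_ZONE_STAT_ITEMS + NR_DIRTY_THRESHOLD]     dirty throttling limit
 *   v[NR_VM_ZONE_STAT_ITEMS + NR_DIRTY_BG_THRESHOLD]  background dirty limit
 *   v[NR_VM_ZONE_STAT_ITEMS + NR_VM_WRITEBACK_STAT_ITEMS ...]
 *                                                     vm event counters (only
 *                                                     with VM_EVENT_COUNTERS)
 *
 * vmstat_show() then prints v[*pos] next to vmstat_text[*pos], giving one
 * "name value" line per item in /proc/vmstat.
 */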
1342f6ac2354SChristoph Lameter 
1343f6ac2354SChristoph Lameter static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
1344f6ac2354SChristoph Lameter {
1345f6ac2354SChristoph Lameter 	(*pos)++;
1346f6ac2354SChristoph Lameter 	if (*pos >= ARRAY_SIZE(vmstat_text))
1347f6ac2354SChristoph Lameter 		return NULL;
1348f6ac2354SChristoph Lameter 	return (unsigned long *)m->private + *pos;
1349f6ac2354SChristoph Lameter }
1350f6ac2354SChristoph Lameter 
1351f6ac2354SChristoph Lameter static int vmstat_show(struct seq_file *m, void *arg)
1352f6ac2354SChristoph Lameter {
1353f6ac2354SChristoph Lameter 	unsigned long *l = arg;
1354f6ac2354SChristoph Lameter 	unsigned long off = l - (unsigned long *)m->private;
1355f6ac2354SChristoph Lameter 
1356f6ac2354SChristoph Lameter 	seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
1357f6ac2354SChristoph Lameter 	return 0;
1358f6ac2354SChristoph Lameter }
1359f6ac2354SChristoph Lameter 
1360f6ac2354SChristoph Lameter static void vmstat_stop(struct seq_file *m, void *arg)
1361f6ac2354SChristoph Lameter {
1362f6ac2354SChristoph Lameter 	kfree(m->private);
1363f6ac2354SChristoph Lameter 	m->private = NULL;
1364f6ac2354SChristoph Lameter }
1365f6ac2354SChristoph Lameter 
1366b6aa44abSAlexey Dobriyan static const struct seq_operations vmstat_op = {
1367f6ac2354SChristoph Lameter 	.start	= vmstat_start,
1368f6ac2354SChristoph Lameter 	.next	= vmstat_next,
1369f6ac2354SChristoph Lameter 	.stop	= vmstat_stop,
1370f6ac2354SChristoph Lameter 	.show	= vmstat_show,
1371f6ac2354SChristoph Lameter };
1372f6ac2354SChristoph Lameter 
1373b6aa44abSAlexey Dobriyan static int vmstat_open(struct inode *inode, struct file *file)
1374b6aa44abSAlexey Dobriyan {
1375b6aa44abSAlexey Dobriyan 	return seq_open(file, &vmstat_op);
1376b6aa44abSAlexey Dobriyan }
1377b6aa44abSAlexey Dobriyan 
1378b6aa44abSAlexey Dobriyan static const struct file_operations proc_vmstat_file_operations = {
1379b6aa44abSAlexey Dobriyan 	.open		= vmstat_open,
1380b6aa44abSAlexey Dobriyan 	.read		= seq_read,
1381b6aa44abSAlexey Dobriyan 	.llseek		= seq_lseek,
1382b6aa44abSAlexey Dobriyan 	.release	= seq_release,
1383b6aa44abSAlexey Dobriyan };
1384f6ac2354SChristoph Lameter #endif /* CONFIG_PROC_FS */
1385f6ac2354SChristoph Lameter 
1386df9ecabaSChristoph Lameter #ifdef CONFIG_SMP
1387373ccbe5SMichal Hocko static struct workqueue_struct *vmstat_wq;
1388d1187ed2SChristoph Lameter static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
138977461ab3SChristoph Lameter int sysctl_stat_interval __read_mostly = HZ;
13907cc36bbdSChristoph Lameter static cpumask_var_t cpu_stat_off;
1391d1187ed2SChristoph Lameter 
1392d1187ed2SChristoph Lameter static void vmstat_update(struct work_struct *w)
1393d1187ed2SChristoph Lameter {
13940eb77e98SChristoph Lameter 	if (refresh_cpu_vm_stats(true)) {
13957cc36bbdSChristoph Lameter 		/*
13967cc36bbdSChristoph Lameter 		 * Counters were updated so we expect more updates
13977cc36bbdSChristoph Lameter 		 * to occur in the future. Keep on running the
13987cc36bbdSChristoph Lameter 		 * update worker thread.
1399f01f17d3SMichal Hocko 		 * If we were marked on cpu_stat_off, clear the flag
1400f01f17d3SMichal Hocko 		 * so that vmstat_shepherd doesn't schedule us again.
14017cc36bbdSChristoph Lameter 		 */
1402f01f17d3SMichal Hocko 		if (!cpumask_test_and_clear_cpu(smp_processor_id(),
1403f01f17d3SMichal Hocko 						cpu_stat_off)) {
1404373ccbe5SMichal Hocko 			queue_delayed_work_on(smp_processor_id(), vmstat_wq,
1405176bed1dSLinus Torvalds 				this_cpu_ptr(&vmstat_work),
140698f4ebb2SAnton Blanchard 				round_jiffies_relative(sysctl_stat_interval));
1407f01f17d3SMichal Hocko 		}
1408176bed1dSLinus Torvalds 	} else {
14097cc36bbdSChristoph Lameter 		/*
14107cc36bbdSChristoph Lameter 		 * We did not update any counters so the app may be in
14117cc36bbdSChristoph Lameter 		 * a mode where it does not cause counter updates.
14127cc36bbdSChristoph Lameter 		 * We may be uselessly running vmstat_update.
14137cc36bbdSChristoph Lameter 		 * Defer the checking for differentials to the
14147cc36bbdSChristoph Lameter 		 * shepherd thread on a different processor.
14157cc36bbdSChristoph Lameter 		 */
1416587198baSChristoph Lameter 		cpumask_set_cpu(smp_processor_id(), cpu_stat_off);
14177cc36bbdSChristoph Lameter 	}
1418d1187ed2SChristoph Lameter }
1419d1187ed2SChristoph Lameter 
14207cc36bbdSChristoph Lameter /*
14210eb77e98SChristoph Lameter  * quiet_vmstat() below switches off vmstat processing and then folds all the
14220eb77e98SChristoph Lameter  * remaining differentials until the diffs stay at zero. It is used by NOHZ
14230eb77e98SChristoph Lameter  * and can only be invoked when tick processing is not active.
14240eb77e98SChristoph Lameter  */
14250eb77e98SChristoph Lameter /*
14267cc36bbdSChristoph Lameter  * Check if the diffs for a certain cpu indicate that
14277cc36bbdSChristoph Lameter  * an update is needed.
14287cc36bbdSChristoph Lameter  */
14297cc36bbdSChristoph Lameter static bool need_update(int cpu)
1430d1187ed2SChristoph Lameter {
14317cc36bbdSChristoph Lameter 	struct zone *zone;
1432d1187ed2SChristoph Lameter 
14337cc36bbdSChristoph Lameter 	for_each_populated_zone(zone) {
14347cc36bbdSChristoph Lameter 		struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
14357cc36bbdSChristoph Lameter 
14367cc36bbdSChristoph Lameter 		BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
14377cc36bbdSChristoph Lameter 		/*
14387cc36bbdSChristoph Lameter 		 * The fast way of checking if there are any vmstat diffs.
14397cc36bbdSChristoph Lameter 		 * This works because the diffs are byte sized items.
14407cc36bbdSChristoph Lameter 		 */
14417cc36bbdSChristoph Lameter 		if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
14427cc36bbdSChristoph Lameter 			return true;
14437cc36bbdSChristoph Lameter 
14447cc36bbdSChristoph Lameter 	}
14457cc36bbdSChristoph Lameter 	return false;
14467cc36bbdSChristoph Lameter }
14477cc36bbdSChristoph Lameter 
1448f01f17d3SMichal Hocko void quiet_vmstat(void)
1449f01f17d3SMichal Hocko {
1450f01f17d3SMichal Hocko 	if (system_state != SYSTEM_RUNNING)
1451f01f17d3SMichal Hocko 		return;
1452f01f17d3SMichal Hocko 
1453f01f17d3SMichal Hocko 	/*
1454f01f17d3SMichal Hocko 	 * If we are already in the hands of the shepherd then there
1455f01f17d3SMichal Hocko 	 * is nothing for us to do here.
1456f01f17d3SMichal Hocko 	 */
1457f01f17d3SMichal Hocko 	if (cpumask_test_and_set_cpu(smp_processor_id(), cpu_stat_off))
1458f01f17d3SMichal Hocko 		return;
1459f01f17d3SMichal Hocko 
1460f01f17d3SMichal Hocko 	if (!need_update(smp_processor_id()))
1461f01f17d3SMichal Hocko 		return;
1462f01f17d3SMichal Hocko 
1463f01f17d3SMichal Hocko 	/*
1464f01f17d3SMichal Hocko 	 * Just refresh counters and do not care about the pending delayed
1465f01f17d3SMichal Hocko 	 * vmstat_update. It does not fire often enough to matter and canceling
1466f01f17d3SMichal Hocko 	 * it would be too expensive from this path.
1467f01f17d3SMichal Hocko 	 * vmstat_shepherd will take care of that for us.
1468f01f17d3SMichal Hocko 	 */
1469f01f17d3SMichal Hocko 	refresh_cpu_vm_stats(false);
1470f01f17d3SMichal Hocko }
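/*
 * Editorial note: quiet_vmstat() is intended to be called when a CPU is about
 * to go idle with the tick stopped (e.g. from the NOHZ idle entry path), so
 * that a pending per-cpu vmstat_update does not have to wake the CPU up again
 * merely to fold counters that can be folded right here.
 */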
1471f01f17d3SMichal Hocko 
14727cc36bbdSChristoph Lameter 
14737cc36bbdSChristoph Lameter /*
14747cc36bbdSChristoph Lameter  * Shepherd worker that checks the differentials of processors that have
14757cc36bbdSChristoph Lameter  * had their per-cpu vmstat update workers disabled because of inactivity.
14787cc36bbdSChristoph Lameter  */
14797cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w);
14807cc36bbdSChristoph Lameter 
14810eb77e98SChristoph Lameter static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);
14827cc36bbdSChristoph Lameter 
14837cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w)
14847cc36bbdSChristoph Lameter {
14857cc36bbdSChristoph Lameter 	int cpu;
14867cc36bbdSChristoph Lameter 
14877cc36bbdSChristoph Lameter 	get_online_cpus();
14887cc36bbdSChristoph Lameter 	/* Check processors whose vmstat worker threads have been disabled */
1489f01f17d3SMichal Hocko 	for_each_cpu(cpu, cpu_stat_off) {
1490f01f17d3SMichal Hocko 		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
14917cc36bbdSChristoph Lameter 
1492f01f17d3SMichal Hocko 		if (need_update(cpu)) {
1493f01f17d3SMichal Hocko 			if (cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
1494f01f17d3SMichal Hocko 				queue_delayed_work_on(cpu, vmstat_wq, dw, 0);
1495f01f17d3SMichal Hocko 		} else {
1496f01f17d3SMichal Hocko 			/*
1497f01f17d3SMichal Hocko 			 * Cancel the work if quiet_vmstat has put this
1498f01f17d3SMichal Hocko 			 * cpu on cpu_stat_off because the work item might
1499f01f17d3SMichal Hocko 			 * still be scheduled.
1500f01f17d3SMichal Hocko 			 */
1501f01f17d3SMichal Hocko 			cancel_delayed_work(dw);
1502f01f17d3SMichal Hocko 		}
1503f01f17d3SMichal Hocko 	}
15047cc36bbdSChristoph Lameter 	put_online_cpus();
15057cc36bbdSChristoph Lameter 
15067cc36bbdSChristoph Lameter 	schedule_delayed_work(&shepherd,
15077cc36bbdSChristoph Lameter 		round_jiffies_relative(sysctl_stat_interval));
15087cc36bbdSChristoph Lameter }
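/*
 * Editorial sketch of how the pieces above cooperate:
 *
 *   vmstat_shepherd (global deferrable work, rearmed every
 *   sysctl_stat_interval)
 *       for each cpu set in cpu_stat_off:
 *           diffs pending  -> clear the bit, queue that cpu's vmstat_update
 *           no diffs       -> cancel any still-pending per-cpu work
 *
 *   vmstat_update (per-cpu work on vmstat_wq)
 *       counters folded   -> requeue itself; but if the cpu had been marked
 *                            in cpu_stat_off (e.g. by quiet_vmstat()), just
 *                            clear the bit and do not requeue
 *       nothing folded    -> mark the cpu in cpu_stat_off and stop; the
 *                            shepherd queues the work again once diffs appear
 */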
15097cc36bbdSChristoph Lameter 
15107cc36bbdSChristoph Lameter static void __init start_shepherd_timer(void)
15117cc36bbdSChristoph Lameter {
15127cc36bbdSChristoph Lameter 	int cpu;
15137cc36bbdSChristoph Lameter 
15147cc36bbdSChristoph Lameter 	for_each_possible_cpu(cpu)
1515*ccde8bd4SMichal Hocko 		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
15167cc36bbdSChristoph Lameter 			vmstat_update);
15177cc36bbdSChristoph Lameter 
15187cc36bbdSChristoph Lameter 	if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL))
15197cc36bbdSChristoph Lameter 		BUG();
15207cc36bbdSChristoph Lameter 	cpumask_copy(cpu_stat_off, cpu_online_mask);
15217cc36bbdSChristoph Lameter 
1522751e5f5cSMichal Hocko 	vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
15237cc36bbdSChristoph Lameter 	schedule_delayed_work(&shepherd,
15247cc36bbdSChristoph Lameter 		round_jiffies_relative(sysctl_stat_interval));
1525d1187ed2SChristoph Lameter }
1526d1187ed2SChristoph Lameter 
1527807a1bd2SToshi Kani static void vmstat_cpu_dead(int node)
1528807a1bd2SToshi Kani {
1529807a1bd2SToshi Kani 	int cpu;
1530807a1bd2SToshi Kani 
1531807a1bd2SToshi Kani 	get_online_cpus();
1532807a1bd2SToshi Kani 	for_each_online_cpu(cpu)
1533807a1bd2SToshi Kani 		if (cpu_to_node(cpu) == node)
1534807a1bd2SToshi Kani 			goto end;
1535807a1bd2SToshi Kani 
1536807a1bd2SToshi Kani 	node_clear_state(node, N_CPU);
1537807a1bd2SToshi Kani end:
1538807a1bd2SToshi Kani 	put_online_cpus();
1539807a1bd2SToshi Kani }
1540807a1bd2SToshi Kani 
1541df9ecabaSChristoph Lameter /*
1542df9ecabaSChristoph Lameter  * Use the cpu notifier to ensure that the thresholds are recalculated
1543df9ecabaSChristoph Lameter  * when necessary.
1544df9ecabaSChristoph Lameter  */
15450db0628dSPaul Gortmaker static int vmstat_cpuup_callback(struct notifier_block *nfb,
1546df9ecabaSChristoph Lameter 		unsigned long action,
1547df9ecabaSChristoph Lameter 		void *hcpu)
1548df9ecabaSChristoph Lameter {
1549d1187ed2SChristoph Lameter 	long cpu = (long)hcpu;
1550d1187ed2SChristoph Lameter 
1551df9ecabaSChristoph Lameter 	switch (action) {
1552d1187ed2SChristoph Lameter 	case CPU_ONLINE:
1553d1187ed2SChristoph Lameter 	case CPU_ONLINE_FROZEN:
15545ee28a44SKAMEZAWA Hiroyuki 		refresh_zone_stat_thresholds();
1555ad596925SChristoph Lameter 		node_set_state(cpu_to_node(cpu), N_CPU);
15567cc36bbdSChristoph Lameter 		cpumask_set_cpu(cpu, cpu_stat_off);
1557d1187ed2SChristoph Lameter 		break;
1558d1187ed2SChristoph Lameter 	case CPU_DOWN_PREPARE:
1559d1187ed2SChristoph Lameter 	case CPU_DOWN_PREPARE_FROZEN:
1560afe2c511STejun Heo 		cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
15617cc36bbdSChristoph Lameter 		cpumask_clear_cpu(cpu, cpu_stat_off);
1562d1187ed2SChristoph Lameter 		break;
1563d1187ed2SChristoph Lameter 	case CPU_DOWN_FAILED:
1564d1187ed2SChristoph Lameter 	case CPU_DOWN_FAILED_FROZEN:
15657cc36bbdSChristoph Lameter 		cpumask_set_cpu(cpu, cpu_stat_off);
1566d1187ed2SChristoph Lameter 		break;
1567df9ecabaSChristoph Lameter 	case CPU_DEAD:
15688bb78442SRafael J. Wysocki 	case CPU_DEAD_FROZEN:
1569df9ecabaSChristoph Lameter 		refresh_zone_stat_thresholds();
1570807a1bd2SToshi Kani 		vmstat_cpu_dead(cpu_to_node(cpu));
1571df9ecabaSChristoph Lameter 		break;
1572df9ecabaSChristoph Lameter 	default:
1573df9ecabaSChristoph Lameter 		break;
1574df9ecabaSChristoph Lameter 	}
1575df9ecabaSChristoph Lameter 	return NOTIFY_OK;
1576df9ecabaSChristoph Lameter }
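/*
 * Editorial summary of the hotplug transitions handled above:
 *   CPU_ONLINE(_FROZEN)        recompute stat thresholds, mark the node as
 *                              N_CPU and hand the cpu to the shepherd via
 *                              cpu_stat_off
 *   CPU_DOWN_PREPARE(_FROZEN)  cancel the per-cpu work and remove the cpu
 *                              from cpu_stat_off
 *   CPU_DOWN_FAILED(_FROZEN)   put the cpu back under shepherd control
 *   CPU_DEAD(_FROZEN)          recompute stat thresholds and clear N_CPU on
 *                              the node if it no longer has any online cpu
 */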
1577df9ecabaSChristoph Lameter 
15780db0628dSPaul Gortmaker static struct notifier_block vmstat_notifier =
1579df9ecabaSChristoph Lameter 	{ &vmstat_cpuup_callback, NULL, 0 };
15808f32f7e5SAlexey Dobriyan #endif
1581df9ecabaSChristoph Lameter 
1582e2fc88d0SAdrian Bunk static int __init setup_vmstat(void)
1583df9ecabaSChristoph Lameter {
15848f32f7e5SAlexey Dobriyan #ifdef CONFIG_SMP
15850be94badSSrivatsa S. Bhat 	cpu_notifier_register_begin();
15860be94badSSrivatsa S. Bhat 	__register_cpu_notifier(&vmstat_notifier);
1587d1187ed2SChristoph Lameter 
15887cc36bbdSChristoph Lameter 	start_shepherd_timer();
15890be94badSSrivatsa S. Bhat 	cpu_notifier_register_done();
15908f32f7e5SAlexey Dobriyan #endif
15918f32f7e5SAlexey Dobriyan #ifdef CONFIG_PROC_FS
15928f32f7e5SAlexey Dobriyan 	proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
159374e2e8e8SAlexey Dobriyan 	proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
1594b6aa44abSAlexey Dobriyan 	proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
15955c9fe628SAlexey Dobriyan 	proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
15968f32f7e5SAlexey Dobriyan #endif
1597df9ecabaSChristoph Lameter 	return 0;
1598df9ecabaSChristoph Lameter }
1599df9ecabaSChristoph Lameter module_init(setup_vmstat)
1600d7a5752cSMel Gorman 
1601d7a5752cSMel Gorman #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
1602d7a5752cSMel Gorman 
1603d7a5752cSMel Gorman /*
1604d7a5752cSMel Gorman  * Return an index indicating how much of the available free memory is
1605d7a5752cSMel Gorman  * unusable for an allocation of the requested size.
1606d7a5752cSMel Gorman  */
1607d7a5752cSMel Gorman static int unusable_free_index(unsigned int order,
1608d7a5752cSMel Gorman 				struct contig_page_info *info)
1609d7a5752cSMel Gorman {
1610d7a5752cSMel Gorman 	/* No free memory is interpreted as all free memory is unusable */
1611d7a5752cSMel Gorman 	if (info->free_pages == 0)
1612d7a5752cSMel Gorman 		return 1000;
1613d7a5752cSMel Gorman 
1614d7a5752cSMel Gorman 	/*
1615d7a5752cSMel Gorman 	 * Index should be a value between 0 and 1. Return a value to 3
1616d7a5752cSMel Gorman 	 * decimal places.
1617d7a5752cSMel Gorman 	 *
1618d7a5752cSMel Gorman 	 * 0 => no fragmentation
1619d7a5752cSMel Gorman 	 * 1 => high fragmentation
1620d7a5752cSMel Gorman 	 */
1621d7a5752cSMel Gorman 	return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
1622d7a5752cSMel Gorman 
1623d7a5752cSMel Gorman }
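/*
 * Worked example with hypothetical numbers: a zone with info->free_pages =
 * 1000 of which info->free_blocks_suitable = 2 blocks could satisfy an
 * order-3 request gives (1000 - (2 << 3)) * 1000 / 1000 = 984, shown by
 * unusable_show_print() as 0.984, i.e. roughly 98% of the free memory is
 * unusable for that allocation size.
 */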
1624d7a5752cSMel Gorman 
1625d7a5752cSMel Gorman static void unusable_show_print(struct seq_file *m,
1626d7a5752cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1627d7a5752cSMel Gorman {
1628d7a5752cSMel Gorman 	unsigned int order;
1629d7a5752cSMel Gorman 	int index;
1630d7a5752cSMel Gorman 	struct contig_page_info info;
1631d7a5752cSMel Gorman 
1632d7a5752cSMel Gorman 	seq_printf(m, "Node %d, zone %8s ",
1633d7a5752cSMel Gorman 				pgdat->node_id,
1634d7a5752cSMel Gorman 				zone->name);
1635d7a5752cSMel Gorman 	for (order = 0; order < MAX_ORDER; ++order) {
1636d7a5752cSMel Gorman 		fill_contig_page_info(zone, order, &info);
1637d7a5752cSMel Gorman 		index = unusable_free_index(order, &info);
1638d7a5752cSMel Gorman 		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
1639d7a5752cSMel Gorman 	}
1640d7a5752cSMel Gorman 
1641d7a5752cSMel Gorman 	seq_putc(m, '\n');
1642d7a5752cSMel Gorman }
1643d7a5752cSMel Gorman 
1644d7a5752cSMel Gorman /*
1645d7a5752cSMel Gorman  * Display unusable free space index
1646d7a5752cSMel Gorman  *
1647d7a5752cSMel Gorman  * The unusable free space index measures how much of the available free
1648d7a5752cSMel Gorman  * memory cannot be used to satisfy an allocation of a given size and is a
1649d7a5752cSMel Gorman  * value between 0 and 1. The higher the value, the more of the free memory is
1650d7a5752cSMel Gorman  * unusable and, by implication, the worse the external fragmentation is. This
1651d7a5752cSMel Gorman  * can be expressed as a percentage by multiplying by 100.
1652d7a5752cSMel Gorman  */
1653d7a5752cSMel Gorman static int unusable_show(struct seq_file *m, void *arg)
1654d7a5752cSMel Gorman {
1655d7a5752cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1656d7a5752cSMel Gorman 
1657d7a5752cSMel Gorman 	/* check memoryless node */
1658a47b53c5SLai Jiangshan 	if (!node_state(pgdat->node_id, N_MEMORY))
1659d7a5752cSMel Gorman 		return 0;
1660d7a5752cSMel Gorman 
1661d7a5752cSMel Gorman 	walk_zones_in_node(m, pgdat, unusable_show_print);
1662d7a5752cSMel Gorman 
1663d7a5752cSMel Gorman 	return 0;
1664d7a5752cSMel Gorman }
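/*
 * Illustrative output (hypothetical values), read from
 * <debugfs>/extfrag/unusable_index with one column per order 0..MAX_ORDER-1:
 *
 *   Node 0, zone   Normal 0.000 0.005 0.012 0.031 0.084 0.209 0.408 ...
 */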
1665d7a5752cSMel Gorman 
1666d7a5752cSMel Gorman static const struct seq_operations unusable_op = {
1667d7a5752cSMel Gorman 	.start	= frag_start,
1668d7a5752cSMel Gorman 	.next	= frag_next,
1669d7a5752cSMel Gorman 	.stop	= frag_stop,
1670d7a5752cSMel Gorman 	.show	= unusable_show,
1671d7a5752cSMel Gorman };
1672d7a5752cSMel Gorman 
1673d7a5752cSMel Gorman static int unusable_open(struct inode *inode, struct file *file)
1674d7a5752cSMel Gorman {
1675d7a5752cSMel Gorman 	return seq_open(file, &unusable_op);
1676d7a5752cSMel Gorman }
1677d7a5752cSMel Gorman 
1678d7a5752cSMel Gorman static const struct file_operations unusable_file_ops = {
1679d7a5752cSMel Gorman 	.open		= unusable_open,
1680d7a5752cSMel Gorman 	.read		= seq_read,
1681d7a5752cSMel Gorman 	.llseek		= seq_lseek,
1682d7a5752cSMel Gorman 	.release	= seq_release,
1683d7a5752cSMel Gorman };
1684d7a5752cSMel Gorman 
1685f1a5ab12SMel Gorman static void extfrag_show_print(struct seq_file *m,
1686f1a5ab12SMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1687f1a5ab12SMel Gorman {
1688f1a5ab12SMel Gorman 	unsigned int order;
1689f1a5ab12SMel Gorman 	int index;
1690f1a5ab12SMel Gorman 
1691f1a5ab12SMel Gorman 	/* Alloc on stack as interrupts are disabled for zone walk */
1692f1a5ab12SMel Gorman 	struct contig_page_info info;
1693f1a5ab12SMel Gorman 
1694f1a5ab12SMel Gorman 	seq_printf(m, "Node %d, zone %8s ",
1695f1a5ab12SMel Gorman 				pgdat->node_id,
1696f1a5ab12SMel Gorman 				zone->name);
1697f1a5ab12SMel Gorman 	for (order = 0; order < MAX_ORDER; ++order) {
1698f1a5ab12SMel Gorman 		fill_contig_page_info(zone, order, &info);
169956de7263SMel Gorman 		index = __fragmentation_index(order, &info);
1700f1a5ab12SMel Gorman 		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
1701f1a5ab12SMel Gorman 	}
1702f1a5ab12SMel Gorman 
1703f1a5ab12SMel Gorman 	seq_putc(m, '\n');
1704f1a5ab12SMel Gorman }
1705f1a5ab12SMel Gorman 
1706f1a5ab12SMel Gorman /*
1707f1a5ab12SMel Gorman  * Display the fragmentation index for each order at which an allocation would fail
1708f1a5ab12SMel Gorman  */
1709f1a5ab12SMel Gorman static int extfrag_show(struct seq_file *m, void *arg)
1710f1a5ab12SMel Gorman {
1711f1a5ab12SMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1712f1a5ab12SMel Gorman 
1713f1a5ab12SMel Gorman 	walk_zones_in_node(m, pgdat, extfrag_show_print);
1714f1a5ab12SMel Gorman 
1715f1a5ab12SMel Gorman 	return 0;
1716f1a5ab12SMel Gorman }
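/*
 * Editorial note: the index follows the usual fragmentation index convention.
 * Values tending towards 0 suggest the failure would be due to a shortage of
 * free memory, values tending towards 1 suggest it would be due to external
 * fragmentation, and -1.000 is printed when an allocation of that order would
 * currently succeed.
 */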
1717f1a5ab12SMel Gorman 
1718f1a5ab12SMel Gorman static const struct seq_operations extfrag_op = {
1719f1a5ab12SMel Gorman 	.start	= frag_start,
1720f1a5ab12SMel Gorman 	.next	= frag_next,
1721f1a5ab12SMel Gorman 	.stop	= frag_stop,
1722f1a5ab12SMel Gorman 	.show	= extfrag_show,
1723f1a5ab12SMel Gorman };
1724f1a5ab12SMel Gorman 
1725f1a5ab12SMel Gorman static int extfrag_open(struct inode *inode, struct file *file)
1726f1a5ab12SMel Gorman {
1727f1a5ab12SMel Gorman 	return seq_open(file, &extfrag_op);
1728f1a5ab12SMel Gorman }
1729f1a5ab12SMel Gorman 
1730f1a5ab12SMel Gorman static const struct file_operations extfrag_file_ops = {
1731f1a5ab12SMel Gorman 	.open		= extfrag_open,
1732f1a5ab12SMel Gorman 	.read		= seq_read,
1733f1a5ab12SMel Gorman 	.llseek		= seq_lseek,
1734f1a5ab12SMel Gorman 	.release	= seq_release,
1735f1a5ab12SMel Gorman };
1736f1a5ab12SMel Gorman 
1737d7a5752cSMel Gorman static int __init extfrag_debug_init(void)
1738d7a5752cSMel Gorman {
1739bde8bd8aSSasikantha babu 	struct dentry *extfrag_debug_root;
1740bde8bd8aSSasikantha babu 
1741d7a5752cSMel Gorman 	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
1742d7a5752cSMel Gorman 	if (!extfrag_debug_root)
1743d7a5752cSMel Gorman 		return -ENOMEM;
1744d7a5752cSMel Gorman 
1745d7a5752cSMel Gorman 	if (!debugfs_create_file("unusable_index", 0444,
1746d7a5752cSMel Gorman 			extfrag_debug_root, NULL, &unusable_file_ops))
1747bde8bd8aSSasikantha babu 		goto fail;
1748d7a5752cSMel Gorman 
1749f1a5ab12SMel Gorman 	if (!debugfs_create_file("extfrag_index", 0444,
1750f1a5ab12SMel Gorman 			extfrag_debug_root, NULL, &extfrag_file_ops))
1751bde8bd8aSSasikantha babu 		goto fail;
1752f1a5ab12SMel Gorman 
1753d7a5752cSMel Gorman 	return 0;
1754bde8bd8aSSasikantha babu fail:
1755bde8bd8aSSasikantha babu 	debugfs_remove_recursive(extfrag_debug_root);
1756bde8bd8aSSasikantha babu 	return -ENOMEM;
1757d7a5752cSMel Gorman }
1758d7a5752cSMel Gorman 
1759d7a5752cSMel Gorman module_init(extfrag_debug_init);
1760d7a5752cSMel Gorman #endif
1761