/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  zoned VM statistics
 *  Copyright (C) 2006 Silicon Graphics, Inc.,
 *		Christoph Lameter <christoph@lameter.com>
 *  Copyright (C) 2008-2014 Christoph Lameter
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/vmstat.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/writeback.h>
#include <linux/compaction.h>
#include <linux/mm_inline.h>
#include <linux/page_ext.h>
#include <linux/page_owner.h>

#include "internal.h"

#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

static void sum_vm_events(unsigned long *ret)
{
	int cpu;
	int i;

	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

	for_each_online_cpu(cpu) {
		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
			ret[i] += this->event[i];
	}
}

/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
void all_vm_events(unsigned long *ret)
{
	get_online_cpus();
	sum_vm_events(ret);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(all_vm_events);
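
/*
 * Illustrative note: because each CPU updates its own vm_event_states
 * without locking, two back-to-back all_vm_events() snapshots taken
 * around a burst of activity can disagree by the size of the burst.
 * Callers must treat the sums as approximate.
 */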

/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * but keeps the global counts constant.
 */
void vm_events_fold_cpu(int cpu)
{
	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
	int i;

	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
		count_vm_events(i, fold_state->event[i]);
		fold_state->event[i] = 0;
	}
}

#endif /* CONFIG_VM_EVENT_COUNTERS */

/*
 * Manage combined zone based / global counters
 *
 * vm_zone_stat and vm_node_stat contain the global counters
 */
atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_zone_stat);
EXPORT_SYMBOL(vm_node_stat);

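/*
 * Sketch of the two-level scheme used below (illustrative): each CPU
 * accumulates small deltas in a per-cpu s8 diff (vm_stat_diff /
 * vm_node_stat_diff). Only when a diff crosses stat_threshold is it
 * folded into the shared atomic_long_t counters above, so the global
 * values may lag the truth by roughly num_online_cpus() * threshold.
 */
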
#ifdef CONFIG_SMP

int calculate_pressure_threshold(struct zone *zone)
{
	int threshold;
	int watermark_distance;

	/*
	 * As vmstats are not up to date, there is drift between the estimated
	 * and real values. For high thresholds and a high number of CPUs, it
	 * is possible for the min watermark to be breached while the estimated
	 * value looks fine. The pressure threshold is a reduced value such
	 * that even the maximum amount of drift will not accidentally breach
	 * the min watermark.
	 */
	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
	threshold = max(1, (int)(watermark_distance / num_online_cpus()));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

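/*
 * Worked example for calculate_pressure_threshold() above (hypothetical
 * numbers): with a low - min watermark distance of 256 pages on an 8-CPU
 * machine, the threshold drops to 256 / 8 = 32, so even if every CPU
 * holds a full +32 diff the estimate cannot hide a min-watermark breach.
 */
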
int calculate_normal_threshold(struct zone *zone)
{
	int threshold;
	int mem;	/* memory in 128 MB units */

	/*
	 * The threshold scales with the number of processors and the amount
	 * of memory per zone. More memory means that we can defer updates for
	 * longer, more processors could lead to more contention.
	 * fls() is used to have a cheap way of logarithmic scaling.
	 *
	 * Some sample thresholds:
	 *
	 * Threshold	Processors	(fls)	Zonesize	fls(mem+1)
	 * ------------------------------------------------------------------
	 * 8		1		1	0.9-1 GB	4
	 * 16		2		2	0.9-1 GB	4
	 * 20		2		2	1-2 GB		5
	 * 24		2		2	2-4 GB		6
	 * 28		2		2	4-8 GB		7
	 * 32		2		2	8-16 GB		8
	 * 4		2		2	<128M		1
	 * 30		4		3	2-4 GB		5
	 * 48		4		3	8-16 GB		8
	 * 32		8		4	1-2 GB		4
	 * 32		8		4	0.9-1GB		4
	 * 10		16		5	<128M		1
	 * 40		16		5	900M		4
	 * 70		64		7	2-4 GB		5
	 * 84		64		7	4-8 GB		6
	 * 108		512		9	4-8 GB		6
	 * 125		1024		10	8-16 GB		8
	 * 125		1024		10	16-32 GB	9
	 */
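
	/*
	 * Worked example (illustrative): a 1 GB zone holds 8 units of
	 * 128 MB, so on a 2-CPU machine the formula below yields
	 * 2 * fls(2) * (1 + fls(8)) = 2 * 2 * 5 = 20, matching the
	 * "1-2 GB" row in the table above.
	 */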

	mem = zone->managed_pages >> (27 - PAGE_SHIFT);

	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

/*
 * Refresh the thresholds for each zone.
 */
void refresh_zone_stat_thresholds(void)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int cpu;
	int threshold;

	/* Zero current pgdat thresholds */
	for_each_online_pgdat(pgdat) {
		for_each_online_cpu(cpu) {
			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
		}
	}

	for_each_populated_zone(zone) {
		struct pglist_data *pgdat = zone->zone_pgdat;
		unsigned long max_drift, tolerate_drift;

		threshold = calculate_normal_threshold(zone);

		for_each_online_cpu(cpu) {
			int pgdat_threshold;

			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;

			/* Base nodestat threshold on the largest populated zone. */
			pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
				= max(threshold, pgdat_threshold);
		}

		/*
		 * Only set percpu_drift_mark if there is a danger that
		 * NR_FREE_PAGES makes the low watermark look fine when in
		 * fact the min watermark could be breached by an allocation.
		 */
		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
		max_drift = num_online_cpus() * threshold;
		if (max_drift > tolerate_drift)
			zone->percpu_drift_mark = high_wmark_pages(zone) +
					max_drift;
	}
}

void set_pgdat_percpu_threshold(pg_data_t *pgdat,
				int (*calculate_pressure)(struct zone *))
{
	struct zone *zone;
	int cpu;
	int threshold;
	int i;

	for (i = 0; i < pgdat->nr_zones; i++) {
		zone = &pgdat->node_zones[i];
		if (!zone->percpu_drift_mark)
			continue;

		threshold = (*calculate_pressure)(zone);
		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;
	}
}

/*
 * For use when we know that interrupts are disabled,
 * or when we know that preemption is disabled and that
 * particular counter cannot be updated from interrupt context.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			   long delta)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long x;
	long t;

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(x > t || x < -t)) {
		zone_page_state_add(x, zone, item);
		x = 0;
	}
	__this_cpu_write(*p, x);
}
EXPORT_SYMBOL(__mod_zone_page_state);
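
/*
 * Example of the fold above (illustrative numbers): with stat_threshold
 * t = 32 and a per-cpu diff of 30, __mod_zone_page_state(..., 5) computes
 * x = 35 > t, adds all 35 pages to the zone and global counters via
 * zone_page_state_add() and resets the per-cpu diff to 0; a smaller
 * delta would have stayed entirely cpu-local.
 */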

void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
				long delta)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	long x;
	long t;

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(x > t || x < -t)) {
		node_page_state_add(x, pgdat, item);
		x = 0;
	}
	__this_cpu_write(*p, x);
}
EXPORT_SYMBOL(__mod_node_page_state);

/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * NOTE: These functions are very performance sensitive. Change only
 * with care.
 *
 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 * However, the code must first determine the differential location in a zone
 * based on the processor number and then inc/dec the counter. There is no
 * guarantee without disabling preemption that the processor will not change
 * in between and therefore the atomicity vs. interrupt cannot be exploited
 * in a useful way here.
 */
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v + overstep, zone, item);
		__this_cpu_write(*p, -overstep);
	}
}
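
/*
 * Overstep example (illustrative): with t = 32, the increment that takes
 * the per-cpu diff to 33 folds 33 + 16 = 49 into the zone counter and
 * leaves the diff at -16, buying headroom for ~49 further increments
 * before the next fold instead of folding on every subsequent increment.
 */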

void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	s8 v, t;

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		node_page_state_add(v + overstep, pgdat, item);
		__this_cpu_write(*p, -overstep);
	}
}

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __inc_node_page_state(struct page *page, enum node_stat_item item)
{
	__inc_node_state(page_pgdat(page), item);
}
EXPORT_SYMBOL(__inc_node_page_state);

void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_dec_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v < -t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v - overstep, zone, item);
		__this_cpu_write(*p, overstep);
	}
}

void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	s8 v, t;

	v = __this_cpu_dec_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v < -t)) {
		s8 overstep = t >> 1;

		node_page_state_add(v - overstep, pgdat, item);
		__this_cpu_write(*p, overstep);
	}
}

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__dec_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__dec_zone_page_state);

void __dec_node_page_state(struct page *page, enum node_stat_item item)
{
	__dec_node_state(page_pgdat(page), item);
}
EXPORT_SYMBOL(__dec_node_page_state);

#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
/*
 * If we have cmpxchg_local support then we do not need to incur the overhead
 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
 *
 * mod_state() modifies the zone counter state through atomic per cpu
 * operations.
 *
 * Overstep mode specifies how overstep should be handled:
 *     0       No overstepping
 *     1       Overstepping half of threshold
 *     -1      Overstepping minus half of threshold
 */
static inline void mod_zone_state(struct zone *zone,
       enum zone_stat_item item, long delta, int overstep_mode)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long o, n, t, z;

	do {
		z = 0;  /* overflow to zone counters */

		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong cpu if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyway
		 * for all cpus in a zone.
		 */
		t = this_cpu_read(pcp->stat_threshold);

		o = this_cpu_read(*p);
		n = delta + o;

		if (n > t || n < -t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to zone counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		zone_page_state_add(z, zone, item);
}
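
/*
 * Trace of one overflowing update (illustrative): t = 125, old diff
 * o = 125, delta = 1, overstep_mode = 1. Then n = 126 > t, os = 62,
 * z = 188 and the diff is set to -62; the fold conserves the total
 * since z + n = 188 - 62 = 126 = o + delta. If the cmpxchg loses a
 * race with another update on this cpu, the loop simply retries.
 */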

void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{
	mod_zone_state(zone, item, delta, 0);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_zone_state(page_zone(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_zone_state(page_zone(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_zone_page_state);

static inline void mod_node_state(struct pglist_data *pgdat,
       enum node_stat_item item, long delta, int overstep_mode)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	long o, n, t, z;

	do {
		z = 0;  /* overflow to node counters */

		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong cpu if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyway
		 * for all cpus in a node.
		 */
		t = this_cpu_read(pcp->stat_threshold);

		o = this_cpu_read(*p);
		n = delta + o;

		if (n > t || n < -t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to node counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		node_page_state_add(z, pgdat, item);
}

void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
					long delta)
{
	mod_node_state(pgdat, item, delta, 0);
}
EXPORT_SYMBOL(mod_node_page_state);

void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	mod_node_state(pgdat, item, 1, 1);
}

void inc_node_page_state(struct page *page, enum node_stat_item item)
{
	mod_node_state(page_pgdat(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_node_page_state);

void dec_node_page_state(struct page *page, enum node_stat_item item)
{
	mod_node_state(page_pgdat(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_node_page_state);
#else
/*
 * Use interrupt disable to serialize counter updates
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;

	zone = page_zone(page);
	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_zone_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);

void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__inc_node_state(pgdat, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_node_state);

void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
					long delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_node_page_state(pgdat, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_node_page_state);

void inc_node_page_state(struct page *page, enum node_stat_item item)
{
	unsigned long flags;
	struct pglist_data *pgdat;

	pgdat = page_pgdat(page);
	local_irq_save(flags);
	__inc_node_state(pgdat, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_node_page_state);

void dec_node_page_state(struct page *page, enum node_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_node_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_node_page_state);
#endif

/*
 * Fold a differential into the global counters.
 * Returns the number of counters updated.
 */
static int fold_diff(int *zone_diff, int *node_diff)
{
	int i;
	int changes = 0;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (zone_diff[i]) {
			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
			changes++;
		}

	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
		if (node_diff[i]) {
			atomic_long_add(node_diff[i], &vm_node_stat[i]);
			changes++;
		}

	return changes;
}

/*
 * Update the zone counters for the current cpu.
 *
 * Note that refresh_cpu_vm_stats strives to only access
 * node local memory. The per cpu pagesets on remote zones are placed
 * in the memory local to the processor using that pageset. So the
 * loop over all zones will access a series of cachelines local to
 * the processor.
 *
 * The call to zone_page_state_add updates the cachelines with the
 * statistics in the remote zone struct as well as the global cachelines
 * with the global counters. These could cause remote node cache line
 * bouncing and will have to be only done when necessary.
 *
 * The function returns the number of global counters updated.
 */
static int refresh_cpu_vm_stats(bool do_pagesets)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int i;
	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
	int changes = 0;

	for_each_populated_zone(zone) {
		struct per_cpu_pageset __percpu *p = zone->pageset;

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
			int v;

			v = this_cpu_xchg(p->vm_stat_diff[i], 0);
			if (v) {
				atomic_long_add(v, &zone->vm_stat[i]);
				global_zone_diff[i] += v;
#ifdef CONFIG_NUMA
				/* 3 seconds idle till flush */
				__this_cpu_write(p->expire, 3);
#endif
			}
		}
#ifdef CONFIG_NUMA
		if (do_pagesets) {
			cond_resched();
			/*
			 * Deal with draining the remote pageset of this
			 * processor.
			 *
			 * Check if there are pages remaining in this pageset;
			 * if not then there is nothing to expire.
			 */
			if (!__this_cpu_read(p->expire) ||
			       !__this_cpu_read(p->pcp.count))
				continue;

			/*
			 * We never drain zones local to this processor.
			 */
			if (zone_to_nid(zone) == numa_node_id()) {
				__this_cpu_write(p->expire, 0);
				continue;
			}

			if (__this_cpu_dec_return(p->expire))
				continue;

			if (__this_cpu_read(p->pcp.count)) {
				drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
				changes++;
			}
		}
#endif
	}

	for_each_online_pgdat(pgdat) {
		struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;

		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
			int v;

			v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
			if (v) {
				atomic_long_add(v, &pgdat->vm_stat[i]);
				global_node_diff[i] += v;
			}
		}
	}

	changes += fold_diff(global_zone_diff, global_node_diff);
	return changes;
}
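
/*
 * Note on the logic above (illustrative): this_cpu_xchg() reads and
 * zeroes a diff in one atomic per-cpu operation, so a concurrent update
 * on this cpu is never lost - it either lands before the xchg and is
 * folded, or after it and stays in the diff. A remote pageset is drained
 * only once the expire countdown (re-armed to 3 on every fold) reaches
 * zero, i.e. after roughly three refresh intervals with no local folds.
 */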

/*
 * Fold the data for an offline cpu into the global array.
 * There cannot be any access by the offline cpu and therefore
 * synchronization is simplified.
 */
void cpu_vm_stats_fold(int cpu)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int i;
	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };

	for_each_populated_zone(zone) {
		struct per_cpu_pageset *p;

		p = per_cpu_ptr(zone->pageset, cpu);

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
			if (p->vm_stat_diff[i]) {
				int v;

				v = p->vm_stat_diff[i];
				p->vm_stat_diff[i] = 0;
				atomic_long_add(v, &zone->vm_stat[i]);
				global_zone_diff[i] += v;
			}
	}

	for_each_online_pgdat(pgdat) {
		struct per_cpu_nodestat *p;

		p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);

		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
			if (p->vm_node_stat_diff[i]) {
				int v;

				v = p->vm_node_stat_diff[i];
				p->vm_node_stat_diff[i] = 0;
				atomic_long_add(v, &pgdat->vm_stat[i]);
				global_node_diff[i] += v;
			}
	}

	fold_diff(global_zone_diff, global_node_diff);
}

/*
 * This is only called if !populated_zone(zone), which implies no other
 * users of pset->vm_stat_diff[] exist.
 */
void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
{
	int i;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (pset->vm_stat_diff[i]) {
			int v = pset->vm_stat_diff[i];
			pset->vm_stat_diff[i] = 0;
			atomic_long_add(v, &zone->vm_stat[i]);
			atomic_long_add(v, &vm_zone_stat[i]);
		}
}
#endif

#ifdef CONFIG_NUMA
/*
 * Determine the per node value of a stat item. This function
 * is called frequently in a NUMA machine, so try to be as
 * frugal as possible.
 */
unsigned long sum_zone_node_page_state(int node,
				 enum zone_stat_item item)
{
	struct zone *zones = NODE_DATA(node)->node_zones;
	int i;
	unsigned long count = 0;

	for (i = 0; i < MAX_NR_ZONES; i++)
		count += zone_page_state(zones + i, item);

	return count;
}

/*
 * Determine the per node value of a stat item.
 */
unsigned long node_page_state(struct pglist_data *pgdat,
				enum node_stat_item item)
{
	long x = atomic_long_read(&pgdat->vm_stat[item]);
#ifdef CONFIG_SMP
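	/*
	 * Unfolded per-cpu diffs can drive the atomic sum transiently
	 * negative; clamp so callers never see a negative count.
	 */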
	if (x < 0)
		x = 0;
#endif
	return x;
}
#endif

#ifdef CONFIG_COMPACTION

struct contig_page_info {
	unsigned long free_pages;
	unsigned long free_blocks_total;
	unsigned long free_blocks_suitable;
};

/*
 * Calculate the number of free pages in a zone, how many contiguous
 * pages are free and how many are large enough to satisfy an allocation of
 * the target size. Note that this function makes no attempt to estimate
 * how many suitable free blocks there *might* be if MOVABLE pages were
 * migrated. Calculating that is possible, but expensive and can be
 * figured out from userspace.
 */
static void fill_contig_page_info(struct zone *zone,
				unsigned int suitable_order,
				struct contig_page_info *info)
{
	unsigned int order;

	info->free_pages = 0;
	info->free_blocks_total = 0;
	info->free_blocks_suitable = 0;

	for (order = 0; order < MAX_ORDER; order++) {
		unsigned long blocks;

		/* Count number of free blocks */
		blocks = zone->free_area[order].nr_free;
		info->free_blocks_total += blocks;

		/* Count free base pages */
		info->free_pages += blocks << order;

		/* Count the suitable free blocks */
		if (order >= suitable_order)
			info->free_blocks_suitable += blocks <<
						(order - suitable_order);
	}
}
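
/*
 * Worked example (hypothetical free lists): with nr_free = 4 at order 0,
 * 2 at order 1 and 1 at order 2, fill_contig_page_info() for
 * suitable_order = 1 yields free_pages = 4 + 4 + 4 = 12,
 * free_blocks_total = 7 and free_blocks_suitable = 2 + 2 = 4
 * (each order-2 block counts as two order-1 blocks).
 */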

/*
 * A fragmentation index only makes sense if an allocation of a requested
 * size would fail. If that is true, the fragmentation index indicates
 * whether external fragmentation or a lack of memory was the problem.
 * The value can be used to determine if page reclaim or compaction
 * should be used.
 */
static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
{
	unsigned long requested = 1UL << order;

	if (WARN_ON_ONCE(order >= MAX_ORDER))
		return 0;

	if (!info->free_blocks_total)
		return 0;

	/* Fragmentation index only makes sense when a request would fail */
	if (info->free_blocks_suitable)
		return -1000;

	/*
	 * Index is between 0 and 1 so return within 3 decimal places
	 *
	 * 0 => allocation would fail due to lack of memory
	 * 1 => allocation would fail due to fragmentation
	 */
	return 1000 - div_u64((1000 + div_u64(info->free_pages * 1000ULL, requested)), info->free_blocks_total);
}
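
/*
 * Worked example (hypothetical numbers): for order = 4 (requested = 16
 * pages), free_pages = 100, free_blocks_total = 25 and no suitable block,
 * the index is 1000 - (1000 + 100 * 1000 / 16) / 25 = 1000 - 290 = 710,
 * i.e. closer to 1.000, so the failure is mostly due to fragmentation
 * and compaction is the more promising remedy.
 */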

/* Same as __fragmentation_index but allocs contig_page_info on stack */
int fragmentation_index(struct zone *zone, unsigned int order)
{
	struct contig_page_info info;

	fill_contig_page_info(zone, order, &info);
	return __fragmentation_index(order, &info);
}
#endif

#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
#define TEXT_FOR_DMA(xx)
#endif

#ifdef CONFIG_ZONE_DMA32
#define TEXT_FOR_DMA32(xx) xx "_dma32",
#else
#define TEXT_FOR_DMA32(xx)
#endif

#ifdef CONFIG_HIGHMEM
#define TEXT_FOR_HIGHMEM(xx) xx "_high",
#else
#define TEXT_FOR_HIGHMEM(xx)
#endif

#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
					TEXT_FOR_HIGHMEM(xx) xx "_movable",

const char * const vmstat_text[] = {
	/* enum zone_stat_item counters */
	"nr_free_pages",
	"nr_zone_inactive_anon",
	"nr_zone_active_anon",
	"nr_zone_inactive_file",
	"nr_zone_active_file",
	"nr_zone_unevictable",
	"nr_zone_write_pending",
	"nr_mlock",
	"nr_page_table_pages",
	"nr_kernel_stack",
	"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
	"nr_zspages",
#endif
#ifdef CONFIG_NUMA
	"numa_hit",
	"numa_miss",
	"numa_foreign",
	"numa_interleave",
	"numa_local",
	"numa_other",
#endif
	"nr_free_cma",

	/* Node-based counters */
	"nr_inactive_anon",
	"nr_active_anon",
	"nr_inactive_file",
	"nr_active_file",
	"nr_unevictable",
	"nr_slab_reclaimable",
	"nr_slab_unreclaimable",
	"nr_isolated_anon",
	"nr_isolated_file",
	"workingset_refault",
	"workingset_activate",
	"workingset_nodereclaim",
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_writeback_temp",
	"nr_shmem",
	"nr_shmem_hugepages",
	"nr_shmem_pmdmapped",
	"nr_anon_transparent_hugepages",
	"nr_unstable",
	"nr_vmscan_write",
	"nr_vmscan_immediate_reclaim",
	"nr_dirtied",
	"nr_written",

	/* enum writeback_stat_item counters */
	"nr_dirty_threshold",
	"nr_dirty_background_threshold",

#ifdef CONFIG_VM_EVENT_COUNTERS
	/* enum vm_event_item counters */
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	TEXTS_FOR_ZONES("pgalloc")
	TEXTS_FOR_ZONES("allocstall")
	TEXTS_FOR_ZONES("pgskip")

	"pgfree",
	"pgactivate",
	"pgdeactivate",
	"pglazyfree",

	"pgfault",
	"pgmajfault",
	"pglazyfreed",

	"pgrefill",
	"pgsteal_kswapd",
	"pgsteal_direct",
	"pgscan_kswapd",
	"pgscan_direct",
	"pgscan_direct_throttle",

#ifdef CONFIG_NUMA
	"zone_reclaim_failed",
#endif
	"pginodesteal",
	"slabs_scanned",
	"kswapd_inodesteal",
	"kswapd_low_wmark_hit_quickly",
	"kswapd_high_wmark_hit_quickly",
	"pageoutrun",

	"pgrotated",

	"drop_pagecache",
	"drop_slab",
	"oom_kill",

#ifdef CONFIG_NUMA_BALANCING
	"numa_pte_updates",
	"numa_huge_pte_updates",
	"numa_hint_faults",
	"numa_hint_faults_local",
	"numa_pages_migrated",
#endif
#ifdef CONFIG_MIGRATION
	"pgmigrate_success",
	"pgmigrate_fail",
#endif
#ifdef CONFIG_COMPACTION
	"compact_migrate_scanned",
	"compact_free_scanned",
	"compact_isolated",
	"compact_stall",
	"compact_fail",
	"compact_success",
	"compact_daemon_wake",
	"compact_daemon_migrate_scanned",
	"compact_daemon_free_scanned",
#endif

#ifdef CONFIG_HUGETLB_PAGE
	"htlb_buddy_alloc_success",
	"htlb_buddy_alloc_fail",
#endif
	"unevictable_pgs_culled",
	"unevictable_pgs_scanned",
	"unevictable_pgs_rescued",
	"unevictable_pgs_mlocked",
	"unevictable_pgs_munlocked",
	"unevictable_pgs_cleared",
	"unevictable_pgs_stranded",

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	"thp_fault_alloc",
	"thp_fault_fallback",
	"thp_collapse_alloc",
	"thp_collapse_alloc_failed",
	"thp_file_alloc",
	"thp_file_mapped",
	"thp_split_page",
	"thp_split_page_failed",
	"thp_deferred_split_page",
	"thp_split_pmd",
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
	"thp_split_pud",
#endif
	"thp_zero_page_alloc",
	"thp_zero_page_alloc_failed",
	"thp_swpout",
	"thp_swpout_fallback",
#endif
#ifdef CONFIG_MEMORY_BALLOON
	"balloon_inflate",
	"balloon_deflate",
#ifdef CONFIG_BALLOON_COMPACTION
	"balloon_migrate",
#endif
#endif /* CONFIG_MEMORY_BALLOON */
#ifdef CONFIG_DEBUG_TLBFLUSH
#ifdef CONFIG_SMP
	"nr_tlb_remote_flush",
	"nr_tlb_remote_flush_received",
#endif /* CONFIG_SMP */
	"nr_tlb_local_flush_all",
	"nr_tlb_local_flush_one",
#endif /* CONFIG_DEBUG_TLBFLUSH */

#ifdef CONFIG_DEBUG_VM_VMACACHE
	"vmacache_find_calls",
	"vmacache_find_hits",
	"vmacache_full_flushes",
#endif
#ifdef CONFIG_SWAP
	"swap_ra",
	"swap_ra_hit",
#endif
#endif /* CONFIG_VM_EVENT_COUNTERS */
1106fa25c503SKOSAKI Motohiro };
11070d6617c7SDavid Rientjes #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
1108fa25c503SKOSAKI Motohiro 
1109fa25c503SKOSAKI Motohiro 
11103c486871SAndrew Morton #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
11113c486871SAndrew Morton      defined(CONFIG_PROC_FS)
11123c486871SAndrew Morton static void *frag_start(struct seq_file *m, loff_t *pos)
11133c486871SAndrew Morton {
11143c486871SAndrew Morton 	pg_data_t *pgdat;
11153c486871SAndrew Morton 	loff_t node = *pos;
11163c486871SAndrew Morton 
11173c486871SAndrew Morton 	for (pgdat = first_online_pgdat();
11183c486871SAndrew Morton 	     pgdat && node;
11193c486871SAndrew Morton 	     pgdat = next_online_pgdat(pgdat))
11203c486871SAndrew Morton 		--node;
11213c486871SAndrew Morton 
11223c486871SAndrew Morton 	return pgdat;
11233c486871SAndrew Morton }
11243c486871SAndrew Morton 
11253c486871SAndrew Morton static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
11263c486871SAndrew Morton {
11273c486871SAndrew Morton 	pg_data_t *pgdat = (pg_data_t *)arg;
11283c486871SAndrew Morton 
11293c486871SAndrew Morton 	(*pos)++;
11303c486871SAndrew Morton 	return next_online_pgdat(pgdat);
11313c486871SAndrew Morton }
11323c486871SAndrew Morton 
11333c486871SAndrew Morton static void frag_stop(struct seq_file *m, void *arg)
11343c486871SAndrew Morton {
11353c486871SAndrew Morton }
11363c486871SAndrew Morton 
1137b2bd8598SDavid Rientjes /*
1138b2bd8598SDavid Rientjes  * Walk zones in a node and print using a callback.
1139b2bd8598SDavid Rientjes  * If @assert_populated is true, only invoke the callback for populated zones.
1140b2bd8598SDavid Rientjes  */
11413c486871SAndrew Morton static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
1142727c080fSVinayak Menon 		bool assert_populated, bool nolock,
11433c486871SAndrew Morton 		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
11443c486871SAndrew Morton {
11453c486871SAndrew Morton 	struct zone *zone;
11463c486871SAndrew Morton 	struct zone *node_zones = pgdat->node_zones;
11473c486871SAndrew Morton 	unsigned long flags;
11483c486871SAndrew Morton 
11493c486871SAndrew Morton 	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
1150b2bd8598SDavid Rientjes 		if (assert_populated && !populated_zone(zone))
11513c486871SAndrew Morton 			continue;
11523c486871SAndrew Morton 
1153727c080fSVinayak Menon 		if (!nolock)
11543c486871SAndrew Morton 			spin_lock_irqsave(&zone->lock, flags);
11553c486871SAndrew Morton 		print(m, pgdat, zone);
1156727c080fSVinayak Menon 		if (!nolock)
11573c486871SAndrew Morton 			spin_unlock_irqrestore(&zone->lock, flags);
11583c486871SAndrew Morton 	}
11593c486871SAndrew Morton }
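/*
 * Illustrative sketch (not part of the original source): the minimal
 * shape of a print callback that walk_zones_in_node() accepts.  The
 * name zone_span_print is invented for this example; the real callers
 * below (frag_show_print() and friends) follow the same pattern.
 *
 *	static void zone_span_print(struct seq_file *m, pg_data_t *pgdat,
 *				    struct zone *zone)
 *	{
 *		seq_printf(m, "Node %d, zone %8s spans %lu pages\n",
 *			   pgdat->node_id, zone->name, zone->spanned_pages);
 *	}
 *
 * Unless nolock is passed, the callback runs with zone->lock held and
 * interrupts disabled, so it must not sleep.
 */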
11603c486871SAndrew Morton #endif
11613c486871SAndrew Morton 
1162d7a5752cSMel Gorman #ifdef CONFIG_PROC_FS
1163467c996cSMel Gorman static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
1164467c996cSMel Gorman 						struct zone *zone)
1165467c996cSMel Gorman {
1166467c996cSMel Gorman 	int order;
1167467c996cSMel Gorman 
1168f6ac2354SChristoph Lameter 	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1169f6ac2354SChristoph Lameter 	for (order = 0; order < MAX_ORDER; ++order)
1170f6ac2354SChristoph Lameter 		seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
1171f6ac2354SChristoph Lameter 	seq_putc(m, '\n');
1172f6ac2354SChristoph Lameter }
1173467c996cSMel Gorman 
1174467c996cSMel Gorman /*
1175467c996cSMel Gorman  * This walks the free areas for each zone.
1176467c996cSMel Gorman  */
1177467c996cSMel Gorman static int frag_show(struct seq_file *m, void *arg)
1178467c996cSMel Gorman {
1179467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1180727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, frag_show_print);
1181467c996cSMel Gorman 	return 0;
1182467c996cSMel Gorman }
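/*
 * Example of a resulting /proc/buddyinfo line (counts invented):
 *
 *	Node 0, zone   Normal   3498   1632    472    241    110     47 ...
 *
 * One column per order 0..MAX_ORDER-1, each being the nr_free count of
 * the corresponding free_area.
 */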
1183467c996cSMel Gorman 
1184467c996cSMel Gorman static void pagetypeinfo_showfree_print(struct seq_file *m,
1185467c996cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1186467c996cSMel Gorman {
1187467c996cSMel Gorman 	int order, mtype;
1188467c996cSMel Gorman 
1189467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
1190467c996cSMel Gorman 		seq_printf(m, "Node %4d, zone %8s, type %12s ",
1191467c996cSMel Gorman 					pgdat->node_id,
1192467c996cSMel Gorman 					zone->name,
1193467c996cSMel Gorman 					migratetype_names[mtype]);
1194467c996cSMel Gorman 		for (order = 0; order < MAX_ORDER; ++order) {
1195467c996cSMel Gorman 			unsigned long freecount = 0;
1196467c996cSMel Gorman 			struct free_area *area;
1197467c996cSMel Gorman 			struct list_head *curr;
1198467c996cSMel Gorman 
1199467c996cSMel Gorman 			area = &(zone->free_area[order]);
1200467c996cSMel Gorman 
1201467c996cSMel Gorman 			list_for_each(curr, &area->free_list[mtype])
1202467c996cSMel Gorman 				freecount++;
1203467c996cSMel Gorman 			seq_printf(m, "%6lu ", freecount);
1204467c996cSMel Gorman 		}
1205467c996cSMel Gorman 		seq_putc(m, '\n');
1206467c996cSMel Gorman 	}
1207467c996cSMel Gorman }
1208467c996cSMel Gorman 
1209467c996cSMel Gorman /* Print out the free pages at each order for each migratetype */
1210467c996cSMel Gorman static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
1211467c996cSMel Gorman {
1212467c996cSMel Gorman 	int order;
1213467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1214467c996cSMel Gorman 
1215467c996cSMel Gorman 	/* Print header */
1216467c996cSMel Gorman 	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
1217467c996cSMel Gorman 	for (order = 0; order < MAX_ORDER; ++order)
1218467c996cSMel Gorman 		seq_printf(m, "%6d ", order);
1219467c996cSMel Gorman 	seq_putc(m, '\n');
1220467c996cSMel Gorman 
1221727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
1222467c996cSMel Gorman 
1223467c996cSMel Gorman 	return 0;
1224467c996cSMel Gorman }
1225467c996cSMel Gorman 
1226467c996cSMel Gorman static void pagetypeinfo_showblockcount_print(struct seq_file *m,
1227467c996cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1228467c996cSMel Gorman {
1229467c996cSMel Gorman 	int mtype;
1230467c996cSMel Gorman 	unsigned long pfn;
1231467c996cSMel Gorman 	unsigned long start_pfn = zone->zone_start_pfn;
1232108bcc96SCody P Schafer 	unsigned long end_pfn = zone_end_pfn(zone);
1233467c996cSMel Gorman 	unsigned long count[MIGRATE_TYPES] = { 0, };
1234467c996cSMel Gorman 
1235467c996cSMel Gorman 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
1236467c996cSMel Gorman 		struct page *page;
1237467c996cSMel Gorman 
1238d336e94eSMichal Hocko 		page = pfn_to_online_page(pfn);
1239d336e94eSMichal Hocko 		if (!page)
1240467c996cSMel Gorman 			continue;
1241467c996cSMel Gorman 
1242eb33575cSMel Gorman 		/* Watch for unexpected holes punched in the memmap */
1243eb33575cSMel Gorman 		if (!memmap_valid_within(pfn, page, zone))
1244e80d6a24SMel Gorman 			continue;
1245eb33575cSMel Gorman 
1246a91c43c7SJoonsoo Kim 		if (page_zone(page) != zone)
1247a91c43c7SJoonsoo Kim 			continue;
1248a91c43c7SJoonsoo Kim 
1249467c996cSMel Gorman 		mtype = get_pageblock_migratetype(page);
1250467c996cSMel Gorman 
1251e80d6a24SMel Gorman 		if (mtype < MIGRATE_TYPES)
1252467c996cSMel Gorman 			count[mtype]++;
1253467c996cSMel Gorman 	}
1254467c996cSMel Gorman 
1255467c996cSMel Gorman 	/* Print counts */
1256467c996cSMel Gorman 	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1257467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1258467c996cSMel Gorman 		seq_printf(m, "%12lu ", count[mtype]);
1259467c996cSMel Gorman 	seq_putc(m, '\n');
1260467c996cSMel Gorman }
1261467c996cSMel Gorman 
1262f113e641SSeongJae Park /* Print out the number of pageblocks for each migratetype */
1263467c996cSMel Gorman static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
1264467c996cSMel Gorman {
1265467c996cSMel Gorman 	int mtype;
1266467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1267467c996cSMel Gorman 
1268467c996cSMel Gorman 	seq_printf(m, "\n%-23s", "Number of blocks type ");
1269467c996cSMel Gorman 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1270467c996cSMel Gorman 		seq_printf(m, "%12s ", migratetype_names[mtype]);
1271467c996cSMel Gorman 	seq_putc(m, '\n');
1272727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false,
1273727c080fSVinayak Menon 		pagetypeinfo_showblockcount_print);
1274467c996cSMel Gorman 
1275467c996cSMel Gorman 	return 0;
1276467c996cSMel Gorman }
1277467c996cSMel Gorman 
127848c96a36SJoonsoo Kim /*
127948c96a36SJoonsoo Kim  * Print out the number of pageblocks for each migratetype that contain pages
128048c96a36SJoonsoo Kim  * of other types. This gives an indication of how well fallbacks are being
128148c96a36SJoonsoo Kim  * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
128248c96a36SJoonsoo Kim  * to determine what is going on
128348c96a36SJoonsoo Kim  * to determine what is going on.
128448c96a36SJoonsoo Kim static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
128548c96a36SJoonsoo Kim {
128648c96a36SJoonsoo Kim #ifdef CONFIG_PAGE_OWNER
128748c96a36SJoonsoo Kim 	int mtype;
128848c96a36SJoonsoo Kim 
12897dd80b8aSVlastimil Babka 	if (!static_branch_unlikely(&page_owner_inited))
129048c96a36SJoonsoo Kim 		return;
129148c96a36SJoonsoo Kim 
129248c96a36SJoonsoo Kim 	drain_all_pages(NULL);
129348c96a36SJoonsoo Kim 
129448c96a36SJoonsoo Kim 	seq_printf(m, "\n%-23s", "Number of mixed blocks ");
129548c96a36SJoonsoo Kim 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
129648c96a36SJoonsoo Kim 		seq_printf(m, "%12s ", migratetype_names[mtype]);
129748c96a36SJoonsoo Kim 	seq_putc(m, '\n');
129848c96a36SJoonsoo Kim 
1299727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, true,
1300727c080fSVinayak Menon 		pagetypeinfo_showmixedcount_print);
130148c96a36SJoonsoo Kim #endif /* CONFIG_PAGE_OWNER */
130248c96a36SJoonsoo Kim }
130348c96a36SJoonsoo Kim 
1304467c996cSMel Gorman /*
1305467c996cSMel Gorman  * This prints out statistics in relation to grouping pages by mobility.
1306467c996cSMel Gorman  * It is expensive to collect so do not constantly read the file.
1307467c996cSMel Gorman  */
1308467c996cSMel Gorman static int pagetypeinfo_show(struct seq_file *m, void *arg)
1309467c996cSMel Gorman {
1310467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1311467c996cSMel Gorman 
131241b25a37SKOSAKI Motohiro 	/* check memoryless node */
1313a47b53c5SLai Jiangshan 	if (!node_state(pgdat->node_id, N_MEMORY))
131441b25a37SKOSAKI Motohiro 		return 0;
131541b25a37SKOSAKI Motohiro 
1316467c996cSMel Gorman 	seq_printf(m, "Page block order: %d\n", pageblock_order);
1317467c996cSMel Gorman 	seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
1318467c996cSMel Gorman 	seq_putc(m, '\n');
1319467c996cSMel Gorman 	pagetypeinfo_showfree(m, pgdat);
1320467c996cSMel Gorman 	pagetypeinfo_showblockcount(m, pgdat);
132148c96a36SJoonsoo Kim 	pagetypeinfo_showmixedcount(m, pgdat);
1322467c996cSMel Gorman 
1323f6ac2354SChristoph Lameter 	return 0;
1324f6ac2354SChristoph Lameter }
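/*
 * Illustrative /proc/pagetypeinfo excerpt assembled from the sections
 * above (numbers invented):
 *
 *	Page block order: 9
 *	Pages per block:  512
 *
 *	Free pages count per migrate type at order       0      1      2 ...
 *	Node    0, zone   Normal, type    Unmovable     12      5      3 ...
 *
 *	Number of blocks type     Unmovable      Movable  Reclaimable ...
 *	Node 0, zone   Normal           431         3418           25 ...
 */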
1325f6ac2354SChristoph Lameter 
13268f32f7e5SAlexey Dobriyan static const struct seq_operations fragmentation_op = {
1327f6ac2354SChristoph Lameter 	.start	= frag_start,
1328f6ac2354SChristoph Lameter 	.next	= frag_next,
1329f6ac2354SChristoph Lameter 	.stop	= frag_stop,
1330f6ac2354SChristoph Lameter 	.show	= frag_show,
1331f6ac2354SChristoph Lameter };
1332f6ac2354SChristoph Lameter 
13338f32f7e5SAlexey Dobriyan static int fragmentation_open(struct inode *inode, struct file *file)
13348f32f7e5SAlexey Dobriyan {
13358f32f7e5SAlexey Dobriyan 	return seq_open(file, &fragmentation_op);
13368f32f7e5SAlexey Dobriyan }
13378f32f7e5SAlexey Dobriyan 
13389d85e15fSAnshuman Khandual static const struct file_operations buddyinfo_file_operations = {
13398f32f7e5SAlexey Dobriyan 	.open		= fragmentation_open,
13408f32f7e5SAlexey Dobriyan 	.read		= seq_read,
13418f32f7e5SAlexey Dobriyan 	.llseek		= seq_lseek,
13428f32f7e5SAlexey Dobriyan 	.release	= seq_release,
13438f32f7e5SAlexey Dobriyan };
13448f32f7e5SAlexey Dobriyan 
134574e2e8e8SAlexey Dobriyan static const struct seq_operations pagetypeinfo_op = {
1346467c996cSMel Gorman 	.start	= frag_start,
1347467c996cSMel Gorman 	.next	= frag_next,
1348467c996cSMel Gorman 	.stop	= frag_stop,
1349467c996cSMel Gorman 	.show	= pagetypeinfo_show,
1350467c996cSMel Gorman };
1351467c996cSMel Gorman 
135274e2e8e8SAlexey Dobriyan static int pagetypeinfo_open(struct inode *inode, struct file *file)
135374e2e8e8SAlexey Dobriyan {
135474e2e8e8SAlexey Dobriyan 	return seq_open(file, &pagetypeinfo_op);
135574e2e8e8SAlexey Dobriyan }
135674e2e8e8SAlexey Dobriyan 
13579d85e15fSAnshuman Khandual static const struct file_operations pagetypeinfo_file_operations = {
135874e2e8e8SAlexey Dobriyan 	.open		= pagetypeinfo_open,
135974e2e8e8SAlexey Dobriyan 	.read		= seq_read,
136074e2e8e8SAlexey Dobriyan 	.llseek		= seq_lseek,
136174e2e8e8SAlexey Dobriyan 	.release	= seq_release,
136274e2e8e8SAlexey Dobriyan };
136374e2e8e8SAlexey Dobriyan 
1364e2ecc8a7SMel Gorman static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
1365e2ecc8a7SMel Gorman {
1366e2ecc8a7SMel Gorman 	int zid;
1367e2ecc8a7SMel Gorman 
1368e2ecc8a7SMel Gorman 	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
1369e2ecc8a7SMel Gorman 		struct zone *compare = &pgdat->node_zones[zid];
1370e2ecc8a7SMel Gorman 
1371e2ecc8a7SMel Gorman 		if (populated_zone(compare))
1372e2ecc8a7SMel Gorman 			return zone == compare;
1373e2ecc8a7SMel Gorman 	}
1374e2ecc8a7SMel Gorman 
1375e2ecc8a7SMel Gorman 	return false;
1376e2ecc8a7SMel Gorman }
1377e2ecc8a7SMel Gorman 
1378467c996cSMel Gorman static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1379467c996cSMel Gorman 							struct zone *zone)
1380f6ac2354SChristoph Lameter {
1381f6ac2354SChristoph Lameter 	int i;
1382f6ac2354SChristoph Lameter 	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
1383e2ecc8a7SMel Gorman 	if (is_zone_first_populated(pgdat, zone)) {
1384e2ecc8a7SMel Gorman 		seq_printf(m, "\n  per-node stats");
1385e2ecc8a7SMel Gorman 		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
1386e2ecc8a7SMel Gorman 			seq_printf(m, "\n      %-12s %lu",
1387e2ecc8a7SMel Gorman 				vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
1388e2ecc8a7SMel Gorman 				node_page_state(pgdat, i));
1389e2ecc8a7SMel Gorman 		}
1390e2ecc8a7SMel Gorman 	}
1391f6ac2354SChristoph Lameter 	seq_printf(m,
1392f6ac2354SChristoph Lameter 		   "\n  pages free     %lu"
1393f6ac2354SChristoph Lameter 		   "\n        min      %lu"
1394f6ac2354SChristoph Lameter 		   "\n        low      %lu"
1395f6ac2354SChristoph Lameter 		   "\n        high     %lu"
1396f6ac2354SChristoph Lameter 		   "\n        spanned  %lu"
13979feedc9dSJiang Liu 		   "\n        present  %lu"
13989feedc9dSJiang Liu 		   "\n        managed  %lu",
139988f5acf8SMel Gorman 		   zone_page_state(zone, NR_FREE_PAGES),
140041858966SMel Gorman 		   min_wmark_pages(zone),
140141858966SMel Gorman 		   low_wmark_pages(zone),
140241858966SMel Gorman 		   high_wmark_pages(zone),
1403f6ac2354SChristoph Lameter 		   zone->spanned_pages,
14049feedc9dSJiang Liu 		   zone->present_pages,
14059feedc9dSJiang Liu 		   zone->managed_pages);
14062244b95aSChristoph Lameter 
1407f6ac2354SChristoph Lameter 	seq_printf(m,
14083484b2deSMel Gorman 		   "\n        protection: (%ld",
1409f6ac2354SChristoph Lameter 		   zone->lowmem_reserve[0]);
1410f6ac2354SChristoph Lameter 	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
14113484b2deSMel Gorman 		seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
14127dfb8bf3SDavid Rientjes 	seq_putc(m, ')');
14137dfb8bf3SDavid Rientjes 
14147dfb8bf3SDavid Rientjes 	/* If unpopulated, no other information is useful */
14157dfb8bf3SDavid Rientjes 	if (!populated_zone(zone)) {
14167dfb8bf3SDavid Rientjes 		seq_putc(m, '\n');
14177dfb8bf3SDavid Rientjes 		return;
14187dfb8bf3SDavid Rientjes 	}
14197dfb8bf3SDavid Rientjes 
14207dfb8bf3SDavid Rientjes 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
14217dfb8bf3SDavid Rientjes 		seq_printf(m, "\n      %-12s %lu", vmstat_text[i],
14227dfb8bf3SDavid Rientjes 				zone_page_state(zone, i));
14237dfb8bf3SDavid Rientjes 
14247dfb8bf3SDavid Rientjes 	seq_printf(m, "\n  pagesets");
1425f6ac2354SChristoph Lameter 	for_each_online_cpu(i) {
1426f6ac2354SChristoph Lameter 		struct per_cpu_pageset *pageset;
1427f6ac2354SChristoph Lameter 
142899dcc3e5SChristoph Lameter 		pageset = per_cpu_ptr(zone->pageset, i);
1429f6ac2354SChristoph Lameter 		seq_printf(m,
14303dfa5721SChristoph Lameter 			   "\n    cpu: %i"
1431f6ac2354SChristoph Lameter 			   "\n              count: %i"
1432f6ac2354SChristoph Lameter 			   "\n              high:  %i"
1433f6ac2354SChristoph Lameter 			   "\n              batch: %i",
14343dfa5721SChristoph Lameter 			   i,
14353dfa5721SChristoph Lameter 			   pageset->pcp.count,
14363dfa5721SChristoph Lameter 			   pageset->pcp.high,
14373dfa5721SChristoph Lameter 			   pageset->pcp.batch);
1438df9ecabaSChristoph Lameter #ifdef CONFIG_SMP
1439df9ecabaSChristoph Lameter 		seq_printf(m, "\n  vm stats threshold: %d",
1440df9ecabaSChristoph Lameter 				pageset->stat_threshold);
1441df9ecabaSChristoph Lameter #endif
1442f6ac2354SChristoph Lameter 	}
1443f6ac2354SChristoph Lameter 	seq_printf(m,
1444599d0c95SMel Gorman 		   "\n  node_unreclaimable:  %u"
1445556adecbSRik van Riel 		   "\n  start_pfn:           %lu"
1446599d0c95SMel Gorman 		   "\n  node_inactive_ratio: %u",
1447c73322d0SJohannes Weiner 		   pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
1448556adecbSRik van Riel 		   zone->zone_start_pfn,
1449599d0c95SMel Gorman 		   zone->zone_pgdat->inactive_ratio);
1450f6ac2354SChristoph Lameter 	seq_putc(m, '\n');
1451f6ac2354SChristoph Lameter }
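/*
 * Illustrative /proc/zoneinfo excerpt for one zone (values invented):
 *
 *	Node 0, zone   Normal
 *	  pages free     81274
 *	        min      11386
 *	        low      14232
 *	        high     17078
 *	        spanned  524288
 *	        present  524288
 *	        managed  508538
 *	        protection: (0, 0, 0, 0)
 *	...
 *
 * The per-node stats block is emitted only once per node, attached to
 * its first populated zone, so node-wide counters are not repeated for
 * every zone.
 */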
1452467c996cSMel Gorman 
1453467c996cSMel Gorman /*
1454b2bd8598SDavid Rientjes  * Output information about zones in @pgdat.  All zones are printed regardless
1455b2bd8598SDavid Rientjes  * of whether they are populated or not: lowmem_reserve_ratio operates on the
1456b2bd8598SDavid Rientjes  * set of all zones and userspace would not be aware of such zones if they are
1457b2bd8598SDavid Rientjes  * suppressed here (zoneinfo displays the effect of lowmem_reserve_ratio).
1458467c996cSMel Gorman  */
1459467c996cSMel Gorman static int zoneinfo_show(struct seq_file *m, void *arg)
1460467c996cSMel Gorman {
1461467c996cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1462727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
1463f6ac2354SChristoph Lameter 	return 0;
1464f6ac2354SChristoph Lameter }
1465f6ac2354SChristoph Lameter 
14665c9fe628SAlexey Dobriyan static const struct seq_operations zoneinfo_op = {
1467f6ac2354SChristoph Lameter 	.start	= frag_start, /* iterate over all zones; the same
1468f6ac2354SChristoph Lameter 			       * iterator as the fragmentation file. */
1469f6ac2354SChristoph Lameter 	.next	= frag_next,
1470f6ac2354SChristoph Lameter 	.stop	= frag_stop,
1471f6ac2354SChristoph Lameter 	.show	= zoneinfo_show,
1472f6ac2354SChristoph Lameter };
1473f6ac2354SChristoph Lameter 
14745c9fe628SAlexey Dobriyan static int zoneinfo_open(struct inode *inode, struct file *file)
14755c9fe628SAlexey Dobriyan {
14765c9fe628SAlexey Dobriyan 	return seq_open(file, &zoneinfo_op);
14775c9fe628SAlexey Dobriyan }
14785c9fe628SAlexey Dobriyan 
14799d85e15fSAnshuman Khandual static const struct file_operations zoneinfo_file_operations = {
14805c9fe628SAlexey Dobriyan 	.open		= zoneinfo_open,
14815c9fe628SAlexey Dobriyan 	.read		= seq_read,
14825c9fe628SAlexey Dobriyan 	.llseek		= seq_lseek,
14835c9fe628SAlexey Dobriyan 	.release	= seq_release,
14845c9fe628SAlexey Dobriyan };
14855c9fe628SAlexey Dobriyan 
148679da826aSMichael Rubin enum writeback_stat_item {
148779da826aSMichael Rubin 	NR_DIRTY_THRESHOLD,
148879da826aSMichael Rubin 	NR_DIRTY_BG_THRESHOLD,
148979da826aSMichael Rubin 	NR_VM_WRITEBACK_STAT_ITEMS,
149079da826aSMichael Rubin };
149179da826aSMichael Rubin 
1492f6ac2354SChristoph Lameter static void *vmstat_start(struct seq_file *m, loff_t *pos)
1493f6ac2354SChristoph Lameter {
14942244b95aSChristoph Lameter 	unsigned long *v;
149579da826aSMichael Rubin 	int i, stat_items_size;
1496f6ac2354SChristoph Lameter 
1497f6ac2354SChristoph Lameter 	if (*pos >= ARRAY_SIZE(vmstat_text))
1498f6ac2354SChristoph Lameter 		return NULL;
149979da826aSMichael Rubin 	stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
150075ef7184SMel Gorman 			  NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) +
150179da826aSMichael Rubin 			  NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
1502f6ac2354SChristoph Lameter 
1503f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
150479da826aSMichael Rubin 	stat_items_size += sizeof(struct vm_event_state);
1505f8891e5eSChristoph Lameter #endif
150679da826aSMichael Rubin 
150779da826aSMichael Rubin 	v = kmalloc(stat_items_size, GFP_KERNEL);
15082244b95aSChristoph Lameter 	m->private = v;
15092244b95aSChristoph Lameter 	if (!v)
1510f6ac2354SChristoph Lameter 		return ERR_PTR(-ENOMEM);
15112244b95aSChristoph Lameter 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1512c41f012aSMichal Hocko 		v[i] = global_zone_page_state(i);
151379da826aSMichael Rubin 	v += NR_VM_ZONE_STAT_ITEMS;
151479da826aSMichael Rubin 
151575ef7184SMel Gorman 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
151675ef7184SMel Gorman 		v[i] = global_node_page_state(i);
151775ef7184SMel Gorman 	v += NR_VM_NODE_STAT_ITEMS;
151875ef7184SMel Gorman 
151979da826aSMichael Rubin 	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
152079da826aSMichael Rubin 			    v + NR_DIRTY_THRESHOLD);
152179da826aSMichael Rubin 	v += NR_VM_WRITEBACK_STAT_ITEMS;
152279da826aSMichael Rubin 
1523f8891e5eSChristoph Lameter #ifdef CONFIG_VM_EVENT_COUNTERS
152479da826aSMichael Rubin 	all_vm_events(v);
152579da826aSMichael Rubin 	v[PGPGIN] /= 2;		/* sectors -> kbytes */
152679da826aSMichael Rubin 	v[PGPGOUT] /= 2;
1527f8891e5eSChristoph Lameter #endif
1528ff8b16d7SWu Fengguang 	return (unsigned long *)m->private + *pos;
1529f6ac2354SChristoph Lameter }
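/*
 * Layout of the snapshot buffer built by vmstat_start() (one unsigned
 * long per /proc/vmstat line, in the same order as vmstat_text[]):
 *
 *	zone counters		NR_VM_ZONE_STAT_ITEMS longs
 *	node counters		NR_VM_NODE_STAT_ITEMS longs
 *	dirty thresholds	NR_VM_WRITEBACK_STAT_ITEMS longs
 *	vm event counters	NR_VM_EVENT_ITEMS longs (if configured)
 *
 * vmstat_next()/vmstat_show() then simply index this array with *pos.
 */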
1530f6ac2354SChristoph Lameter 
1531f6ac2354SChristoph Lameter static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
1532f6ac2354SChristoph Lameter {
1533f6ac2354SChristoph Lameter 	(*pos)++;
1534f6ac2354SChristoph Lameter 	if (*pos >= ARRAY_SIZE(vmstat_text))
1535f6ac2354SChristoph Lameter 		return NULL;
1536f6ac2354SChristoph Lameter 	return (unsigned long *)m->private + *pos;
1537f6ac2354SChristoph Lameter }
1538f6ac2354SChristoph Lameter 
1539f6ac2354SChristoph Lameter static int vmstat_show(struct seq_file *m, void *arg)
1540f6ac2354SChristoph Lameter {
1541f6ac2354SChristoph Lameter 	unsigned long *l = arg;
1542f6ac2354SChristoph Lameter 	unsigned long off = l - (unsigned long *)m->private;
154368ba0326SAlexey Dobriyan 
154468ba0326SAlexey Dobriyan 	seq_puts(m, vmstat_text[off]);
154575ba1d07SJoe Perches 	seq_put_decimal_ull(m, " ", *l);
154668ba0326SAlexey Dobriyan 	seq_putc(m, '\n');
1547f6ac2354SChristoph Lameter 	return 0;
1548f6ac2354SChristoph Lameter }
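/*
 * Each /proc/vmstat line therefore looks like (value invented):
 *
 *	nr_free_pages 81274
 *
 * i.e. the vmstat_text[] name, a space, and the snapshotted value.
 */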
1549f6ac2354SChristoph Lameter 
1550f6ac2354SChristoph Lameter static void vmstat_stop(struct seq_file *m, void *arg)
1551f6ac2354SChristoph Lameter {
1552f6ac2354SChristoph Lameter 	kfree(m->private);
1553f6ac2354SChristoph Lameter 	m->private = NULL;
1554f6ac2354SChristoph Lameter }
1555f6ac2354SChristoph Lameter 
1556b6aa44abSAlexey Dobriyan static const struct seq_operations vmstat_op = {
1557f6ac2354SChristoph Lameter 	.start	= vmstat_start,
1558f6ac2354SChristoph Lameter 	.next	= vmstat_next,
1559f6ac2354SChristoph Lameter 	.stop	= vmstat_stop,
1560f6ac2354SChristoph Lameter 	.show	= vmstat_show,
1561f6ac2354SChristoph Lameter };
1562f6ac2354SChristoph Lameter 
1563b6aa44abSAlexey Dobriyan static int vmstat_open(struct inode *inode, struct file *file)
1564b6aa44abSAlexey Dobriyan {
1565b6aa44abSAlexey Dobriyan 	return seq_open(file, &vmstat_op);
1566b6aa44abSAlexey Dobriyan }
1567b6aa44abSAlexey Dobriyan 
15689d85e15fSAnshuman Khandual static const struct file_operations vmstat_file_operations = {
1569b6aa44abSAlexey Dobriyan 	.open		= vmstat_open,
1570b6aa44abSAlexey Dobriyan 	.read		= seq_read,
1571b6aa44abSAlexey Dobriyan 	.llseek		= seq_lseek,
1572b6aa44abSAlexey Dobriyan 	.release	= seq_release,
1573b6aa44abSAlexey Dobriyan };
1574f6ac2354SChristoph Lameter #endif /* CONFIG_PROC_FS */
1575f6ac2354SChristoph Lameter 
1576df9ecabaSChristoph Lameter #ifdef CONFIG_SMP
1577d1187ed2SChristoph Lameter static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
157877461ab3SChristoph Lameter int sysctl_stat_interval __read_mostly = HZ;
1579d1187ed2SChristoph Lameter 
158052b6f46bSHugh Dickins #ifdef CONFIG_PROC_FS
158152b6f46bSHugh Dickins static void refresh_vm_stats(struct work_struct *work)
158252b6f46bSHugh Dickins {
158352b6f46bSHugh Dickins 	refresh_cpu_vm_stats(true);
158452b6f46bSHugh Dickins }
158552b6f46bSHugh Dickins 
158652b6f46bSHugh Dickins int vmstat_refresh(struct ctl_table *table, int write,
158752b6f46bSHugh Dickins 		   void __user *buffer, size_t *lenp, loff_t *ppos)
158852b6f46bSHugh Dickins {
158952b6f46bSHugh Dickins 	long val;
159052b6f46bSHugh Dickins 	int err;
159152b6f46bSHugh Dickins 	int i;
159252b6f46bSHugh Dickins 
159352b6f46bSHugh Dickins 	/*
159452b6f46bSHugh Dickins 	 * The regular update, every sysctl_stat_interval, may come later
159552b6f46bSHugh Dickins 	 * than expected: leaving a significant amount in per_cpu buckets.
159652b6f46bSHugh Dickins 	 * This is particularly misleading when checking a quantity of HUGE
159752b6f46bSHugh Dickins 	 * pages, immediately after running a test.  /proc/sys/vm/stat_refresh,
159852b6f46bSHugh Dickins 	 * which can equally be echo'ed to or cat'ted from (by root),
159952b6f46bSHugh Dickins 	 * can be used to update the stats just before reading them.
160052b6f46bSHugh Dickins 	 *
1601c41f012aSMichal Hocko 	 * Oh, and since global_zone_page_state() etc. are so careful to hide
160252b6f46bSHugh Dickins 	 * transiently negative values, report an error here if any of
160352b6f46bSHugh Dickins 	 * the stats is negative, so we know to go looking for imbalance.
160452b6f46bSHugh Dickins 	 */
160552b6f46bSHugh Dickins 	err = schedule_on_each_cpu(refresh_vm_stats);
160652b6f46bSHugh Dickins 	if (err)
160752b6f46bSHugh Dickins 		return err;
160852b6f46bSHugh Dickins 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
160975ef7184SMel Gorman 		val = atomic_long_read(&vm_zone_stat[i]);
161052b6f46bSHugh Dickins 		if (val < 0) {
161152b6f46bSHugh Dickins 			pr_warn("%s: %s %ld\n",
161252b6f46bSHugh Dickins 				__func__, vmstat_text[i], val);
161352b6f46bSHugh Dickins 			err = -EINVAL;
161452b6f46bSHugh Dickins 		}
161552b6f46bSHugh Dickins 	}
161652b6f46bSHugh Dickins 	if (err)
161752b6f46bSHugh Dickins 		return err;
161852b6f46bSHugh Dickins 	if (write)
161952b6f46bSHugh Dickins 		*ppos += *lenp;
162052b6f46bSHugh Dickins 	else
162152b6f46bSHugh Dickins 		*lenp = 0;
162252b6f46bSHugh Dickins 	return 0;
162352b6f46bSHugh Dickins }
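/*
 * Typical use from userspace (root), per the comment above - a sketch,
 * not a transcript:
 *
 *	# echo 1 >/proc/sys/vm/stat_refresh
 *	# grep nr_anon_pages /proc/vmstat
 *
 * The data written or read is irrelevant; the access itself forces the
 * per-cpu differentials to be folded into the global counters first.
 */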
162452b6f46bSHugh Dickins #endif /* CONFIG_PROC_FS */
162552b6f46bSHugh Dickins 
1626d1187ed2SChristoph Lameter static void vmstat_update(struct work_struct *w)
1627d1187ed2SChristoph Lameter {
16280eb77e98SChristoph Lameter 	if (refresh_cpu_vm_stats(true)) {
16297cc36bbdSChristoph Lameter 		/*
16307cc36bbdSChristoph Lameter 		 * Counters were updated so we expect more updates
16317cc36bbdSChristoph Lameter 		 * to occur in the future. Keep on running the
16327cc36bbdSChristoph Lameter 		 * update worker thread.
16337cc36bbdSChristoph Lameter 		 */
1634ce612879SMichal Hocko 		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
1635176bed1dSLinus Torvalds 				this_cpu_ptr(&vmstat_work),
163698f4ebb2SAnton Blanchard 				round_jiffies_relative(sysctl_stat_interval));
1637f01f17d3SMichal Hocko 	}
1638d1187ed2SChristoph Lameter }
1639d1187ed2SChristoph Lameter 
16450eb77e98SChristoph Lameter /*
16467cc36bbdSChristoph Lameter  * Check if the diffs for a certain cpu indicate that
16477cc36bbdSChristoph Lameter  * an update is needed.
16487cc36bbdSChristoph Lameter  */
16497cc36bbdSChristoph Lameter static bool need_update(int cpu)
1650d1187ed2SChristoph Lameter {
16517cc36bbdSChristoph Lameter 	struct zone *zone;
1652d1187ed2SChristoph Lameter 
16537cc36bbdSChristoph Lameter 	for_each_populated_zone(zone) {
16547cc36bbdSChristoph Lameter 		struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
16557cc36bbdSChristoph Lameter 
16567cc36bbdSChristoph Lameter 		BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
16577cc36bbdSChristoph Lameter 		/*
16587cc36bbdSChristoph Lameter 		 * The fast way of checking if there are any vmstat diffs.
16597cc36bbdSChristoph Lameter 		 * This works because the diffs are byte sized items.
16607cc36bbdSChristoph Lameter 		 */
16617cc36bbdSChristoph Lameter 		if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
16627cc36bbdSChristoph Lameter 			return true;
16637cc36bbdSChristoph Lameter 
16647cc36bbdSChristoph Lameter 	}
16657cc36bbdSChristoph Lameter 	return false;
16667cc36bbdSChristoph Lameter }
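/*
 * memchr_inv(p, c, n) returns the first byte of p[0..n-1] that differs
 * from c, or NULL when all n bytes match, so the check above is a
 * single-pass "any diff pending?" test.  For example, with
 * vm_stat_diff[] = { 0, 0, -1, 0, ... } it returns &vm_stat_diff[2]
 * and need_update() reports true.
 */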
16677cc36bbdSChristoph Lameter 
16687b8da4c7SChristoph Lameter /*
16697b8da4c7SChristoph Lameter  * Switch off vmstat processing and then fold all the remaining differentials
16707b8da4c7SChristoph Lameter  * until the diffs stay at zero. The function is used by NOHZ and can only be
16717b8da4c7SChristoph Lameter  * invoked when tick processing is not active.
16727b8da4c7SChristoph Lameter  */
1673f01f17d3SMichal Hocko void quiet_vmstat(void)
1674f01f17d3SMichal Hocko {
1675f01f17d3SMichal Hocko 	if (system_state != SYSTEM_RUNNING)
1676f01f17d3SMichal Hocko 		return;
1677f01f17d3SMichal Hocko 
16787b8da4c7SChristoph Lameter 	if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
1679f01f17d3SMichal Hocko 		return;
1680f01f17d3SMichal Hocko 
1681f01f17d3SMichal Hocko 	if (!need_update(smp_processor_id()))
1682f01f17d3SMichal Hocko 		return;
1683f01f17d3SMichal Hocko 
1684f01f17d3SMichal Hocko 	/*
1685f01f17d3SMichal Hocko 	 * Just refresh counters and do not care about the pending delayed
1686f01f17d3SMichal Hocko 	 * vmstat_update. It doesn't fire that often to matter and canceling
1687f01f17d3SMichal Hocko 	 * it would be too expensive from this path.
1688f01f17d3SMichal Hocko 	 * vmstat_shepherd will take care about that for us.
1689f01f17d3SMichal Hocko 	 */
1690f01f17d3SMichal Hocko 	refresh_cpu_vm_stats(false);
1691f01f17d3SMichal Hocko }
1692f01f17d3SMichal Hocko 
16937cc36bbdSChristoph Lameter /*
16947cc36bbdSChristoph Lameter  * Shepherd worker thread that checks the differentials of processors
16957cc36bbdSChristoph Lameter  * whose vmstat update workers have been switched off because of
16967cc36bbdSChristoph Lameter  * inactivity.
16987cc36bbdSChristoph Lameter  */
16997cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w);
17007cc36bbdSChristoph Lameter 
17010eb77e98SChristoph Lameter static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);
17027cc36bbdSChristoph Lameter 
17037cc36bbdSChristoph Lameter static void vmstat_shepherd(struct work_struct *w)
17047cc36bbdSChristoph Lameter {
17057cc36bbdSChristoph Lameter 	int cpu;
17067cc36bbdSChristoph Lameter 
17077cc36bbdSChristoph Lameter 	get_online_cpus();
17087cc36bbdSChristoph Lameter 	/* Check processors whose vmstat worker threads have been disabled */
17097b8da4c7SChristoph Lameter 	for_each_online_cpu(cpu) {
1710f01f17d3SMichal Hocko 		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
17117cc36bbdSChristoph Lameter 
17127b8da4c7SChristoph Lameter 		if (!delayed_work_pending(dw) && need_update(cpu))
1713ce612879SMichal Hocko 			queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
1714f01f17d3SMichal Hocko 	}
17157cc36bbdSChristoph Lameter 	put_online_cpus();
17167cc36bbdSChristoph Lameter 
17177cc36bbdSChristoph Lameter 	schedule_delayed_work(&shepherd,
17187cc36bbdSChristoph Lameter 		round_jiffies_relative(sysctl_stat_interval));
17197cc36bbdSChristoph Lameter }
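/*
 * Rough control flow of the machinery above (illustrative):
 *
 *	vmstat_shepherd, every sysctl_stat_interval:
 *	    cpu has pending diffs and no queued work?
 *		-> queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0)
 *	vmstat_update, on that cpu:
 *	    refresh_cpu_vm_stats() folded anything?
 *		-> re-arm itself for the next interval
 *		-> otherwise go idle until the shepherd notices activity
 */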
17207cc36bbdSChristoph Lameter 
17217cc36bbdSChristoph Lameter static void __init start_shepherd_timer(void)
17227cc36bbdSChristoph Lameter {
17237cc36bbdSChristoph Lameter 	int cpu;
17247cc36bbdSChristoph Lameter 
17257cc36bbdSChristoph Lameter 	for_each_possible_cpu(cpu)
1726ccde8bd4SMichal Hocko 		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
17277cc36bbdSChristoph Lameter 			vmstat_update);
17287cc36bbdSChristoph Lameter 
17297cc36bbdSChristoph Lameter 	schedule_delayed_work(&shepherd,
17307cc36bbdSChristoph Lameter 		round_jiffies_relative(sysctl_stat_interval));
1731d1187ed2SChristoph Lameter }
1732d1187ed2SChristoph Lameter 
173303e86dbaSTim Chen static void __init init_cpu_node_state(void)
173403e86dbaSTim Chen {
17354c501327SSebastian Andrzej Siewior 	int node;
173603e86dbaSTim Chen 
17374c501327SSebastian Andrzej Siewior 	for_each_online_node(node) {
17384c501327SSebastian Andrzej Siewior 		if (cpumask_weight(cpumask_of_node(node)) > 0)
17394c501327SSebastian Andrzej Siewior 			node_set_state(node, N_CPU);
17404c501327SSebastian Andrzej Siewior 	}
174103e86dbaSTim Chen }
174203e86dbaSTim Chen 
17435438da97SSebastian Andrzej Siewior static int vmstat_cpu_online(unsigned int cpu)
1744807a1bd2SToshi Kani {
17455ee28a44SKAMEZAWA Hiroyuki 	refresh_zone_stat_thresholds();
1746ad596925SChristoph Lameter 	node_set_state(cpu_to_node(cpu), N_CPU);
17475438da97SSebastian Andrzej Siewior 	return 0;
1748df9ecabaSChristoph Lameter }
1749df9ecabaSChristoph Lameter 
17505438da97SSebastian Andrzej Siewior static int vmstat_cpu_down_prep(unsigned int cpu)
17515438da97SSebastian Andrzej Siewior {
17525438da97SSebastian Andrzej Siewior 	cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
17535438da97SSebastian Andrzej Siewior 	return 0;
17545438da97SSebastian Andrzej Siewior }
17555438da97SSebastian Andrzej Siewior 
17565438da97SSebastian Andrzej Siewior static int vmstat_cpu_dead(unsigned int cpu)
17575438da97SSebastian Andrzej Siewior {
17585438da97SSebastian Andrzej Siewior 	const struct cpumask *node_cpus;
17595438da97SSebastian Andrzej Siewior 	int node;
17605438da97SSebastian Andrzej Siewior 
17615438da97SSebastian Andrzej Siewior 	node = cpu_to_node(cpu);
17625438da97SSebastian Andrzej Siewior 
17635438da97SSebastian Andrzej Siewior 	refresh_zone_stat_thresholds();
17645438da97SSebastian Andrzej Siewior 	node_cpus = cpumask_of_node(node);
17655438da97SSebastian Andrzej Siewior 	if (cpumask_weight(node_cpus) > 0)
17665438da97SSebastian Andrzej Siewior 		return 0;
17675438da97SSebastian Andrzej Siewior 
17685438da97SSebastian Andrzej Siewior 	node_clear_state(node, N_CPU);
17695438da97SSebastian Andrzej Siewior 	return 0;
17705438da97SSebastian Andrzej Siewior }
17715438da97SSebastian Andrzej Siewior 
17728f32f7e5SAlexey Dobriyan #endif
1773df9ecabaSChristoph Lameter 
1774ce612879SMichal Hocko struct workqueue_struct *mm_percpu_wq;
1775ce612879SMichal Hocko 
1776597b7305SMichal Hocko void __init init_mm_internals(void)
1777df9ecabaSChristoph Lameter {
1778ce612879SMichal Hocko 	int ret __maybe_unused;
17795438da97SSebastian Andrzej Siewior 
178080d136e1SMichal Hocko 	mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
1781ce612879SMichal Hocko 
1782ce612879SMichal Hocko #ifdef CONFIG_SMP
17835438da97SSebastian Andrzej Siewior 	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
17845438da97SSebastian Andrzej Siewior 					NULL, vmstat_cpu_dead);
17855438da97SSebastian Andrzej Siewior 	if (ret < 0)
17865438da97SSebastian Andrzej Siewior 		pr_err("vmstat: failed to register 'dead' hotplug state\n");
17875438da97SSebastian Andrzej Siewior 
17885438da97SSebastian Andrzej Siewior 	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
17895438da97SSebastian Andrzej Siewior 					vmstat_cpu_online,
17905438da97SSebastian Andrzej Siewior 					vmstat_cpu_down_prep);
17915438da97SSebastian Andrzej Siewior 	if (ret < 0)
17925438da97SSebastian Andrzej Siewior 		pr_err("vmstat: failed to register 'online' hotplug state\n");
17935438da97SSebastian Andrzej Siewior 
17945438da97SSebastian Andrzej Siewior 	get_online_cpus();
179503e86dbaSTim Chen 	init_cpu_node_state();
17965438da97SSebastian Andrzej Siewior 	put_online_cpus();
1797d1187ed2SChristoph Lameter 
17987cc36bbdSChristoph Lameter 	start_shepherd_timer();
17998f32f7e5SAlexey Dobriyan #endif
18008f32f7e5SAlexey Dobriyan #ifdef CONFIG_PROC_FS
18019d85e15fSAnshuman Khandual 	proc_create("buddyinfo", 0444, NULL, &buddyinfo_file_operations);
18029d85e15fSAnshuman Khandual 	proc_create("pagetypeinfo", 0444, NULL, &pagetypeinfo_file_operations);
18039d85e15fSAnshuman Khandual 	proc_create("vmstat", 0444, NULL, &vmstat_file_operations);
18049d85e15fSAnshuman Khandual 	proc_create("zoneinfo", 0444, NULL, &zoneinfo_file_operations);
18058f32f7e5SAlexey Dobriyan #endif
1806df9ecabaSChristoph Lameter }
1807d7a5752cSMel Gorman 
1808d7a5752cSMel Gorman #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
1809d7a5752cSMel Gorman 
1810d7a5752cSMel Gorman /*
1811d7a5752cSMel Gorman  * Return an index indicating how much of the available free memory is
1812d7a5752cSMel Gorman  * unusable for an allocation of the requested size.
1813d7a5752cSMel Gorman  */
1814d7a5752cSMel Gorman static int unusable_free_index(unsigned int order,
1815d7a5752cSMel Gorman 				struct contig_page_info *info)
1816d7a5752cSMel Gorman {
1817d7a5752cSMel Gorman 	/* No free memory is interpreted as all free memory is unusable */
1818d7a5752cSMel Gorman 	if (info->free_pages == 0)
1819d7a5752cSMel Gorman 		return 1000;
1820d7a5752cSMel Gorman 
1821d7a5752cSMel Gorman 	/*
1822d7a5752cSMel Gorman 	 * The index is conceptually a value between 0 and 1; return it
1823d7a5752cSMel Gorman 	 * scaled to 3 decimal places, i.e. in the range 0-1000.
1824d7a5752cSMel Gorman 	 *
1825d7a5752cSMel Gorman 	 * 0 => no fragmentation
1826d7a5752cSMel Gorman 	 * 1 => high fragmentation
1827d7a5752cSMel Gorman 	 */
1828d7a5752cSMel Gorman 	return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
1829d7a5752cSMel Gorman 
1830d7a5752cSMel Gorman }
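/*
 * Worked example (invented numbers): for order = 2 with
 * free_pages = 1000 and free_blocks_suitable = 200, the pages usable
 * by an order-2 request are 200 << 2 = 800, giving an index of
 * (1000 - 800) * 1000 / 1000 = 200, displayed as 0.200.
 */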
1831d7a5752cSMel Gorman 
1832d7a5752cSMel Gorman static void unusable_show_print(struct seq_file *m,
1833d7a5752cSMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1834d7a5752cSMel Gorman {
1835d7a5752cSMel Gorman 	unsigned int order;
1836d7a5752cSMel Gorman 	int index;
1837d7a5752cSMel Gorman 	struct contig_page_info info;
1838d7a5752cSMel Gorman 
1839d7a5752cSMel Gorman 	seq_printf(m, "Node %d, zone %8s ",
1840d7a5752cSMel Gorman 				pgdat->node_id,
1841d7a5752cSMel Gorman 				zone->name);
1842d7a5752cSMel Gorman 	for (order = 0; order < MAX_ORDER; ++order) {
1843d7a5752cSMel Gorman 		fill_contig_page_info(zone, order, &info);
1844d7a5752cSMel Gorman 		index = unusable_free_index(order, &info);
1845d7a5752cSMel Gorman 		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
1846d7a5752cSMel Gorman 	}
1847d7a5752cSMel Gorman 
1848d7a5752cSMel Gorman 	seq_putc(m, '\n');
1849d7a5752cSMel Gorman }
1850d7a5752cSMel Gorman 
1851d7a5752cSMel Gorman /*
1852d7a5752cSMel Gorman  * Display unusable free space index
1853d7a5752cSMel Gorman  *
1854d7a5752cSMel Gorman  * The unusable free space index measures how much of the available free
1855d7a5752cSMel Gorman  * memory cannot be used to satisfy an allocation of a given size and is a
1856d7a5752cSMel Gorman  * value between 0 and 1. The higher the value, the more of the free memory is
1857d7a5752cSMel Gorman  * unusable and, by implication, the worse the external fragmentation is. This
1858d7a5752cSMel Gorman  * can be expressed as a percentage by multiplying by 100.
1859d7a5752cSMel Gorman  */
1860d7a5752cSMel Gorman static int unusable_show(struct seq_file *m, void *arg)
1861d7a5752cSMel Gorman {
1862d7a5752cSMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1863d7a5752cSMel Gorman 
1864d7a5752cSMel Gorman 	/* check memoryless node */
1865a47b53c5SLai Jiangshan 	if (!node_state(pgdat->node_id, N_MEMORY))
1866d7a5752cSMel Gorman 		return 0;
1867d7a5752cSMel Gorman 
1868727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, unusable_show_print);
1869d7a5752cSMel Gorman 
1870d7a5752cSMel Gorman 	return 0;
1871d7a5752cSMel Gorman }
1872d7a5752cSMel Gorman 
1873d7a5752cSMel Gorman static const struct seq_operations unusable_op = {
1874d7a5752cSMel Gorman 	.start	= frag_start,
1875d7a5752cSMel Gorman 	.next	= frag_next,
1876d7a5752cSMel Gorman 	.stop	= frag_stop,
1877d7a5752cSMel Gorman 	.show	= unusable_show,
1878d7a5752cSMel Gorman };
1879d7a5752cSMel Gorman 
1880d7a5752cSMel Gorman static int unusable_open(struct inode *inode, struct file *file)
1881d7a5752cSMel Gorman {
1882d7a5752cSMel Gorman 	return seq_open(file, &unusable_op);
1883d7a5752cSMel Gorman }
1884d7a5752cSMel Gorman 
1885d7a5752cSMel Gorman static const struct file_operations unusable_file_ops = {
1886d7a5752cSMel Gorman 	.open		= unusable_open,
1887d7a5752cSMel Gorman 	.read		= seq_read,
1888d7a5752cSMel Gorman 	.llseek		= seq_lseek,
1889d7a5752cSMel Gorman 	.release	= seq_release,
1890d7a5752cSMel Gorman };
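/*
 * With CONFIG_DEBUG_FS and CONFIG_COMPACTION enabled, the file wired up
 * in extfrag_debug_init() below reads like (values invented):
 *
 *	# cat /sys/kernel/debug/extfrag/unusable_index
 *	Node 0, zone   Normal 0.000 0.041 0.113 0.277 ...
 *
 * One column per order; 0.000 means all free memory is usable for that
 * order, 1.000 means none of it is.
 */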
1891d7a5752cSMel Gorman 
1892f1a5ab12SMel Gorman static void extfrag_show_print(struct seq_file *m,
1893f1a5ab12SMel Gorman 					pg_data_t *pgdat, struct zone *zone)
1894f1a5ab12SMel Gorman {
1895f1a5ab12SMel Gorman 	unsigned int order;
1896f1a5ab12SMel Gorman 	int index;
1897f1a5ab12SMel Gorman 
1898f1a5ab12SMel Gorman 	/* Alloc on stack as interrupts are disabled for zone walk */
1899f1a5ab12SMel Gorman 	struct contig_page_info info;
1900f1a5ab12SMel Gorman 
1901f1a5ab12SMel Gorman 	seq_printf(m, "Node %d, zone %8s ",
1902f1a5ab12SMel Gorman 				pgdat->node_id,
1903f1a5ab12SMel Gorman 				zone->name);
1904f1a5ab12SMel Gorman 	for (order = 0; order < MAX_ORDER; ++order) {
1905f1a5ab12SMel Gorman 		fill_contig_page_info(zone, order, &info);
190656de7263SMel Gorman 		index = __fragmentation_index(order, &info);
1907f1a5ab12SMel Gorman 		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
1908f1a5ab12SMel Gorman 	}
1909f1a5ab12SMel Gorman 
1910f1a5ab12SMel Gorman 	seq_putc(m, '\n');
1911f1a5ab12SMel Gorman }
1912f1a5ab12SMel Gorman 
1913f1a5ab12SMel Gorman /*
1914f1a5ab12SMel Gorman  * Display the fragmentation index for orders at which allocations would fail.
1915f1a5ab12SMel Gorman  */
1916f1a5ab12SMel Gorman static int extfrag_show(struct seq_file *m, void *arg)
1917f1a5ab12SMel Gorman {
1918f1a5ab12SMel Gorman 	pg_data_t *pgdat = (pg_data_t *)arg;
1919f1a5ab12SMel Gorman 
1920727c080fSVinayak Menon 	walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);
1921f1a5ab12SMel Gorman 
1922f1a5ab12SMel Gorman 	return 0;
1923f1a5ab12SMel Gorman }
1924f1a5ab12SMel Gorman 
1925f1a5ab12SMel Gorman static const struct seq_operations extfrag_op = {
1926f1a5ab12SMel Gorman 	.start	= frag_start,
1927f1a5ab12SMel Gorman 	.next	= frag_next,
1928f1a5ab12SMel Gorman 	.stop	= frag_stop,
1929f1a5ab12SMel Gorman 	.show	= extfrag_show,
1930f1a5ab12SMel Gorman };
1931f1a5ab12SMel Gorman 
1932f1a5ab12SMel Gorman static int extfrag_open(struct inode *inode, struct file *file)
1933f1a5ab12SMel Gorman {
1934f1a5ab12SMel Gorman 	return seq_open(file, &extfrag_op);
1935f1a5ab12SMel Gorman }
1936f1a5ab12SMel Gorman 
1937f1a5ab12SMel Gorman static const struct file_operations extfrag_file_ops = {
1938f1a5ab12SMel Gorman 	.open		= extfrag_open,
1939f1a5ab12SMel Gorman 	.read		= seq_read,
1940f1a5ab12SMel Gorman 	.llseek		= seq_lseek,
1941f1a5ab12SMel Gorman 	.release	= seq_release,
1942f1a5ab12SMel Gorman };
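/*
 * The companion extfrag_index file reads like (values invented):
 *
 *	# cat /sys/kernel/debug/extfrag/extfrag_index
 *	Node 0, zone   Normal -1.000 -1.000 -1.000 0.931 0.966 ...
 *
 * -1.000 marks orders whose allocations would currently succeed; for
 * the rest, values approaching 1.000 indicate that failure would be due
 * to external fragmentation rather than a plain lack of free memory.
 */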
1943f1a5ab12SMel Gorman 
1944d7a5752cSMel Gorman static int __init extfrag_debug_init(void)
1945d7a5752cSMel Gorman {
1946bde8bd8aSSasikantha babu 	struct dentry *extfrag_debug_root;
1947bde8bd8aSSasikantha babu 
1948d7a5752cSMel Gorman 	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
1949d7a5752cSMel Gorman 	if (!extfrag_debug_root)
1950d7a5752cSMel Gorman 		return -ENOMEM;
1951d7a5752cSMel Gorman 
1952d7a5752cSMel Gorman 	if (!debugfs_create_file("unusable_index", 0444,
1953d7a5752cSMel Gorman 			extfrag_debug_root, NULL, &unusable_file_ops))
1954bde8bd8aSSasikantha babu 		goto fail;
1955d7a5752cSMel Gorman 
1956f1a5ab12SMel Gorman 	if (!debugfs_create_file("extfrag_index", 0444,
1957f1a5ab12SMel Gorman 			extfrag_debug_root, NULL, &extfrag_file_ops))
1958bde8bd8aSSasikantha babu 		goto fail;
1959f1a5ab12SMel Gorman 
1960d7a5752cSMel Gorman 	return 0;
1961bde8bd8aSSasikantha babu fail:
1962bde8bd8aSSasikantha babu 	debugfs_remove_recursive(extfrag_debug_root);
1963bde8bd8aSSasikantha babu 	return -ENOMEM;
1964d7a5752cSMel Gorman }
1965d7a5752cSMel Gorman 
1966d7a5752cSMel Gorman module_init(extfrag_debug_init);
1967d7a5752cSMel Gorman #endif
1968