/* xref: /linux/mm/vmstat.c (revision 5e8d780d745c1619aba81fe7166c5a4b5cad2b84) */
/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  zoned VM statistics
 *  Copyright (C) 2006 Silicon Graphics, Inc.,
 *		Christoph Lameter <christoph@lameter.com>
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>

void __get_zone_counts(unsigned long *active, unsigned long *inactive,
			unsigned long *free, struct pglist_data *pgdat)
{
	struct zone *zones = pgdat->node_zones;
	int i;

	*active = 0;
	*inactive = 0;
	*free = 0;
	for (i = 0; i < MAX_NR_ZONES; i++) {
		*active += zones[i].nr_active;
		*inactive += zones[i].nr_inactive;
		*free += zones[i].free_pages;
	}
}

void get_zone_counts(unsigned long *active,
		unsigned long *inactive, unsigned long *free)
{
	struct pglist_data *pgdat;

	*active = 0;
	*inactive = 0;
	*free = 0;
	for_each_online_pgdat(pgdat) {
		unsigned long l, m, n;
		__get_zone_counts(&l, &m, &n, pgdat);
		*active += l;
		*inactive += m;
		*free += n;
	}
}

#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
{
	int cpu = 0;
	int i;

	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

	cpu = first_cpu(*cpumask);
	while (cpu < NR_CPUS) {
		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

		cpu = next_cpu(cpu, *cpumask);

		if (cpu < NR_CPUS)
			prefetch(&per_cpu(vm_event_states, cpu));

		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
			ret[i] += this->event[i];
	}
}

/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
void all_vm_events(unsigned long *ret)
{
	sum_vm_events(ret, &cpu_online_map);
}
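
/*
 * Illustrative sketch only (not part of the original file): a minimal
 * example of how a caller might snapshot the event counters with
 * all_vm_events().  'show_fault_count' is a hypothetical helper; the
 * PGFAULT index and NR_VM_EVENT_ITEMS come from <linux/vmstat.h>.
 * Wrapped in #if 0 so the sketch is never compiled.
 */
#if 0
static void show_fault_count(void)
{
	unsigned long events[NR_VM_EVENT_ITEMS];

	all_vm_events(events);	/* approximate, per the comment above */
	printk(KERN_DEBUG "pgfault events so far: %lu\n", events[PGFAULT]);
}
#endif	/* illustrative sketch */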

#ifdef CONFIG_HOTPLUG
/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * but keeps the global counts constant.
 */
void vm_events_fold_cpu(int cpu)
{
	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
	int i;

	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
		count_vm_events(i, fold_state->event[i]);
		fold_state->event[i] = 0;
	}
}
#endif /* CONFIG_HOTPLUG */
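
/*
 * Illustrative sketch only (not part of the original file): a CPU
 * hotplug callback along the lines of the one in mm/page_alloc.c
 * would fold a dead CPU's event counts into the current CPU so that
 * the global totals are preserved.  'vmstat_cpu_gone' is a
 * hypothetical helper.  Wrapped in #if 0 so it is never compiled.
 */
#if 0
static void vmstat_cpu_gone(int cpu)
{
	local_irq_disable();
	vm_events_fold_cpu(cpu);
	local_irq_enable();
}
#endif	/* illustrative sketch */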

#endif /* CONFIG_VM_EVENT_COUNTERS */

/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
EXPORT_SYMBOL(vm_stat);

#ifdef CONFIG_SMP

#define STAT_THRESHOLD 32

/*
 * Determine pointer to currently valid differential byte given a zone and
 * the item number.
 *
 * Preemption must be off
 */
static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item)
{
	return &zone_pcp(zone, smp_processor_id())->vm_stat_diff[item];
}

/*
 * For use when we know that interrupts are disabled.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
				int delta)
{
	s8 *p;
	long x;

	p = diff_pointer(zone, item);
	x = delta + *p;

	if (unlikely(x > STAT_THRESHOLD || x < -STAT_THRESHOLD)) {
		zone_page_state_add(x, zone, item);
		x = 0;
	}

	*p = x;
}
EXPORT_SYMBOL(__mod_zone_page_state);
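
/*
 * Illustrative sketch only (not part of the original file): how a
 * caller that already runs with interrupts disabled might batch an
 * update through the per-cpu differential.  The delta stays in the
 * per-cpu vm_stat_diff[] byte until it exceeds STAT_THRESHOLD in
 * either direction, at which point it is folded into the global/zone
 * counter.  'account_dirtied' is a hypothetical helper; NR_FILE_DIRTY
 * is the zone_stat_item reported as "nr_dirty" in vmstat_text below.
 * Never compiled (#if 0).
 */
#if 0
static void account_dirtied(struct page *page, int nr_pages)
{
	__mod_zone_page_state(page_zone(page), NR_FILE_DIRTY, nr_pages);
}
#endif	/* illustrative sketch */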

/*
 * For an unknown interrupt state
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
					int delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

/*
 * Optimized increment and decrement functions.
 *
 * These operate on a single page and can therefore take a struct page *
 * argument instead of a struct zone *. This allows the code generated
 * for page_zone(page) to be folded into the optimized functions.
 *
 * No overflow check is necessary, so the differential can be incremented
 * or decremented in place, which may allow the compiler to generate
 * better code.
 *
 * The sign of the change is known in advance, so one of the two boundary
 * checks can be omitted.
 *
 * Some processors have inc/dec instructions that are atomic with respect
 * to interrupts. However, the code must first determine the location of
 * the per-cpu differential for the zone based on the processor number and
 * then inc/dec the counter. Without disabling preemption there is no
 * guarantee that the processor will not change in between, so that
 * atomicity cannot be usefully exploited here.
 */
static void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	s8 *p = diff_pointer(zone, item);

	(*p)++;

	if (unlikely(*p > STAT_THRESHOLD)) {
		zone_page_state_add(*p, zone, item);
		*p = 0;
	}
}

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	struct zone *zone = page_zone(page);
	s8 *p = diff_pointer(zone, item);

	(*p)--;

	if (unlikely(*p < -STAT_THRESHOLD)) {
		zone_page_state_add(*p, zone, item);
		*p = 0;
	}
}
EXPORT_SYMBOL(__dec_zone_page_state);
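
/*
 * Illustrative sketch only (not part of the original file): typical
 * callers pair the increment and decrement around an event, e.g. the
 * rmap code bumps NR_FILE_MAPPED when a page gains a file mapping and
 * drops it again when the mapping goes away.  The wrappers below are
 * hypothetical and never compiled (#if 0); per the comments above, the
 * caller must have preemption off and must not race with an interrupt
 * that updates the same counter.
 */
#if 0
static void note_page_mapped(struct page *page)
{
	__inc_zone_page_state(page, NR_FILE_MAPPED);
}

static void note_page_unmapped(struct page *page)
{
	__dec_zone_page_state(page, NR_FILE_MAPPED);
}
#endif	/* illustrative sketch */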

void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;

	zone = page_zone(page);
	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;
	s8 *p;

	zone = page_zone(page);
	local_irq_save(flags);
	p = diff_pointer(zone, item);

	(*p)--;

	if (unlikely(*p < -STAT_THRESHOLD)) {
		zone_page_state_add(*p, zone, item);
		*p = 0;
	}
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);

/*
 * Update the zone counters for one cpu.
 */
void refresh_cpu_vm_stats(int cpu)
{
	struct zone *zone;
	int i;
	unsigned long flags;

	for_each_zone(zone) {
		struct per_cpu_pageset *pcp;

		pcp = zone_pcp(zone, cpu);

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
			if (pcp->vm_stat_diff[i]) {
				local_irq_save(flags);
				zone_page_state_add(pcp->vm_stat_diff[i],
					zone, i);
				pcp->vm_stat_diff[i] = 0;
				local_irq_restore(flags);
			}
	}
}

static void __refresh_cpu_vm_stats(void *dummy)
{
	refresh_cpu_vm_stats(smp_processor_id());
}

/*
 * Consolidate all counters.
 *
 * Note that the result is more accurate, but still not exact, if other
 * processes are allowed to run concurrently.
 */
void refresh_vm_stats(void)
{
	on_each_cpu(__refresh_cpu_vm_stats, NULL, 0, 1);
}
EXPORT_SYMBOL(refresh_vm_stats);
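
/*
 * Illustrative sketch only (not part of the original file): a caller
 * that wants the global counters to be as up to date as possible can
 * fold in the per-cpu differentials first.  'current_mapped_pages' is
 * a hypothetical helper; global_page_state() comes from
 * <linux/vmstat.h>.  Never compiled (#if 0).
 */
#if 0
static unsigned long current_mapped_pages(void)
{
	refresh_vm_stats();
	return global_page_state(NR_FILE_MAPPED);
}
#endif	/* illustrative sketch */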

#endif

#ifdef CONFIG_NUMA
/*
 * zonelist = the list of zones passed to the allocator
 * z        = the zone from which the allocation occurred.
 *
 * Must be called with interrupts disabled.
 */
void zone_statistics(struct zonelist *zonelist, struct zone *z)
{
	if (z->zone_pgdat == zonelist->zones[0]->zone_pgdat) {
		__inc_zone_state(z, NUMA_HIT);
	} else {
		__inc_zone_state(z, NUMA_MISS);
		__inc_zone_state(zonelist->zones[0], NUMA_FOREIGN);
	}
	if (z->zone_pgdat == NODE_DATA(numa_node_id()))
		__inc_zone_state(z, NUMA_LOCAL);
	else
		__inc_zone_state(z, NUMA_OTHER);
}
#endif
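
/*
 * Illustrative sketch only (not part of the original file): the page
 * allocator calls zone_statistics() with interrupts disabled after it
 * has taken a page from zone 'z' out of 'zonelist'.  A stand-alone
 * caller would have to provide the same protection, roughly as below.
 * 'record_numa_placement' is a hypothetical helper; never compiled.
 */
#if 0
static void record_numa_placement(struct zonelist *zonelist, struct zone *z)
{
	unsigned long flags;

	local_irq_save(flags);
	zone_statistics(zonelist, z);
	local_irq_restore(flags);
}
#endif	/* illustrative sketch */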

#ifdef CONFIG_PROC_FS

#include <linux/seq_file.h>

static void *frag_start(struct seq_file *m, loff_t *pos)
{
	pg_data_t *pgdat;
	loff_t node = *pos;
	for (pgdat = first_online_pgdat();
	     pgdat && node;
	     pgdat = next_online_pgdat(pgdat))
		--node;

	return pgdat;
}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	(*pos)++;
	return next_online_pgdat(pgdat);
}

static void frag_stop(struct seq_file *m, void *arg)
{
}

/*
 * This walks the free areas for each zone.
 */
static int frag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;
	unsigned long flags;
	int order;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (!populated_zone(zone))
			continue;

		spin_lock_irqsave(&zone->lock, flags);
		seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
		for (order = 0; order < MAX_ORDER; ++order)
			seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
		spin_unlock_irqrestore(&zone->lock, flags);
		seq_putc(m, '\n');
	}
	return 0;
}

struct seq_operations fragmentation_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= frag_show,
};
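
/*
 * Example (illustrative values only) of one line produced by
 * frag_show() above; fragmentation_op is typically registered
 * elsewhere as /proc/buddyinfo:
 *
 *   Node 0, zone   Normal    145     62     33     17      9      5      3      2      1      1      0
 *
 * Each of the MAX_ORDER columns is free_area[order].nr_free, i.e. the
 * number of free blocks of 2^order contiguous pages in that zone.
 */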

static char *vmstat_text[] = {
	/* Zoned VM counters */
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_slab",
	"nr_page_table_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_unstable",
	"nr_bounce",

#ifdef CONFIG_NUMA
	"numa_hit",
	"numa_miss",
	"numa_foreign",
	"numa_interleave",
	"numa_local",
	"numa_other",
#endif

#ifdef CONFIG_VM_EVENT_COUNTERS
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	"pgalloc_dma",
	"pgalloc_dma32",
	"pgalloc_normal",
	"pgalloc_high",

	"pgfree",
	"pgactivate",
	"pgdeactivate",

	"pgfault",
	"pgmajfault",

	"pgrefill_dma",
	"pgrefill_dma32",
	"pgrefill_normal",
	"pgrefill_high",

	"pgsteal_dma",
	"pgsteal_dma32",
	"pgsteal_normal",
	"pgsteal_high",

	"pgscan_kswapd_dma",
	"pgscan_kswapd_dma32",
	"pgscan_kswapd_normal",
	"pgscan_kswapd_high",

	"pgscan_direct_dma",
	"pgscan_direct_dma32",
	"pgscan_direct_normal",
	"pgscan_direct_high",

	"pginodesteal",
	"slabs_scanned",
	"kswapd_steal",
	"kswapd_inodesteal",
	"pageoutrun",
	"allocstall",

	"pgrotated",
#endif
};

/*
 * Output information about zones in @pgdat.
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = arg;
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;
	unsigned long flags;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
		int i;

		if (!populated_zone(zone))
			continue;

		spin_lock_irqsave(&zone->lock, flags);
		seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
		seq_printf(m,
			   "\n  pages free     %lu"
			   "\n        min      %lu"
			   "\n        low      %lu"
			   "\n        high     %lu"
			   "\n        active   %lu"
			   "\n        inactive %lu"
			   "\n        scanned  %lu (a: %lu i: %lu)"
			   "\n        spanned  %lu"
			   "\n        present  %lu",
			   zone->free_pages,
			   zone->pages_min,
			   zone->pages_low,
			   zone->pages_high,
			   zone->nr_active,
			   zone->nr_inactive,
			   zone->pages_scanned,
			   zone->nr_scan_active, zone->nr_scan_inactive,
			   zone->spanned_pages,
			   zone->present_pages);

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
			seq_printf(m, "\n    %-12s %lu", vmstat_text[i],
					zone_page_state(zone, i));

		seq_printf(m,
			   "\n        protection: (%lu",
			   zone->lowmem_reserve[0]);
		for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
			seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
		seq_printf(m,
			   ")"
			   "\n  pagesets");
		for_each_online_cpu(i) {
			struct per_cpu_pageset *pageset;
			int j;

			pageset = zone_pcp(zone, i);
			for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
				if (pageset->pcp[j].count)
					break;
			}
			if (j == ARRAY_SIZE(pageset->pcp))
				continue;
			for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
				seq_printf(m,
					   "\n    cpu: %i pcp: %i"
					   "\n              count: %i"
					   "\n              high:  %i"
					   "\n              batch: %i",
					   i, j,
					   pageset->pcp[j].count,
					   pageset->pcp[j].high,
					   pageset->pcp[j].batch);
			}
		}
		seq_printf(m,
			   "\n  all_unreclaimable: %u"
			   "\n  prev_priority:     %i"
			   "\n  temp_priority:     %i"
			   "\n  start_pfn:         %lu",
			   zone->all_unreclaimable,
			   zone->prev_priority,
			   zone->temp_priority,
			   zone->zone_start_pfn);
		spin_unlock_irqrestore(&zone->lock, flags);
		seq_putc(m, '\n');
	}
	return 0;
}

struct seq_operations zoneinfo_op = {
	.start	= frag_start, /* iterate over all online nodes, the same
			       * walk as the fragmentation report uses. */
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= zoneinfo_show,
};
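
/*
 * zoneinfo_op is typically registered elsewhere as /proc/zoneinfo.
 * Illustrative excerpt (example values only) of the per-zone block
 * that zoneinfo_show() emits:
 *
 *   Node 0, zone   Normal
 *     pages free     3847
 *           min      934
 *           low      1167
 *           high     1401
 *     ...
 *     nr_anon_pages 10240
 *     ...
 */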

static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
	unsigned long *v;
#ifdef CONFIG_VM_EVENT_COUNTERS
	unsigned long *e;
#endif
	int i;

	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;

#ifdef CONFIG_VM_EVENT_COUNTERS
	v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
			+ sizeof(struct vm_event_state), GFP_KERNEL);
#else
	v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long),
			GFP_KERNEL);
#endif
	m->private = v;
	if (!v)
		return ERR_PTR(-ENOMEM);
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		v[i] = global_page_state(i);
#ifdef CONFIG_VM_EVENT_COUNTERS
	e = v + NR_VM_ZONE_STAT_ITEMS;
	all_vm_events(e);
	e[PGPGIN] /= 2;		/* sectors -> kbytes */
	e[PGPGOUT] /= 2;
#endif
	return v + *pos;
}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
	(*pos)++;
	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;
	return (unsigned long *)m->private + *pos;
}

static int vmstat_show(struct seq_file *m, void *arg)
{
	unsigned long *l = arg;
	unsigned long off = l - (unsigned long *)m->private;

	seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
	return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
	kfree(m->private);
	m->private = NULL;
}

struct seq_operations vmstat_op = {
	.start	= vmstat_start,
	.next	= vmstat_next,
	.stop	= vmstat_stop,
	.show	= vmstat_show,
};
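
/*
 * Illustrative userspace sketch (not kernel code, never compiled here):
 * vmstat_op is typically registered elsewhere as /proc/vmstat, and
 * vmstat_show() prints one "name value" pair per line, which can be
 * parsed like this:
 */
#if 0
#include <stdio.h>

int main(void)
{
	char name[64];
	unsigned long value;
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f)
		return 1;
	while (fscanf(f, "%63s %lu", name, &value) == 2)
		printf("%-24s %lu\n", name, value);
	fclose(f);
	return 0;
}
#endif	/* illustrative sketch */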

#endif /* CONFIG_PROC_FS */