/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  zoned VM statistics
 *  Copyright (C) 2006 Silicon Graphics, Inc.,
 *		Christoph Lameter <christoph@lameter.com>
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/vmstat.h>
#include <linux/sched.h>

#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

static void sum_vm_events(unsigned long *ret, const struct cpumask *cpumask)
{
	int cpu;
	int i;

	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

	for_each_cpu(cpu, cpumask) {
		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
			ret[i] += this->event[i];
	}
}

/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
void all_vm_events(unsigned long *ret)
{
	get_online_cpus();
	sum_vm_events(ret, cpu_online_mask);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(all_vm_events);

#ifdef CONFIG_HOTPLUG
/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * but keeps the global counts constant.
 */
void vm_events_fold_cpu(int cpu)
{
	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
	int i;

	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
		count_vm_events(i, fold_state->event[i]);
		fold_state->event[i] = 0;
	}
}
#endif /* CONFIG_HOTPLUG */

#endif /* CONFIG_VM_EVENT_COUNTERS */

/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
EXPORT_SYMBOL(vm_stat);

#ifdef CONFIG_SMP

static int calculate_threshold(struct zone *zone)
{
	int threshold;
	int mem;	/* memory in 128 MB units */

	/*
	 * The threshold scales with the number of processors and the amount
	 * of memory per zone. More memory means that we can defer updates for
	 * longer, more processors could lead to more contention.
	 * fls() is used to have a cheap way of logarithmic scaling.
	 *
	 * Some sample thresholds:
	 *
	 * Threshold	Processors	(fls)	Zonesize	fls(mem+1)
	 * ------------------------------------------------------------------
	 * 8		1		1	0.9-1 GB	4
	 * 16		2		2	0.9-1 GB	4
	 * 20		2		2	1-2 GB		5
	 * 24		2		2	2-4 GB		6
	 * 28		2		2	4-8 GB		7
	 * 32		2		2	8-16 GB		8
	 * 4		2		2	<128M		1
	 * 30		4		3	2-4 GB		5
	 * 48		4		3	8-16 GB		8
	 * 32		8		4	1-2 GB		4
	 * 32		8		4	0.9-1GB		4
	 * 10		16		5	<128M		1
	 * 40		16		5	900M		4
	 * 70		64		7	2-4 GB		5
	 * 84		64		7	4-8 GB		6
	 * 108		512		9	4-8 GB		6
	 * 125		1024		10	8-16 GB		8
	 * 125		1024		10	16-32 GB	9
	 */

	mem = zone->present_pages >> (27 - PAGE_SHIFT);

	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}
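/*
 * Worked example for the formula above (illustration only, assuming
 * PAGE_SHIFT == 12): a 2-CPU machine with a ~900MB zone has
 * present_pages of roughly 229376, so mem = 229376 >> 15 = 7.  Then
 * fls(num_online_cpus()) = fls(2) = 2 and 1 + fls(7) = 4, giving
 * threshold = 2 * 2 * 4 = 16, matching the "16 / 2 CPUs / 0.9-1 GB"
 * row in the sample table.
 */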
/*
 * Refresh the thresholds for each zone.
 */
static void refresh_zone_stat_thresholds(void)
{
	struct zone *zone;
	int cpu;
	int threshold;

	for_each_populated_zone(zone) {
		threshold = calculate_threshold(zone);

		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;
	}
}

/*
 * For use when we know that interrupts are disabled.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
				int delta)
{
	struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);

	s8 *p = pcp->vm_stat_diff + item;
	long x;

	x = delta + *p;

	if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) {
		zone_page_state_add(x, zone, item);
		x = 0;
	}
	*p = x;
}
EXPORT_SYMBOL(__mod_zone_page_state);

/*
 * For an unknown interrupt state
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
					int delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);
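/*
 * Usage sketch (hypothetical caller, not part of this file): from a
 * context that already runs with interrupts disabled, for example
 * under spin_lock_irq(&zone->lru_lock), the cheaper variant can be
 * used directly:
 *
 *	__mod_zone_page_state(zone, NR_ACTIVE_ANON, nr_moved);
 *
 * where nr_moved is whatever delta the caller computed.  From a
 * context whose interrupt state is unknown, use mod_zone_page_state()
 * instead, which brackets the update with local_irq_save() and
 * local_irq_restore().
 */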
/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * NOTE: These functions are very performance sensitive. Change only
 * with care.
 *
 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 * However, the code must first determine the differential location in a zone
 * based on the processor number and then inc/dec the counter. There is no
 * guarantee without disabling preemption that the processor will not change
 * in between and therefore the atomicity vs. interrupt cannot be exploited
 * in a useful way here.
 */
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
	s8 *p = pcp->vm_stat_diff + item;

	(*p)++;

	if (unlikely(*p > pcp->stat_threshold)) {
		int overstep = pcp->stat_threshold / 2;

		zone_page_state_add(*p + overstep, zone, item);
		*p = -overstep;
	}
}

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
	s8 *p = pcp->vm_stat_diff + item;

	(*p)--;

	if (unlikely(*p < - pcp->stat_threshold)) {
		int overstep = pcp->stat_threshold / 2;

		zone_page_state_add(*p - overstep, zone, item);
		*p = overstep;
	}
}

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__dec_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__dec_zone_page_state);

void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;

	zone = page_zone(page);
	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_zone_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);
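/*
 * Example of the overstep mechanism above (numbers for illustration
 * only): with stat_threshold == 32, the increment that takes *p to 33
 * folds 33 + 16 pages into the zone and global counters and resets *p
 * to -16.  The per-cpu differential is thus biased away from the
 * boundary it just crossed, leaving roughly threshold + threshold/2
 * further increments before the next fold instead of folding again on
 * every subsequent increment.
 */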
/*
 * Update the zone counters for one cpu.
 *
 * The cpu specified must be either the current cpu or a processor that
 * is not online. If it is the current cpu then the execution thread must
 * be pinned to the current cpu.
 *
 * Note that refresh_cpu_vm_stats strives to only access
 * node local memory. The per cpu pagesets on remote zones are placed
 * in the memory local to the processor using that pageset. So the
 * loop over all zones will access a series of cachelines local to
 * the processor.
 *
 * The call to zone_page_state_add updates the cachelines with the
 * statistics in the remote zone struct as well as the global cachelines
 * with the global counters. These could cause remote node cache line
 * bouncing and will have to be only done when necessary.
 */
void refresh_cpu_vm_stats(int cpu)
{
	struct zone *zone;
	int i;
	int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };

	for_each_populated_zone(zone) {
		struct per_cpu_pageset *p;

		p = per_cpu_ptr(zone->pageset, cpu);

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
			if (p->vm_stat_diff[i]) {
				unsigned long flags;
				int v;

				local_irq_save(flags);
				v = p->vm_stat_diff[i];
				p->vm_stat_diff[i] = 0;
				local_irq_restore(flags);
				atomic_long_add(v, &zone->vm_stat[i]);
				global_diff[i] += v;
#ifdef CONFIG_NUMA
				/* 3 seconds idle till flush */
				p->expire = 3;
#endif
			}
		cond_resched();
#ifdef CONFIG_NUMA
		/*
		 * Deal with draining the remote pageset of this
		 * processor
		 *
		 * Check if there are pages remaining in this pageset
		 * if not then there is nothing to expire.
		 */
		if (!p->expire || !p->pcp.count)
			continue;

		/*
		 * We never drain zones local to this processor.
		 */
		if (zone_to_nid(zone) == numa_node_id()) {
			p->expire = 0;
			continue;
		}

		p->expire--;
		if (p->expire)
			continue;

		if (p->pcp.count)
			drain_zone_pages(zone, &p->pcp);
#endif
	}

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (global_diff[i])
			atomic_long_add(global_diff[i], &vm_stat[i]);
}

#endif

#ifdef CONFIG_NUMA
/*
 * preferred_zone = the preferred zone of the zonelist passed to the
 *		    allocator
 * z		  = the zone from which the allocation occurred
 *
 * Must be called with interrupts disabled.
 */
void zone_statistics(struct zone *preferred_zone, struct zone *z)
{
	if (z->zone_pgdat == preferred_zone->zone_pgdat) {
		__inc_zone_state(z, NUMA_HIT);
	} else {
		__inc_zone_state(z, NUMA_MISS);
		__inc_zone_state(preferred_zone, NUMA_FOREIGN);
	}
	if (z->node == numa_node_id())
		__inc_zone_state(z, NUMA_LOCAL);
	else
		__inc_zone_state(z, NUMA_OTHER);
}
#endif

#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static char * const migratetype_names[MIGRATE_TYPES] = {
	"Unmovable",
	"Reclaimable",
	"Movable",
	"Reserve",
	"Isolate",
};

static void *frag_start(struct seq_file *m, loff_t *pos)
{
	pg_data_t *pgdat;
	loff_t node = *pos;

	for (pgdat = first_online_pgdat();
	     pgdat && node;
	     pgdat = next_online_pgdat(pgdat))
		--node;

	return pgdat;
}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	(*pos)++;
	return next_online_pgdat(pgdat);
}

static void frag_stop(struct seq_file *m, void *arg)
{
}
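/*
 * frag_start/frag_next/frag_stop implement the seq_file iterator
 * contract over the online nodes and are shared below by the
 * /proc/buddyinfo, /proc/pagetypeinfo and /proc/zoneinfo files.
 * Roughly, the seq_file core drives them like this (sketch, error
 * handling and buffer refills omitted):
 *
 *	v = start(m, &pos);
 *	while (v) {
 *		show(m, v);
 *		v = next(m, v, &pos);
 *	}
 *	stop(m, v);
 */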
/* Walk all the zones in a node and print using a callback */
static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
{
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;
	unsigned long flags;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (!populated_zone(zone))
			continue;

		spin_lock_irqsave(&zone->lock, flags);
		print(m, pgdat, zone);
		spin_unlock_irqrestore(&zone->lock, flags);
	}
}

static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
						struct zone *zone)
{
	int order;

	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
	seq_putc(m, '\n');
}

/*
 * This walks the free areas for each zone.
 */
static int frag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, frag_show_print);
	return 0;
}

static void pagetypeinfo_showfree_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int order, mtype;

	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
		seq_printf(m, "Node %4d, zone %8s, type %12s ",
					pgdat->node_id,
					zone->name,
					migratetype_names[mtype]);
		for (order = 0; order < MAX_ORDER; ++order) {
			unsigned long freecount = 0;
			struct free_area *area;
			struct list_head *curr;

			area = &(zone->free_area[order]);

			list_for_each(curr, &area->free_list[mtype])
				freecount++;
			seq_printf(m, "%6lu ", freecount);
		}
		seq_putc(m, '\n');
	}
}

/* Print out the free pages at each order for each migratetype */
static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
{
	int order;
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* Print header */
	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6d ", order);
	seq_putc(m, '\n');

	walk_zones_in_node(m, pgdat, pagetypeinfo_showfree_print);

	return 0;
}

static void pagetypeinfo_showblockcount_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int mtype;
	unsigned long pfn;
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = start_pfn + zone->spanned_pages;
	unsigned long count[MIGRATE_TYPES] = { 0, };

	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		struct page *page;

		if (!pfn_valid(pfn))
			continue;

		page = pfn_to_page(pfn);

		/* Watch for unexpected holes punched in the memmap */
		if (!memmap_valid_within(pfn, page, zone))
			continue;

		mtype = get_pageblock_migratetype(page);

		if (mtype < MIGRATE_TYPES)
			count[mtype]++;
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12lu ", count[mtype]);
	seq_putc(m, '\n');
}

/* Print out the number of pageblocks for each migratetype */
static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
{
	int mtype;
	pg_data_t *pgdat = (pg_data_t *)arg;

	seq_printf(m, "\n%-23s", "Number of blocks type ");
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12s ", migratetype_names[mtype]);
	seq_putc(m, '\n');
	walk_zones_in_node(m, pgdat, pagetypeinfo_showblockcount_print);

	return 0;
}
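/*
 * Taken together, the two helpers above produce the body of
 * /proc/pagetypeinfo.  Abbreviated sketch of the output (values and
 * exact column widths invented for illustration):
 *
 *	Free pages count per migrate type at order       0      1      2 ...
 *	Node    0, zone   Normal, type    Unmovable      1      3      5 ...
 *	Node    0, zone   Normal, type  Reclaimable      0      2      1 ...
 *	...
 *	Number of blocks type     Unmovable  Reclaimable      Movable ...
 *	Node 0, zone   Normal            14            6          940 ...
 */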
/*
 * This prints out statistics in relation to grouping pages by mobility.
 * It is expensive to collect so do not constantly read the file.
 */
static int pagetypeinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_HIGH_MEMORY))
		return 0;

	seq_printf(m, "Page block order: %d\n", pageblock_order);
	seq_printf(m, "Pages per block: %lu\n", pageblock_nr_pages);
	seq_putc(m, '\n');
	pagetypeinfo_showfree(m, pgdat);
	pagetypeinfo_showblockcount(m, pgdat);

	return 0;
}

static const struct seq_operations fragmentation_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= frag_show,
};

static int fragmentation_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &fragmentation_op);
}

static const struct file_operations fragmentation_file_operations = {
	.open		= fragmentation_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static const struct seq_operations pagetypeinfo_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= pagetypeinfo_show,
};

static int pagetypeinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &pagetypeinfo_op);
}

static const struct file_operations pagetypeinfo_file_ops = {
	.open		= pagetypeinfo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
#define TEXT_FOR_DMA(xx)
#endif

#ifdef CONFIG_ZONE_DMA32
#define TEXT_FOR_DMA32(xx) xx "_dma32",
#else
#define TEXT_FOR_DMA32(xx)
#endif

#ifdef CONFIG_HIGHMEM
#define TEXT_FOR_HIGHMEM(xx) xx "_high",
#else
#define TEXT_FOR_HIGHMEM(xx)
#endif

#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
					TEXT_FOR_HIGHMEM(xx) xx "_movable",
static const char * const vmstat_text[] = {
	/* Zoned VM counters */
	"nr_free_pages",
	"nr_inactive_anon",
	"nr_active_anon",
	"nr_inactive_file",
	"nr_active_file",
	"nr_unevictable",
	"nr_mlock",
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_slab_reclaimable",
	"nr_slab_unreclaimable",
	"nr_page_table_pages",
	"nr_kernel_stack",
	"nr_unstable",
	"nr_bounce",
	"nr_vmscan_write",
	"nr_writeback_temp",
	"nr_isolated_anon",
	"nr_isolated_file",
	"nr_shmem",
#ifdef CONFIG_NUMA
	"numa_hit",
	"numa_miss",
	"numa_foreign",
	"numa_interleave",
	"numa_local",
	"numa_other",
#endif

#ifdef CONFIG_VM_EVENT_COUNTERS
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	TEXTS_FOR_ZONES("pgalloc")

	"pgfree",
	"pgactivate",
	"pgdeactivate",

	"pgfault",
	"pgmajfault",

	TEXTS_FOR_ZONES("pgrefill")
	TEXTS_FOR_ZONES("pgsteal")
	TEXTS_FOR_ZONES("pgscan_kswapd")
	TEXTS_FOR_ZONES("pgscan_direct")

#ifdef CONFIG_NUMA
	"zone_reclaim_failed",
#endif
	"pginodesteal",
	"slabs_scanned",
	"kswapd_steal",
	"kswapd_inodesteal",
	"kswapd_low_wmark_hit_quickly",
	"kswapd_high_wmark_hit_quickly",
	"kswapd_skip_congestion_wait",
	"pageoutrun",
	"allocstall",

	"pgrotated",
#ifdef CONFIG_HUGETLB_PAGE
	"htlb_buddy_alloc_success",
	"htlb_buddy_alloc_fail",
#endif
	"unevictable_pgs_culled",
	"unevictable_pgs_scanned",
	"unevictable_pgs_rescued",
	"unevictable_pgs_mlocked",
	"unevictable_pgs_munlocked",
	"unevictable_pgs_cleared",
	"unevictable_pgs_stranded",
	"unevictable_pgs_mlockfreed",
#endif
};

static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
							struct zone *zone)
{
	int i;
	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
	seq_printf(m,
		   "\n  pages free     %lu"
		   "\n        min      %lu"
		   "\n        low      %lu"
		   "\n        high     %lu"
		   "\n        scanned  %lu"
		   "\n        spanned  %lu"
		   "\n        present  %lu",
		   zone_page_state(zone, NR_FREE_PAGES),
		   min_wmark_pages(zone),
		   low_wmark_pages(zone),
		   high_wmark_pages(zone),
		   zone->pages_scanned,
		   zone->spanned_pages,
		   zone->present_pages);

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		seq_printf(m, "\n    %-12s %lu", vmstat_text[i],
				zone_page_state(zone, i));

	seq_printf(m,
		   "\n        protection: (%lu",
		   zone->lowmem_reserve[0]);
	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
		seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
	seq_printf(m,
		   ")"
		   "\n  pagesets");
	for_each_online_cpu(i) {
		struct per_cpu_pageset *pageset;

		pageset = per_cpu_ptr(zone->pageset, i);
		seq_printf(m,
			   "\n    cpu: %i"
			   "\n              count: %i"
			   "\n              high:  %i"
			   "\n              batch: %i",
			   i,
			   pageset->pcp.count,
			   pageset->pcp.high,
			   pageset->pcp.batch);
#ifdef CONFIG_SMP
		seq_printf(m, "\n  vm stats threshold: %d",
				pageset->stat_threshold);
#endif
	}
	seq_printf(m,
		   "\n  all_unreclaimable: %u"
		   "\n  prev_priority:     %i"
		   "\n  start_pfn:         %lu"
		   "\n  inactive_ratio:    %u",
		   zone->all_unreclaimable,
		   zone->prev_priority,
		   zone->zone_start_pfn,
		   zone->inactive_ratio);
	seq_putc(m, '\n');
}

/*
 * Output information about zones in @pgdat.
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, zoneinfo_show_print);
	return 0;
}

static const struct seq_operations zoneinfo_op = {
	.start	= frag_start, /* iterate over all zones. The same as in
			       * fragmentation. */
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= zoneinfo_show,
};

static int zoneinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &zoneinfo_op);
}

static const struct file_operations proc_zoneinfo_file_operations = {
	.open		= zoneinfo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
	unsigned long *v;
#ifdef CONFIG_VM_EVENT_COUNTERS
	unsigned long *e;
#endif
	int i;

	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;

#ifdef CONFIG_VM_EVENT_COUNTERS
	v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
			+ sizeof(struct vm_event_state), GFP_KERNEL);
#else
	v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long),
			GFP_KERNEL);
#endif
	m->private = v;
	if (!v)
		return ERR_PTR(-ENOMEM);
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		v[i] = global_page_state(i);
#ifdef CONFIG_VM_EVENT_COUNTERS
	e = v + NR_VM_ZONE_STAT_ITEMS;
	all_vm_events(e);
	e[PGPGIN] /= 2;		/* sectors -> kbytes */
	e[PGPGOUT] /= 2;
#endif
	return v + *pos;
}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
	(*pos)++;
	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;
	return (unsigned long *)m->private + *pos;
}

static int vmstat_show(struct seq_file *m, void *arg)
{
	unsigned long *l = arg;
	unsigned long off = l - (unsigned long *)m->private;

	seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
	return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
	kfree(m->private);
	m->private = NULL;
}

static const struct seq_operations vmstat_op = {
	.start	= vmstat_start,
	.next	= vmstat_next,
	.stop	= vmstat_stop,
	.show	= vmstat_show,
};

static int vmstat_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &vmstat_op);
}

static const struct file_operations proc_vmstat_file_operations = {
	.open		= vmstat_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;

static void vmstat_update(struct work_struct *w)
{
	refresh_cpu_vm_stats(smp_processor_id());
	schedule_delayed_work(&__get_cpu_var(vmstat_work),
		round_jiffies_relative(sysctl_stat_interval));
}

static void __cpuinit start_cpu_timer(int cpu)
{
	struct delayed_work *work = &per_cpu(vmstat_work, cpu);

	INIT_DELAYED_WORK_DEFERRABLE(work, vmstat_update);
	schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
}
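/*
 * Each CPU's vmstat_work reschedules itself, so once started by
 * start_cpu_timer() the per-cpu differentials are folded back into the
 * zone and global counters roughly every sysctl_stat_interval jiffies
 * (one second by default).  The interval is exposed as the
 * vm.stat_interval sysctl (wired up in kernel/sysctl.c), e.g.:
 *
 *	# sysctl vm.stat_interval
 *	vm.stat_interval = 1
 */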
/*
 * Use the cpu notifier to ensure that the thresholds are recalculated
 * when necessary.
 */
static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
		unsigned long action,
		void *hcpu)
{
	long cpu = (long)hcpu;

	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		start_cpu_timer(cpu);
		node_set_state(cpu_to_node(cpu), N_CPU);
		break;
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		cancel_rearming_delayed_work(&per_cpu(vmstat_work, cpu));
		per_cpu(vmstat_work, cpu).work.func = NULL;
		break;
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
		start_cpu_timer(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		refresh_zone_stat_thresholds();
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata vmstat_notifier =
	{ &vmstat_cpuup_callback, NULL, 0 };
#endif

static int __init setup_vmstat(void)
{
#ifdef CONFIG_SMP
	int cpu;

	refresh_zone_stat_thresholds();
	register_cpu_notifier(&vmstat_notifier);

	for_each_online_cpu(cpu)
		start_cpu_timer(cpu);
#endif
#ifdef CONFIG_PROC_FS
	proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
	proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
	proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
	proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
#endif
	return 0;
}
module_init(setup_vmstat)