/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  zoned VM statistics
 *  Copyright (C) 2006 Silicon Graphics, Inc.,
 *		Christoph Lameter <christoph@lameter.com>
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/vmstat.h>
#include <linux/sched.h>
#include <linux/math64.h>

#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

static void sum_vm_events(unsigned long *ret)
{
	int cpu;
	int i;

	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

	for_each_online_cpu(cpu) {
		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
			ret[i] += this->event[i];
	}
}

/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
void all_vm_events(unsigned long *ret)
{
	get_online_cpus();
	sum_vm_events(ret);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(all_vm_events);

#ifdef CONFIG_HOTPLUG
/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * but keeps the global counts constant.
 */
void vm_events_fold_cpu(int cpu)
{
	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
	int i;

	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
		count_vm_events(i, fold_state->event[i]);
		fold_state->event[i] = 0;
	}
}
#endif /* CONFIG_HOTPLUG */

#endif /* CONFIG_VM_EVENT_COUNTERS */

/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
EXPORT_SYMBOL(vm_stat);

#ifdef CONFIG_SMP

static int calculate_threshold(struct zone *zone)
{
	int threshold;
	int mem;	/* memory in 128 MB units */

	/*
	 * The threshold scales with the number of processors and the amount
	 * of memory per zone. More memory means that we can defer updates for
	 * longer, more processors could lead to more contention.
	 * fls() is used to have a cheap way of logarithmic scaling.
	 *
	 * Some sample thresholds:
	 *
	 * Threshold	Processors	(fls)	Zonesize	fls(mem+1)
	 * ------------------------------------------------------------------
	 * 8		1		1	0.9-1 GB	4
	 * 16		2		2	0.9-1 GB	4
	 * 20		2		2	1-2 GB		5
	 * 24		2		2	2-4 GB		6
	 * 28		2		2	4-8 GB		7
	 * 32		2		2	8-16 GB		8
	 * 4		2		2	<128M		1
	 * 30		4		3	2-4 GB		5
	 * 48		4		3	8-16 GB		8
	 * 32		8		4	1-2 GB		4
	 * 32		8		4	0.9-1GB		4
	 * 10		16		5	<128M		1
	 * 40		16		5	900M		4
	 * 70		64		7	2-4 GB		5
	 * 84		64		7	4-8 GB		6
	 * 108		512		9	4-8 GB		6
	 * 125		1024		10	8-16 GB		8
	 * 125		1024		10	16-32 GB	9
	 */

	mem = zone->present_pages >> (27 - PAGE_SHIFT);

	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

/*
 * Refresh the thresholds for each zone.
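 *
 * The thresholds depend on num_online_cpus(), so this is called at init
 * time and again from the CPU hotplug notifier below whenever a CPU comes
 * online or dies.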
 */
static void refresh_zone_stat_thresholds(void)
{
	struct zone *zone;
	int cpu;
	int threshold;

	for_each_populated_zone(zone) {
		unsigned long max_drift, tolerate_drift;

		threshold = calculate_threshold(zone);

		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;

		/*
		 * Only set percpu_drift_mark if there is a danger that
		 * NR_FREE_PAGES reports the low watermark is ok when in fact
		 * the min watermark could be breached by an allocation
		 */
		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
		max_drift = num_online_cpus() * threshold;
		if (max_drift > tolerate_drift)
			zone->percpu_drift_mark = high_wmark_pages(zone) +
					max_drift;
	}
}

/*
 * For use when we know that interrupts are disabled.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
				int delta)
{
	struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);

	s8 *p = pcp->vm_stat_diff + item;
	long x;

	x = delta + *p;

	if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) {
		zone_page_state_add(x, zone, item);
		x = 0;
	}
	*p = x;
}
EXPORT_SYMBOL(__mod_zone_page_state);

/*
 * For an unknown interrupt state
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
					int delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * NOTE: These functions are very performance sensitive. Change only
 * with care.
 *
 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 * However, the code must first determine the differential location in a zone
 * based on the processor number and then inc/dec the counter. There is no
 * guarantee without disabling preemption that the processor will not change
 * in between and therefore the atomicity vs. interrupt cannot be exploited
 * in a useful way here.
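 *
 * Like __mod_zone_page_state() above, the __ functions below expect to be
 * called with interrupts already disabled; the inc_zone_state(),
 * inc_zone_page_state() and dec_zone_page_state() wrappers disable
 * interrupts themselves.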
 */
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
	s8 *p = pcp->vm_stat_diff + item;

	(*p)++;

	if (unlikely(*p > pcp->stat_threshold)) {
		int overstep = pcp->stat_threshold / 2;

		zone_page_state_add(*p + overstep, zone, item);
		*p = -overstep;
	}
}

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
	s8 *p = pcp->vm_stat_diff + item;

	(*p)--;

	if (unlikely(*p < - pcp->stat_threshold)) {
		int overstep = pcp->stat_threshold / 2;

		zone_page_state_add(*p - overstep, zone, item);
		*p = overstep;
	}
}

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__dec_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__dec_zone_page_state);

void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;

	zone = page_zone(page);
	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_zone_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);

/*
 * Update the zone counters for one cpu.
 *
 * The cpu specified must be either the current cpu or a processor that
 * is not online. If it is the current cpu then the execution thread must
 * be pinned to the current cpu.
 *
 * Note that refresh_cpu_vm_stats strives to only access
 * node local memory. The per cpu pagesets on remote zones are placed
 * in the memory local to the processor using that pageset. So the
 * loop over all zones will access a series of cachelines local to
 * the processor.
 *
 * The call to zone_page_state_add updates the cachelines with the
 * statistics in the remote zone struct as well as the global cachelines
 * with the global counters. These could cause remote node cache line
 * bouncing and will have to be only done when necessary.
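 *
 * To limit that cost, a per-cpu differential is only folded back when it
 * is non-zero, and the global vm_stat[] array is updated once at the end
 * from the accumulated global_diff[] values.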
 */
void refresh_cpu_vm_stats(int cpu)
{
	struct zone *zone;
	int i;
	int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };

	for_each_populated_zone(zone) {
		struct per_cpu_pageset *p;

		p = per_cpu_ptr(zone->pageset, cpu);

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
			if (p->vm_stat_diff[i]) {
				unsigned long flags;
				int v;

				local_irq_save(flags);
				v = p->vm_stat_diff[i];
				p->vm_stat_diff[i] = 0;
				local_irq_restore(flags);
				atomic_long_add(v, &zone->vm_stat[i]);
				global_diff[i] += v;
#ifdef CONFIG_NUMA
				/* 3 seconds idle till flush */
				p->expire = 3;
#endif
			}
		cond_resched();
#ifdef CONFIG_NUMA
		/*
		 * Deal with draining the remote pageset of this
		 * processor
		 *
		 * Check if there are pages remaining in this pageset
		 * if not then there is nothing to expire.
		 */
		if (!p->expire || !p->pcp.count)
			continue;

		/*
		 * We never drain zones local to this processor.
		 */
		if (zone_to_nid(zone) == numa_node_id()) {
			p->expire = 0;
			continue;
		}

		p->expire--;
		if (p->expire)
			continue;

		if (p->pcp.count)
			drain_zone_pages(zone, &p->pcp);
#endif
	}

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (global_diff[i])
			atomic_long_add(global_diff[i], &vm_stat[i]);
}

#endif

#ifdef CONFIG_NUMA
/*
 * preferred_zone = the zone that the allocator preferred to use
 * z              = the zone from which the allocation occurred.
 *
 * Must be called with interrupts disabled.
 */
void zone_statistics(struct zone *preferred_zone, struct zone *z)
{
	if (z->zone_pgdat == preferred_zone->zone_pgdat) {
		__inc_zone_state(z, NUMA_HIT);
	} else {
		__inc_zone_state(z, NUMA_MISS);
		__inc_zone_state(preferred_zone, NUMA_FOREIGN);
	}
	if (z->node == numa_node_id())
		__inc_zone_state(z, NUMA_LOCAL);
	else
		__inc_zone_state(z, NUMA_OTHER);
}
#endif

#ifdef CONFIG_COMPACTION
struct contig_page_info {
	unsigned long free_pages;
	unsigned long free_blocks_total;
	unsigned long free_blocks_suitable;
};

/*
 * Calculate the number of free pages in a zone, how many contiguous
 * pages are free and how many are large enough to satisfy an allocation of
 * the target size. Note that this function makes no attempt to estimate
 * how many suitable free blocks there *might* be if MOVABLE pages were
 * migrated. Calculating that is possible, but expensive and can be
 * figured out from userspace.
 */
static void fill_contig_page_info(struct zone *zone,
				unsigned int suitable_order,
				struct contig_page_info *info)
{
	unsigned int order;

	info->free_pages = 0;
	info->free_blocks_total = 0;
	info->free_blocks_suitable = 0;

	for (order = 0; order < MAX_ORDER; order++) {
		unsigned long blocks;

		/* Count number of free blocks */
		blocks = zone->free_area[order].nr_free;
		info->free_blocks_total += blocks;

		/* Count free base pages */
		info->free_pages += blocks << order;

		/* Count the suitable free blocks */
		if (order >= suitable_order)
			info->free_blocks_suitable += blocks <<
						(order - suitable_order);
	}
}

/*
 * A fragmentation index only makes sense if an allocation of a requested
 * size would fail. If that is true, the fragmentation index indicates
 * whether external fragmentation or a lack of memory was the problem.
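 * The index is scaled by 1000 so that three decimal places can be reported
 * without floating point arithmetic.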
 * The value can be used to determine if page reclaim or compaction
 * should be used
 */
static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
{
	unsigned long requested = 1UL << order;

	if (!info->free_blocks_total)
		return 0;

	/* Fragmentation index only makes sense when a request would fail */
	if (info->free_blocks_suitable)
		return -1000;

	/*
	 * Index is between 0 and 1 so return within 3 decimal places
	 *
	 * 0 => allocation would fail due to lack of memory
	 * 1 => allocation would fail due to fragmentation
	 */
	return 1000 - div_u64((1000 + div_u64(info->free_pages * 1000ULL, requested)), info->free_blocks_total);
}

/* Same as __fragmentation_index but allocs contig_page_info on stack */
int fragmentation_index(struct zone *zone, unsigned int order)
{
	struct contig_page_info info;

	fill_contig_page_info(zone, order, &info);
	return __fragmentation_index(order, &info);
}
#endif

#if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION)
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static char * const migratetype_names[MIGRATE_TYPES] = {
	"Unmovable",
	"Reclaimable",
	"Movable",
	"Reserve",
	"Isolate",
};

static void *frag_start(struct seq_file *m, loff_t *pos)
{
	pg_data_t *pgdat;
	loff_t node = *pos;
	for (pgdat = first_online_pgdat();
	     pgdat && node;
	     pgdat = next_online_pgdat(pgdat))
		--node;

	return pgdat;
}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	(*pos)++;
	return next_online_pgdat(pgdat);
}

static void frag_stop(struct seq_file *m, void *arg)
{
}

/* Walk all the zones in a node and print using a callback */
static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
{
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;
	unsigned long flags;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (!populated_zone(zone))
			continue;

		spin_lock_irqsave(&zone->lock, flags);
		print(m, pgdat, zone);
		spin_unlock_irqrestore(&zone->lock, flags);
	}
}
#endif

#ifdef CONFIG_PROC_FS
static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
						struct zone *zone)
{
	int order;

	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
	seq_putc(m, '\n');
}

/*
 * This walks the free areas for each zone.
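 * The output is what appears in /proc/buddyinfo.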
 */
static int frag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, frag_show_print);
	return 0;
}

static void pagetypeinfo_showfree_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int order, mtype;

	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
		seq_printf(m, "Node %4d, zone %8s, type %12s ",
					pgdat->node_id,
					zone->name,
					migratetype_names[mtype]);
		for (order = 0; order < MAX_ORDER; ++order) {
			unsigned long freecount = 0;
			struct free_area *area;
			struct list_head *curr;

			area = &(zone->free_area[order]);

			list_for_each(curr, &area->free_list[mtype])
				freecount++;
			seq_printf(m, "%6lu ", freecount);
		}
		seq_putc(m, '\n');
	}
}

/* Print out the free pages at each order for each migratetype */
static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
{
	int order;
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* Print header */
	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6d ", order);
	seq_putc(m, '\n');

	walk_zones_in_node(m, pgdat, pagetypeinfo_showfree_print);

	return 0;
}

static void pagetypeinfo_showblockcount_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int mtype;
	unsigned long pfn;
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = start_pfn + zone->spanned_pages;
	unsigned long count[MIGRATE_TYPES] = { 0, };

	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		struct page *page;

		if (!pfn_valid(pfn))
			continue;

		page = pfn_to_page(pfn);

		/* Watch for unexpected holes punched in the memmap */
		if (!memmap_valid_within(pfn, page, zone))
			continue;

		mtype = get_pageblock_migratetype(page);

		if (mtype < MIGRATE_TYPES)
			count[mtype]++;
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12lu ", count[mtype]);
	seq_putc(m, '\n');
}

/* Print out the number of pageblocks for each migratetype */
static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
{
	int mtype;
	pg_data_t *pgdat = (pg_data_t *)arg;

	seq_printf(m, "\n%-23s", "Number of blocks type ");
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12s ", migratetype_names[mtype]);
	seq_putc(m, '\n');
	walk_zones_in_node(m, pgdat, pagetypeinfo_showblockcount_print);

	return 0;
}

/*
 * This prints out statistics in relation to grouping pages by mobility.
 * It is expensive to collect so do not constantly read the file.
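 * This is the backend of /proc/pagetypeinfo.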
 */
static int pagetypeinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_HIGH_MEMORY))
		return 0;

	seq_printf(m, "Page block order: %d\n", pageblock_order);
	seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
	seq_putc(m, '\n');
	pagetypeinfo_showfree(m, pgdat);
	pagetypeinfo_showblockcount(m, pgdat);

	return 0;
}

static const struct seq_operations fragmentation_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= frag_show,
};

static int fragmentation_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &fragmentation_op);
}

static const struct file_operations fragmentation_file_operations = {
	.open		= fragmentation_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static const struct seq_operations pagetypeinfo_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= pagetypeinfo_show,
};

static int pagetypeinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &pagetypeinfo_op);
}

static const struct file_operations pagetypeinfo_file_ops = {
	.open		= pagetypeinfo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
#define TEXT_FOR_DMA(xx)
#endif

#ifdef CONFIG_ZONE_DMA32
#define TEXT_FOR_DMA32(xx) xx "_dma32",
#else
#define TEXT_FOR_DMA32(xx)
#endif

#ifdef CONFIG_HIGHMEM
#define TEXT_FOR_HIGHMEM(xx) xx "_high",
#else
#define TEXT_FOR_HIGHMEM(xx)
#endif

#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
					TEXT_FOR_HIGHMEM(xx) xx "_movable",

static const char * const vmstat_text[] = {
	/* Zoned VM counters */
	"nr_free_pages",
	"nr_inactive_anon",
	"nr_active_anon",
	"nr_inactive_file",
	"nr_active_file",
	"nr_unevictable",
	"nr_mlock",
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_slab_reclaimable",
	"nr_slab_unreclaimable",
	"nr_page_table_pages",
	"nr_kernel_stack",
	"nr_unstable",
	"nr_bounce",
	"nr_vmscan_write",
	"nr_writeback_temp",
	"nr_isolated_anon",
	"nr_isolated_file",
	"nr_shmem",
#ifdef CONFIG_NUMA
	"numa_hit",
	"numa_miss",
	"numa_foreign",
	"numa_interleave",
	"numa_local",
	"numa_other",
#endif

#ifdef CONFIG_VM_EVENT_COUNTERS
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	TEXTS_FOR_ZONES("pgalloc")

	"pgfree",
	"pgactivate",
	"pgdeactivate",

	"pgfault",
	"pgmajfault",

	TEXTS_FOR_ZONES("pgrefill")
	TEXTS_FOR_ZONES("pgsteal")
	TEXTS_FOR_ZONES("pgscan_kswapd")
	TEXTS_FOR_ZONES("pgscan_direct")

#ifdef CONFIG_NUMA
	"zone_reclaim_failed",
#endif
	"pginodesteal",
	"slabs_scanned",
	"kswapd_steal",
	"kswapd_inodesteal",
	"kswapd_low_wmark_hit_quickly",
	"kswapd_high_wmark_hit_quickly",
	"kswapd_skip_congestion_wait",
	"pageoutrun",
	"allocstall",

	"pgrotated",

#ifdef CONFIG_COMPACTION
	"compact_blocks_moved",
	"compact_pages_moved",
	"compact_pagemigrate_failed",
	"compact_stall",
	"compact_fail",
"compact_success", 799 #endif 800 801 #ifdef CONFIG_HUGETLB_PAGE 802 "htlb_buddy_alloc_success", 803 "htlb_buddy_alloc_fail", 804 #endif 805 "unevictable_pgs_culled", 806 "unevictable_pgs_scanned", 807 "unevictable_pgs_rescued", 808 "unevictable_pgs_mlocked", 809 "unevictable_pgs_munlocked", 810 "unevictable_pgs_cleared", 811 "unevictable_pgs_stranded", 812 "unevictable_pgs_mlockfreed", 813 #endif 814 }; 815 816 static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, 817 struct zone *zone) 818 { 819 int i; 820 seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name); 821 seq_printf(m, 822 "\n pages free %lu" 823 "\n min %lu" 824 "\n low %lu" 825 "\n high %lu" 826 "\n scanned %lu" 827 "\n spanned %lu" 828 "\n present %lu", 829 zone_nr_free_pages(zone), 830 min_wmark_pages(zone), 831 low_wmark_pages(zone), 832 high_wmark_pages(zone), 833 zone->pages_scanned, 834 zone->spanned_pages, 835 zone->present_pages); 836 837 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 838 seq_printf(m, "\n %-12s %lu", vmstat_text[i], 839 zone_page_state(zone, i)); 840 841 seq_printf(m, 842 "\n protection: (%lu", 843 zone->lowmem_reserve[0]); 844 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) 845 seq_printf(m, ", %lu", zone->lowmem_reserve[i]); 846 seq_printf(m, 847 ")" 848 "\n pagesets"); 849 for_each_online_cpu(i) { 850 struct per_cpu_pageset *pageset; 851 852 pageset = per_cpu_ptr(zone->pageset, i); 853 seq_printf(m, 854 "\n cpu: %i" 855 "\n count: %i" 856 "\n high: %i" 857 "\n batch: %i", 858 i, 859 pageset->pcp.count, 860 pageset->pcp.high, 861 pageset->pcp.batch); 862 #ifdef CONFIG_SMP 863 seq_printf(m, "\n vm stats threshold: %d", 864 pageset->stat_threshold); 865 #endif 866 } 867 seq_printf(m, 868 "\n all_unreclaimable: %u" 869 "\n start_pfn: %lu" 870 "\n inactive_ratio: %u", 871 zone->all_unreclaimable, 872 zone->zone_start_pfn, 873 zone->inactive_ratio); 874 seq_putc(m, '\n'); 875 } 876 877 /* 878 * Output information about zones in @pgdat. 879 */ 880 static int zoneinfo_show(struct seq_file *m, void *arg) 881 { 882 pg_data_t *pgdat = (pg_data_t *)arg; 883 walk_zones_in_node(m, pgdat, zoneinfo_show_print); 884 return 0; 885 } 886 887 static const struct seq_operations zoneinfo_op = { 888 .start = frag_start, /* iterate over all zones. The same as in 889 * fragmentation. 
			       */
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= zoneinfo_show,
};

static int zoneinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &zoneinfo_op);
}

static const struct file_operations proc_zoneinfo_file_operations = {
	.open		= zoneinfo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
	unsigned long *v;
#ifdef CONFIG_VM_EVENT_COUNTERS
	unsigned long *e;
#endif
	int i;

	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;

#ifdef CONFIG_VM_EVENT_COUNTERS
	v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
			+ sizeof(struct vm_event_state), GFP_KERNEL);
#else
	v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long),
			GFP_KERNEL);
#endif
	m->private = v;
	if (!v)
		return ERR_PTR(-ENOMEM);
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		v[i] = global_page_state(i);
#ifdef CONFIG_VM_EVENT_COUNTERS
	e = v + NR_VM_ZONE_STAT_ITEMS;
	all_vm_events(e);
	e[PGPGIN] /= 2;		/* sectors -> kbytes */
	e[PGPGOUT] /= 2;
#endif
	return v + *pos;
}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
	(*pos)++;
	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;
	return (unsigned long *)m->private + *pos;
}

static int vmstat_show(struct seq_file *m, void *arg)
{
	unsigned long *l = arg;
	unsigned long off = l - (unsigned long *)m->private;

	seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
	return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
	kfree(m->private);
	m->private = NULL;
}

static const struct seq_operations vmstat_op = {
	.start	= vmstat_start,
	.next	= vmstat_next,
	.stop	= vmstat_stop,
	.show	= vmstat_show,
};

static int vmstat_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &vmstat_op);
}

static const struct file_operations proc_vmstat_file_operations = {
	.open		= vmstat_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;

static void vmstat_update(struct work_struct *w)
{
	refresh_cpu_vm_stats(smp_processor_id());
	schedule_delayed_work(&__get_cpu_var(vmstat_work),
		round_jiffies_relative(sysctl_stat_interval));
}

static void __cpuinit start_cpu_timer(int cpu)
{
	struct delayed_work *work = &per_cpu(vmstat_work, cpu);

	INIT_DELAYED_WORK_DEFERRABLE(work, vmstat_update);
	schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
}

/*
 * Use the cpu notifier to ensure that the thresholds are recalculated
 * when necessary.
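 *
 * The notifier also starts the per-cpu vmstat_update work when a CPU comes
 * online and cancels it before the CPU is taken down.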
 */
static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
		unsigned long action,
		void *hcpu)
{
	long cpu = (long)hcpu;

	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		refresh_zone_stat_thresholds();
		start_cpu_timer(cpu);
		node_set_state(cpu_to_node(cpu), N_CPU);
		break;
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		cancel_rearming_delayed_work(&per_cpu(vmstat_work, cpu));
		per_cpu(vmstat_work, cpu).work.func = NULL;
		break;
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
		start_cpu_timer(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		refresh_zone_stat_thresholds();
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata vmstat_notifier =
	{ &vmstat_cpuup_callback, NULL, 0 };
#endif

static int __init setup_vmstat(void)
{
#ifdef CONFIG_SMP
	int cpu;

	refresh_zone_stat_thresholds();
	register_cpu_notifier(&vmstat_notifier);

	for_each_online_cpu(cpu)
		start_cpu_timer(cpu);
#endif
#ifdef CONFIG_PROC_FS
	proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
	proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
	proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
	proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
#endif
	return 0;
}
module_init(setup_vmstat)

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
#include <linux/debugfs.h>

static struct dentry *extfrag_debug_root;

/*
 * Return an index indicating how much of the available free memory is
 * unusable for an allocation of the requested size.
 */
static int unusable_free_index(unsigned int order,
				struct contig_page_info *info)
{
	/* No free memory is interpreted as all free memory is unusable */
	if (info->free_pages == 0)
		return 1000;

	/*
	 * Index should be a value between 0 and 1. Return a value to 3
	 * decimal places.
	 *
	 * 0 => no fragmentation
	 * 1 => high fragmentation
	 */
	return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);

}

static void unusable_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = unusable_free_index(order, &info);
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}

/*
 * Display unusable free space index
 *
 * The unusable free space index measures how much of the available free
 * memory cannot be used to satisfy an allocation of a given size and is a
 * value between 0 and 1. The higher the value, the more of free memory is
 * unusable and by implication, the worse the external fragmentation is. This
 * can be expressed as a percentage by multiplying by 100.
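 *
 * The index is reported through the debugfs file 'unusable_index' created
 * by extfrag_debug_init() below.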
 */
static int unusable_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_HIGH_MEMORY))
		return 0;

	walk_zones_in_node(m, pgdat, unusable_show_print);

	return 0;
}

static const struct seq_operations unusable_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= unusable_show,
};

static int unusable_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &unusable_op);
}

static const struct file_operations unusable_file_ops = {
	.open		= unusable_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static void extfrag_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;

	/* Alloc on stack as interrupts are disabled for zone walk */
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = __fragmentation_index(order, &info);
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}

/*
 * Display fragmentation index for orders that allocations would fail for
 */
static int extfrag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	walk_zones_in_node(m, pgdat, extfrag_show_print);

	return 0;
}

static const struct seq_operations extfrag_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= extfrag_show,
};

static int extfrag_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &extfrag_op);
}

static const struct file_operations extfrag_file_ops = {
	.open		= extfrag_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static int __init extfrag_debug_init(void)
{
	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
	if (!extfrag_debug_root)
		return -ENOMEM;

	if (!debugfs_create_file("unusable_index", 0444,
			extfrag_debug_root, NULL, &unusable_file_ops))
		return -ENOMEM;

	if (!debugfs_create_file("extfrag_index", 0444,
			extfrag_debug_root, NULL, &extfrag_file_ops))
		return -ENOMEM;

	return 0;
}

module_init(extfrag_debug_init);
#endif