/*
 * linux/mm/compaction.c
 *
 * Memory compaction for the reduction of external fragmentation. Note that
 * this heavily depends upon page migration to do all the real heavy
 * lifting
 *
 * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
 */
#include <linux/swap.h>
#include <linux/migrate.h>
#include <linux/compaction.h>
#include <linux/mm_inline.h>
#include <linux/backing-dev.h>
#include "internal.h"

/*
 * compact_control is used to track pages being migrated and the free pages
 * they are being migrated to during memory compaction. The free_pfn starts
 * at the end of a zone and migrate_pfn begins at the start. Movable pages
 * are moved to the end of a zone during a compaction run and the run
 * completes when free_pfn <= migrate_pfn
 */
struct compact_control {
	struct list_head freepages;	/* List of free pages to migrate to */
	struct list_head migratepages;	/* List of pages being migrated */
	unsigned long nr_freepages;	/* Number of isolated free pages */
	unsigned long nr_migratepages;	/* Number of pages to migrate */
	unsigned long free_pfn;		/* isolate_freepages search base */
	unsigned long migrate_pfn;	/* isolate_migratepages search base */

	/* Account for isolated anon and file pages */
	unsigned long nr_anon;
	unsigned long nr_file;

	struct zone *zone;
};

static unsigned long release_freepages(struct list_head *freelist)
{
	struct page *page, *next;
	unsigned long count = 0;

	list_for_each_entry_safe(page, next, freelist, lru) {
		list_del(&page->lru);
		__free_page(page);
		count++;
	}

	return count;
}
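
/*
 * Overall scanner geometry, as described for compact_control above (an
 * illustrative sketch of the existing behaviour, nothing new):
 *
 *	zone start                                           zone end
 *	| migrate_pfn -->   ...   unscanned   ...   <-- free_pfn |
 *
 * isolate_migratepages() advances migrate_pfn one pageblock at a time
 * while isolate_freepages() walks free_pfn backwards; the run finishes
 * once the two scanners meet (see compact_finished()).
 */
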
/* Isolate free pages onto a private freelist. Must hold zone->lock */
static unsigned long isolate_freepages_block(struct zone *zone,
				unsigned long blockpfn,
				struct list_head *freelist)
{
	unsigned long zone_end_pfn, end_pfn;
	int total_isolated = 0;
	struct page *cursor;

	/* Get the last PFN we should scan for free pages at */
	zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
	end_pfn = min(blockpfn + pageblock_nr_pages, zone_end_pfn);

	/* Find the first usable PFN in the block to initialise the page cursor */
	for (; blockpfn < end_pfn; blockpfn++) {
		if (pfn_valid_within(blockpfn))
			break;
	}
	cursor = pfn_to_page(blockpfn);

	/* Isolate free pages. This assumes the block is valid */
	for (; blockpfn < end_pfn; blockpfn++, cursor++) {
		int isolated, i;
		struct page *page = cursor;

		if (!pfn_valid_within(blockpfn))
			continue;

		if (!PageBuddy(page))
			continue;

		/* Found a free page, break it into order-0 pages */
		isolated = split_free_page(page);
		total_isolated += isolated;
		for (i = 0; i < isolated; i++) {
			list_add(&page->lru, freelist);
			page++;
		}

		/* If a page was split, advance to the end of it */
		if (isolated) {
			blockpfn += isolated - 1;
			cursor += isolated - 1;
		}
	}

	return total_isolated;
}

/* Returns true if the page is within a block suitable for migration to */
static bool suitable_migration_target(struct page *page)
{
	int migratetype = get_pageblock_migratetype(page);

	/* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
	if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE)
		return false;

	/* If the page is a large free page, then allow migration */
	if (PageBuddy(page) && page_order(page) >= pageblock_order)
		return true;

	/* If the block is MIGRATE_MOVABLE, allow migration */
	if (migratetype == MIGRATE_MOVABLE)
		return true;

	/* Otherwise skip the block */
	return false;
}
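
/*
 * The checks above reduce to the following table (a summary of the code,
 * not additional policy):
 *
 *	MIGRATE_ISOLATE or MIGRATE_RESERVE block	-> skip
 *	free page of order >= pageblock_order		-> use
 *	MIGRATE_MOVABLE block				-> use
 *	anything else					-> skip
 */
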
/*
 * Based on information in the current compact_control, find blocks
 * suitable for isolating free pages from and then isolate them.
 */
static void isolate_freepages(struct zone *zone,
				struct compact_control *cc)
{
	struct page *page;
	unsigned long high_pfn, low_pfn, pfn;
	unsigned long flags;
	int nr_freepages = cc->nr_freepages;
	struct list_head *freelist = &cc->freepages;

	pfn = cc->free_pfn;
	low_pfn = cc->migrate_pfn + pageblock_nr_pages;
	high_pfn = low_pfn;

	/*
	 * Isolate free pages until enough are available to migrate the
	 * pages on cc->migratepages. We stop searching if the migrate
	 * and free page scanners meet or enough free pages are isolated.
	 */
	spin_lock_irqsave(&zone->lock, flags);
	for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
					pfn -= pageblock_nr_pages) {
		unsigned long isolated;

		if (!pfn_valid(pfn))
			continue;

		/*
		 * Check for overlapping nodes/zones. It's possible on some
		 * configurations to have a setup like
		 * node0 node1 node0
		 * i.e. it's possible that all pages within a zone's range of
		 * pages do not belong to a single zone.
		 */
		page = pfn_to_page(pfn);
		if (page_zone(page) != zone)
			continue;

		/* Check the block is suitable for migration */
		if (!suitable_migration_target(page))
			continue;

		/* Found a block suitable for isolating free pages from */
		isolated = isolate_freepages_block(zone, pfn, freelist);
		nr_freepages += isolated;

		/*
		 * Record the highest PFN we isolated pages from. When next
		 * looking for free pages, the search will restart here as
		 * page migration may have returned some pages to the allocator
		 */
		if (isolated)
			high_pfn = max(high_pfn, pfn);
	}
	spin_unlock_irqrestore(&zone->lock, flags);

	/* split_free_page does not map the pages */
	list_for_each_entry(page, freelist, lru) {
		arch_alloc_page(page, 0);
		kernel_map_pages(page, 1, 1);
	}

	cc->free_pfn = high_pfn;
	cc->nr_freepages = nr_freepages;
}
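
/*
 * Note on the mapping pass above: split_free_page() returns order-0 pages
 * without the arch_alloc_page()/kernel_map_pages() treatment the normal
 * allocation path applies (with CONFIG_DEBUG_PAGEALLOC, for instance, the
 * pages would otherwise still be unmapped), hence the fixup by hand here.
 */
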
/* Update the number of anon and file isolated pages in the zone */
static void acct_isolated(struct zone *zone, struct compact_control *cc)
{
	struct page *page;
	unsigned int count[NR_LRU_LISTS] = { 0, };

	list_for_each_entry(page, &cc->migratepages, lru) {
		int lru = page_lru_base_type(page);
		count[lru]++;
	}

	cc->nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
	cc->nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
	__mod_zone_page_state(zone, NR_ISOLATED_ANON, cc->nr_anon);
	__mod_zone_page_state(zone, NR_ISOLATED_FILE, cc->nr_file);
}

/* Similar to reclaim, but different enough that they don't share logic */
static bool too_many_isolated(struct zone *zone)
{
	unsigned long inactive, isolated;

	inactive = zone_page_state(zone, NR_INACTIVE_FILE) +
					zone_page_state(zone, NR_INACTIVE_ANON);
	isolated = zone_page_state(zone, NR_ISOLATED_FILE) +
					zone_page_state(zone, NR_ISOLATED_ANON);

	return isolated > inactive;
}
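
/*
 * This mirrors the throttling check of the same name on the reclaim side
 * (mm/vmscan.c); compaction keeps its own copy because, as noted above,
 * the two paths are similar but deliberately do not share logic.
 */
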
/*
 * Isolate all pages that can be migrated from the block pointed to by
 * the migrate scanner within compact_control.
 */
static unsigned long isolate_migratepages(struct zone *zone,
					struct compact_control *cc)
{
	unsigned long low_pfn, end_pfn;
	struct list_head *migratelist = &cc->migratepages;

	/* Do not scan outside zone boundaries */
	low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn);

	/* Only scan within a pageblock boundary */
	end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages);

	/* Do not cross the free scanner or scan within a memory hole */
	if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) {
		cc->migrate_pfn = end_pfn;
		return 0;
	}

	/*
	 * Ensure that there are not too many pages isolated from the LRU
	 * list by either parallel reclaimers or compaction. If there are,
	 * delay for some time until fewer pages are isolated
	 */
	while (unlikely(too_many_isolated(zone))) {
		congestion_wait(BLK_RW_ASYNC, HZ/10);

		if (fatal_signal_pending(current))
			return 0;
	}

	/* Time to isolate some pages for migration */
	spin_lock_irq(&zone->lru_lock);
	for (; low_pfn < end_pfn; low_pfn++) {
		struct page *page;
		if (!pfn_valid_within(low_pfn))
			continue;

		/* Get the page and skip it if free */
		page = pfn_to_page(low_pfn);
		if (PageBuddy(page))
			continue;

		/* Try to isolate the page */
		if (__isolate_lru_page(page, ISOLATE_BOTH, 0) != 0)
			continue;

		/* Successfully isolated */
		del_page_from_lru_list(zone, page, page_lru(page));
		list_add(&page->lru, migratelist);
		mem_cgroup_del_lru(page);
		cc->nr_migratepages++;

		/* Avoid isolating too much */
		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
			break;
	}

	acct_isolated(zone, cc);

	spin_unlock_irq(&zone->lru_lock);
	cc->migrate_pfn = low_pfn;

	return cc->nr_migratepages;
}
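
/*
 * COMPACT_CLUSTER_MAX above bounds how many pages a single pass isolates,
 * so a compaction run migrates in modest batches instead of pulling an
 * entire pageblock's worth of LRU pages off the lists at once.
 */
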
/*
 * This is a migrate-callback that "allocates" freepages by taking pages
 * from the isolated freelists in the block we are migrating to.
 */
static struct page *compaction_alloc(struct page *migratepage,
					unsigned long data,
					int **result)
{
	struct compact_control *cc = (struct compact_control *)data;
	struct page *freepage;

	/* Isolate free pages if necessary */
	if (list_empty(&cc->freepages)) {
		isolate_freepages(cc->zone, cc);

		if (list_empty(&cc->freepages))
			return NULL;
	}

	freepage = list_entry(cc->freepages.next, struct page, lru);
	list_del(&freepage->lru);
	cc->nr_freepages--;

	return freepage;
}

/*
 * We cannot control nr_migratepages and nr_freepages fully when migration is
 * running as migrate_pages() has no knowledge of compact_control. When
 * migration is complete, we count the number of pages on the lists by hand.
 */
static void update_nr_listpages(struct compact_control *cc)
{
	int nr_migratepages = 0;
	int nr_freepages = 0;
	struct page *page;

	list_for_each_entry(page, &cc->migratepages, lru)
		nr_migratepages++;
	list_for_each_entry(page, &cc->freepages, lru)
		nr_freepages++;

	cc->nr_migratepages = nr_migratepages;
	cc->nr_freepages = nr_freepages;
}

static int compact_finished(struct zone *zone,
				struct compact_control *cc)
{
	if (fatal_signal_pending(current))
		return COMPACT_PARTIAL;

	/* Compaction run completes if the migrate and free scanners meet */
	if (cc->free_pfn <= cc->migrate_pfn)
		return COMPACT_COMPLETE;

	return COMPACT_CONTINUE;
}
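
/*
 * The COMPACT_* values returned above follow the contract expected from
 * <linux/compaction.h>: COMPACT_CONTINUE keeps the loop in compact_zone()
 * going, while COMPACT_PARTIAL (aborted on a fatal signal) and
 * COMPACT_COMPLETE (the scanners met) both terminate it.
 */
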
static int compact_zone(struct zone *zone, struct compact_control *cc)
{
	int ret;

	/* Setup to move all movable pages to the end of the zone */
	cc->migrate_pfn = zone->zone_start_pfn;
	cc->free_pfn = cc->migrate_pfn + zone->spanned_pages;
	cc->free_pfn &= ~(pageblock_nr_pages-1);

	migrate_prep_local();

	while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) {
		unsigned long nr_migrate, nr_remaining;

		if (!isolate_migratepages(zone, cc))
			continue;

		nr_migrate = cc->nr_migratepages;
		migrate_pages(&cc->migratepages, compaction_alloc,
						(unsigned long)cc, 0);
		update_nr_listpages(cc);
		nr_remaining = cc->nr_migratepages;

		count_vm_event(COMPACTBLOCKS);
		count_vm_events(COMPACTPAGES, nr_migrate - nr_remaining);
		if (nr_remaining)
			count_vm_events(COMPACTPAGEFAILED, nr_remaining);

		/* Release LRU pages not migrated */
		if (!list_empty(&cc->migratepages)) {
			putback_lru_pages(&cc->migratepages);
			cc->nr_migratepages = 0;
		}
	}

	/* Release free pages and check accounting */
	cc->nr_freepages -= release_freepages(&cc->freepages);
	VM_BUG_ON(cc->nr_freepages != 0);

	return ret;
}
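
/*
 * A minimal sketch of how a caller might drive compact_zone(); the helper
 * name compact_zone_sync() is hypothetical and not part of this file:
 *
 *	static int compact_zone_sync(struct zone *zone)
 *	{
 *		struct compact_control cc = {
 *			.nr_freepages = 0,
 *			.nr_migratepages = 0,
 *			.zone = zone,
 *		};
 *
 *		INIT_LIST_HEAD(&cc.freepages);
 *		INIT_LIST_HEAD(&cc.migratepages);
 *
 *		return compact_zone(zone, &cc);
 *	}
 */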