1*b20a3503SChristoph Lameter /* 2*b20a3503SChristoph Lameter * Memory Migration functionality - linux/mm/migration.c 3*b20a3503SChristoph Lameter * 4*b20a3503SChristoph Lameter * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter 5*b20a3503SChristoph Lameter * 6*b20a3503SChristoph Lameter * Page migration was first developed in the context of the memory hotplug 7*b20a3503SChristoph Lameter * project. The main authors of the migration code are: 8*b20a3503SChristoph Lameter * 9*b20a3503SChristoph Lameter * IWAMOTO Toshihiro <iwamoto@valinux.co.jp> 10*b20a3503SChristoph Lameter * Hirokazu Takahashi <taka@valinux.co.jp> 11*b20a3503SChristoph Lameter * Dave Hansen <haveblue@us.ibm.com> 12*b20a3503SChristoph Lameter * Christoph Lameter <clameter@sgi.com> 13*b20a3503SChristoph Lameter */ 14*b20a3503SChristoph Lameter 15*b20a3503SChristoph Lameter #include <linux/migrate.h> 16*b20a3503SChristoph Lameter #include <linux/module.h> 17*b20a3503SChristoph Lameter #include <linux/swap.h> 18*b20a3503SChristoph Lameter #include <linux/pagemap.h> 19*b20a3503SChristoph Lameter #include <linux/buffer_head.h> /* for try_to_release_page(), 20*b20a3503SChristoph Lameter buffer_heads_over_limit */ 21*b20a3503SChristoph Lameter #include <linux/mm_inline.h> 22*b20a3503SChristoph Lameter #include <linux/pagevec.h> 23*b20a3503SChristoph Lameter #include <linux/rmap.h> 24*b20a3503SChristoph Lameter #include <linux/topology.h> 25*b20a3503SChristoph Lameter #include <linux/cpu.h> 26*b20a3503SChristoph Lameter #include <linux/cpuset.h> 27*b20a3503SChristoph Lameter #include <linux/swapops.h> 28*b20a3503SChristoph Lameter 29*b20a3503SChristoph Lameter #include "internal.h" 30*b20a3503SChristoph Lameter 31*b20a3503SChristoph Lameter #include "internal.h" 32*b20a3503SChristoph Lameter 33*b20a3503SChristoph Lameter /* The maximum number of pages to take off the LRU for migration */ 34*b20a3503SChristoph Lameter #define MIGRATE_CHUNK_SIZE 256 35*b20a3503SChristoph Lameter 
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))

/*
 * Isolate one page from the LRU lists. If successful put it onto
 * the indicated list with elevated page count.
 *
 * Result:
 * -EBUSY: page not on LRU list
 * 0: page removed from LRU list and added to the specified list.
 */
int isolate_lru_page(struct page *page, struct list_head *pagelist)
{
	int ret = -EBUSY;

	if (PageLRU(page)) {
		struct zone *zone = page_zone(page);

		spin_lock_irq(&zone->lru_lock);
		/*
		 * Re-check under zone->lru_lock: the page may have been
		 * taken off the LRU between the unlocked test above and
		 * acquiring the lock.
		 */
		if (PageLRU(page)) {
			ret = 0;
			/* Hold a reference while the page is off the LRU. */
			get_page(page);
			ClearPageLRU(page);
			if (PageActive(page))
				del_page_from_active_list(zone, page);
			else
				del_page_from_inactive_list(zone, page);
			list_add_tail(&page->lru, pagelist);
		}
		spin_unlock_irq(&zone->lru_lock);
	}
	return ret;
}

/*
 * migrate_prep() needs to be called after we have compiled the list of pages
 * to be migrated using isolate_lru_page() but before we begin a series of calls
 * to migrate_pages().
 */
int migrate_prep(void)
{
	/* Must have swap device for migration */
	if (nr_swap_pages <= 0)
		return -ENODEV;

	/*
	 * Clear the LRU lists so pages can be isolated.
	 * Note that pages may be moved off the LRU after we have
	 * drained them. Those pages will fail to migrate like other
	 * pages that may be busy.
	 */
	lru_add_drain_all();

	return 0;
}

/*
 * Put an isolated page back on the proper LRU list and drop the
 * reference that was taken when the page was isolated.
 */
static inline void move_to_lru(struct page *page)
{
	list_del(&page->lru);
	if (PageActive(page)) {
		/*
		 * lru_cache_add_active checks that
		 * the PG_active bit is off.
		 */
		ClearPageActive(page);
		lru_cache_add_active(page);
	} else {
		lru_cache_add(page);
	}
	put_page(page);
}

/*
 * Add isolated pages on the list back to the LRU.
 *
 * returns the number of pages put back.
 */
int putback_lru_pages(struct list_head *l)
{
	struct page *page;
	struct page *page2;
	int count = 0;

	/* _safe variant: move_to_lru() unlinks each page from the list. */
	list_for_each_entry_safe(page, page2, l, lru) {
		move_to_lru(page);
		count++;
	}
	return count;
}

/*
 * Non migratable page
 */
int fail_migrate_page(struct page *newpage, struct page *page)
{
	return -EIO;
}
EXPORT_SYMBOL(fail_migrate_page);

/*
 * swapout a single page
 * page is locked upon entry, unlocked on exit
 *
 * Returns 0 if the page was freed from its mapping, -EAGAIN otherwise
 * so that the caller retries on a later pass.
 */
static int swap_page(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	/* Drop all ptes referencing the page; any failure means retry. */
	if (page_mapped(page) && mapping)
		if (try_to_unmap(page, 1) != SWAP_SUCCESS)
			goto unlock_retry;

	if (PageDirty(page)) {
		/* Page is dirty, try to write it out here */
		switch(pageout(page, mapping)) {
		case PAGE_KEEP:
		case PAGE_ACTIVATE:
			goto unlock_retry;

		case PAGE_SUCCESS:
			/* pageout() unlocked the page for us. */
			goto retry;

		case PAGE_CLEAN:
			; /* try to free the page below */
		}
	}

	if (PagePrivate(page)) {
		if (!try_to_release_page(page, GFP_KERNEL) ||
			(!mapping && page_count(page) == 1))
			goto unlock_retry;
	}

	/*
	 * NOTE(review): if the page had no mapping but held extra
	 * references, control reaches here with mapping == NULL —
	 * verify remove_mapping() tolerates a NULL mapping.
	 */
	if (remove_mapping(mapping, page)) {
		/* Success */
		unlock_page(page);
		return 0;
	}

unlock_retry:
	unlock_page(page);

retry:
	return -EAGAIN;
}
EXPORT_SYMBOL(swap_page);

/*
 * Remove references for a page and establish the new page with the correct
 * basic settings to be able to stop accesses to the page.
 *
 * Returns 0 on success, -EAGAIN / -EPERM on failure, or 1 when the
 * radix tree re-check under tree_lock fails.
 */
int migrate_page_remove_references(struct page *newpage,
		struct page *page, int nr_refs)
{
	struct address_space *mapping = page_mapping(page);
	struct page **radix_pointer;

	/*
	 * Avoid doing any of the following work if the page count
	 * indicates that the page is in use or truncate has removed
	 * the page.
	 */
	if (!mapping || page_mapcount(page) + nr_refs != page_count(page))
		return -EAGAIN;

	/*
	 * Establish swap ptes for anonymous pages or destroy pte
	 * maps for files.
	 *
	 * In order to reestablish file backed mappings the fault handlers
	 * will take the radix tree_lock which may then be used to stop
	 * processes from accessing this page until the new page is ready.
	 *
	 * A process accessing via a swap pte (an anonymous page) will take a
	 * page_lock on the old page which will block the process until the
	 * migration attempt is complete. At that time the PageSwapCache bit
	 * will be examined. If the page was migrated then the PageSwapCache
	 * bit will be clear and the operation to retrieve the page will be
	 * retried which will find the new page in the radix tree. Then a new
	 * direct mapping may be generated based on the radix tree contents.
	 *
	 * If the page was not migrated then the PageSwapCache bit
	 * is still set and the operation may continue.
	 */
	if (try_to_unmap(page, 1) == SWAP_FAIL)
		/* A vma has VM_LOCKED set -> permanent failure */
		return -EPERM;

	/*
	 * Give up if we were unable to remove all mappings.
	 */
	if (page_mapcount(page))
		return -EAGAIN;

	write_lock_irq(&mapping->tree_lock);

	radix_pointer = (struct page **)radix_tree_lookup_slot(
						&mapping->page_tree,
						page_index(page));

	/*
	 * Re-check under tree_lock that nobody else gained a reference
	 * and that the page is still the one in the radix tree slot.
	 */
	if (!page_mapping(page) || page_count(page) != nr_refs ||
			*radix_pointer != page) {
		write_unlock_irq(&mapping->tree_lock);
		return 1;
	}

	/*
	 * Now we know that no one else is looking at the page.
	 *
	 * Certain minimal information about a page must be available
	 * in order for other subsystems to properly handle the page if they
	 * find it through the radix tree update before we are finished
	 * copying the page.
	 */
	get_page(newpage);
	newpage->index = page->index;
	newpage->mapping = page->mapping;
	if (PageSwapCache(page)) {
		SetPageSwapCache(newpage);
		set_page_private(newpage, page_private(page));
	}

	/* Publish the new page in the radix tree, drop the old reference. */
	*radix_pointer = newpage;
	__put_page(page);
	write_unlock_irq(&mapping->tree_lock);

	return 0;
}
EXPORT_SYMBOL(migrate_page_remove_references);

/*
 * Copy the page to its new location
 *
 * Transfers contents and relevant page flags from the old page to the
 * new one and strips the old page of its identity.
 */
void migrate_page_copy(struct page *newpage, struct page *page)
{
	copy_highpage(newpage, page);

	if (PageError(page))
		SetPageError(newpage);
	if (PageReferenced(page))
		SetPageReferenced(newpage);
	if (PageUptodate(page))
		SetPageUptodate(newpage);
	if (PageActive(page))
		SetPageActive(newpage);
	if (PageChecked(page))
		SetPageChecked(newpage);
	if (PageMappedToDisk(page))
		SetPageMappedToDisk(newpage);

	if (PageDirty(page)) {
		/* Dirtiness follows the data to the new page. */
		clear_page_dirty_for_io(page);
		set_page_dirty(newpage);
	}

	/* The old page no longer identifies the data; clear its state. */
	ClearPageSwapCache(page);
	ClearPageActive(page);
	ClearPagePrivate(page);
	set_page_private(page, 0);
	page->mapping = NULL;

	/*
	 * If any waiters have accumulated on the new page then
	 * wake them up.
	 */
	if (PageWriteback(newpage))
		end_page_writeback(newpage);
}
EXPORT_SYMBOL(migrate_page_copy);

/*
 * Common logic to directly migrate a single page suitable for
 * pages that do not use PagePrivate.
 *
 * Pages are locked upon entry and exit.
309*b20a3503SChristoph Lameter */ 310*b20a3503SChristoph Lameter int migrate_page(struct page *newpage, struct page *page) 311*b20a3503SChristoph Lameter { 312*b20a3503SChristoph Lameter int rc; 313*b20a3503SChristoph Lameter 314*b20a3503SChristoph Lameter BUG_ON(PageWriteback(page)); /* Writeback must be complete */ 315*b20a3503SChristoph Lameter 316*b20a3503SChristoph Lameter rc = migrate_page_remove_references(newpage, page, 2); 317*b20a3503SChristoph Lameter 318*b20a3503SChristoph Lameter if (rc) 319*b20a3503SChristoph Lameter return rc; 320*b20a3503SChristoph Lameter 321*b20a3503SChristoph Lameter migrate_page_copy(newpage, page); 322*b20a3503SChristoph Lameter 323*b20a3503SChristoph Lameter /* 324*b20a3503SChristoph Lameter * Remove auxiliary swap entries and replace 325*b20a3503SChristoph Lameter * them with real ptes. 326*b20a3503SChristoph Lameter * 327*b20a3503SChristoph Lameter * Note that a real pte entry will allow processes that are not 328*b20a3503SChristoph Lameter * waiting on the page lock to use the new page via the page tables 329*b20a3503SChristoph Lameter * before the new page is unlocked. 330*b20a3503SChristoph Lameter */ 331*b20a3503SChristoph Lameter remove_from_swap(newpage); 332*b20a3503SChristoph Lameter return 0; 333*b20a3503SChristoph Lameter } 334*b20a3503SChristoph Lameter EXPORT_SYMBOL(migrate_page); 335*b20a3503SChristoph Lameter 336*b20a3503SChristoph Lameter /* 337*b20a3503SChristoph Lameter * migrate_pages 338*b20a3503SChristoph Lameter * 339*b20a3503SChristoph Lameter * Two lists are passed to this function. The first list 340*b20a3503SChristoph Lameter * contains the pages isolated from the LRU to be migrated. 341*b20a3503SChristoph Lameter * The second list contains new pages that the pages isolated 342*b20a3503SChristoph Lameter * can be moved to. If the second list is NULL then all 343*b20a3503SChristoph Lameter * pages are swapped out. 
 *
 * The function returns after 10 attempts or if no pages
 * are movable anymore because "to" has become empty
 * or no retryable pages exist anymore.
 *
 * Return: Number of pages not migrated when "to" ran empty.
 */
int migrate_pages(struct list_head *from, struct list_head *to,
		struct list_head *moved, struct list_head *failed)
{
	int retry;
	int nr_failed = 0;
	int pass = 0;
	struct page *page;
	struct page *page2;
	int swapwrite = current->flags & PF_SWAPWRITE;
	int rc;

	/* Allow this task to write to swap while migrating. */
	if (!swapwrite)
		current->flags |= PF_SWAPWRITE;

redo:
	retry = 0;

	list_for_each_entry_safe(page, page2, from, lru) {
		struct page *newpage = NULL;
		struct address_space *mapping;

		cond_resched();

		rc = 0;
		if (page_count(page) == 1)
			/* page was freed from under us. So we are done. */
			goto next;

		/* Out of target pages: stop and report the remainder. */
		if (to && list_empty(to))
			break;

		/*
		 * Skip locked pages during the first two passes to give the
		 * functions holding the lock time to release the page. Later we
		 * use lock_page() to have a higher chance of acquiring the
		 * lock.
		 */
		rc = -EAGAIN;
		if (pass > 2)
			lock_page(page);
		else
			if (TestSetPageLocked(page))
				goto next;

		/*
		 * Only wait on writeback if we have already done a pass where
		 * we may have triggered writeouts for lots of pages.
		 */
		if (pass > 0) {
			wait_on_page_writeback(page);
		} else {
			if (PageWriteback(page))
				goto unlock_page;
		}

		/*
		 * Anonymous pages must have swap cache references otherwise
		 * the information contained in the page maps cannot be
		 * preserved.
		 */
		if (PageAnon(page) && !PageSwapCache(page)) {
			if (!add_to_swap(page, GFP_KERNEL)) {
				rc = -ENOMEM;
				goto unlock_page;
			}
		}

		/* No target list: swap the page out instead of migrating. */
		if (!to) {
			rc = swap_page(page);
			goto next;
		}

		newpage = lru_to_page(to);
		lock_page(newpage);

		/*
		 * Pages are properly locked and writeback is complete.
		 * Try to migrate the page.
		 */
		mapping = page_mapping(page);
		if (!mapping)
			goto unlock_both;

		if (mapping->a_ops->migratepage) {
			/*
			 * Most pages have a mapping and most filesystems
			 * should provide a migration function. Anonymous
			 * pages are part of swap space which also has its
			 * own migration function. This is the most common
			 * path for page migration.
			 */
			rc = mapping->a_ops->migratepage(newpage, page);
			goto unlock_both;
		}

		/*
		 * Default handling if a filesystem does not provide
		 * a migration function. We can only migrate clean
		 * pages so try to write out any dirty pages first.
		 */
		if (PageDirty(page)) {
			switch (pageout(page, mapping)) {
			case PAGE_KEEP:
			case PAGE_ACTIVATE:
				goto unlock_both;

			case PAGE_SUCCESS:
				/* pageout() unlocked the old page. */
				unlock_page(newpage);
				goto next;

			case PAGE_CLEAN:
				; /* try to migrate the page below */
			}
		}

		/*
		 * Buffers are managed in a filesystem specific way.
		 * We must have no buffers or drop them.
		 */
		if (!page_has_buffers(page) ||
		    try_to_release_page(page, GFP_KERNEL)) {
			rc = migrate_page(newpage, page);
			goto unlock_both;
		}

		/*
		 * On early passes with mapped pages simply
		 * retry. There may be a lock held for some
		 * buffers that may go away. Later
		 * swap them out.
		 */
		if (pass > 4) {
			/*
			 * Persistently unable to drop buffers..... As a
			 * measure of last resort we fall back to
			 * swap_page().
			 */
			unlock_page(newpage);
			newpage = NULL;
			rc = swap_page(page);
			goto next;
		}

unlock_both:
		unlock_page(newpage);

unlock_page:
		unlock_page(page);

next:
		/* Sort the page onto the retry / failed / moved list. */
		if (rc == -EAGAIN) {
			retry++;
		} else if (rc) {
			/* Permanent failure */
			list_move(&page->lru, failed);
			nr_failed++;
		} else {
			if (newpage) {
				/* Successful migration. Return page to LRU */
				move_to_lru(newpage);
			}
			list_move(&page->lru, moved);
		}
	}
	if (retry && pass++ < 10)
		goto redo;

	if (!swapwrite)
		current->flags &= ~PF_SWAPWRITE;

	return nr_failed + retry;
}

/*
 * Migration function for pages with buffers. This function can only be used
 * if the underlying filesystem guarantees that no other references to "page"
 * exist.
 */
int buffer_migrate_page(struct page *newpage, struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct buffer_head *bh, *head;
	int rc;

	if (!mapping)
		return -EAGAIN;

	/* Without buffers the generic path is sufficient. */
	if (!page_has_buffers(page))
		return migrate_page(newpage, page);

	head = page_buffers(page);

	/* nr_refs == 3: mapping, buffers, and the migration caller. */
	rc = migrate_page_remove_references(newpage, page, 3);

	if (rc)
		return rc;

	/* Pin and lock every buffer on the page before moving them. */
	bh = head;
	do {
		get_bh(bh);
		lock_buffer(bh);
		bh = bh->b_this_page;

	} while (bh != head);

	/* Transfer buffer ownership (page_private) to the new page. */
	ClearPagePrivate(page);
	set_page_private(newpage, page_private(page));
	set_page_private(page, 0);
	put_page(page);
	get_page(newpage);

	/* Point each buffer head at the new page. */
	bh = head;
	do {
		set_bh_page(bh, newpage, bh_offset(bh));
		bh = bh->b_this_page;

	} while (bh != head);

	SetPagePrivate(newpage);

	migrate_page_copy(newpage, page);

	/* Release the buffers locked above. */
	bh = head;
	do {
		unlock_buffer(bh);
		put_bh(bh);
		bh = bh->b_this_page;

	} while (bh != head);

	return 0;
}
EXPORT_SYMBOL(buffer_migrate_page);

/*
 * Migrate the list 'pagelist' of pages to a certain destination.
 *
 * Specify destination with either non-NULL vma or dest_node >= 0
 * Return the number of pages not migrated or error code
 */
int migrate_pages_to(struct list_head *pagelist,
			struct vm_area_struct *vma, int dest)
{
	LIST_HEAD(newlist);
	LIST_HEAD(moved);
	LIST_HEAD(failed);
	int err = 0;
	unsigned long offset = 0;
	int nr_pages;
	struct page *page;
	struct list_head *p;

redo:
	/* Allocate one target page per source page, a chunk at a time. */
	nr_pages = 0;
	list_for_each(p, pagelist) {
		if (vma) {
			/*
			 * The address passed to alloc_page_vma is used to
			 * generate the proper interleave behavior. We fake
			 * the address here by an increasing offset in order
			 * to get the proper distribution of pages.
			 *
			 * No decision has been made as to which page
			 * a certain old page is moved to so we cannot
			 * specify the correct address.
			 */
			page = alloc_page_vma(GFP_HIGHUSER, vma,
					offset + vma->vm_start);
			offset += PAGE_SIZE;
		}
		else
			page = alloc_pages_node(dest, GFP_HIGHUSER, 0);

		if (!page) {
			err = -ENOMEM;
			goto out;
		}
		list_add_tail(&page->lru, &newlist);
		nr_pages++;
		/*
		 * NOTE(review): the break fires only once nr_pages exceeds
		 * MIGRATE_CHUNK_SIZE, so up to MIGRATE_CHUNK_SIZE + 1 pages
		 * are allocated per chunk — confirm this is intended.
		 */
		if (nr_pages > MIGRATE_CHUNK_SIZE)
			break;
	}
	err = migrate_pages(pagelist, &newlist, &moved, &failed);

	putback_lru_pages(&moved);	/* Call release pages instead ?? */

	/* All target pages consumed but sources remain: do another chunk. */
	if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist))
		goto redo;
out:
	/* Return leftover allocated pages */
	while (!list_empty(&newlist)) {
		page = list_entry(newlist.next, struct page, lru);
		list_del(&page->lru);
		__free_page(page);
	}
	list_splice(&failed, pagelist);
	if (err < 0)
		return err;

	/* Calculate number of leftover pages */
	nr_pages = 0;
	list_for_each(p, pagelist)
		nr_pages++;
	return nr_pages;
}