11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * linux/mm/swap_state.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 51da177e4SLinus Torvalds * Swap reorganised 29.12.95, Stephen Tweedie 61da177e4SLinus Torvalds * 71da177e4SLinus Torvalds * Rewritten to use page cache, (C) 1998 Stephen Tweedie 81da177e4SLinus Torvalds */ 91da177e4SLinus Torvalds #include <linux/module.h> 101da177e4SLinus Torvalds #include <linux/mm.h> 111da177e4SLinus Torvalds #include <linux/kernel_stat.h> 121da177e4SLinus Torvalds #include <linux/swap.h> 131da177e4SLinus Torvalds #include <linux/init.h> 141da177e4SLinus Torvalds #include <linux/pagemap.h> 151da177e4SLinus Torvalds #include <linux/buffer_head.h> 161da177e4SLinus Torvalds #include <linux/backing-dev.h> 171da177e4SLinus Torvalds 181da177e4SLinus Torvalds #include <asm/pgtable.h> 191da177e4SLinus Torvalds 201da177e4SLinus Torvalds /* 211da177e4SLinus Torvalds * swapper_space is a fiction, retained to simplify the path through 221da177e4SLinus Torvalds * vmscan's shrink_list, to make sync_page look nicer, and to allow 231da177e4SLinus Torvalds * future use of radix_tree tags in the swap cache. 241da177e4SLinus Torvalds */ 251da177e4SLinus Torvalds static struct address_space_operations swap_aops = { 261da177e4SLinus Torvalds .writepage = swap_writepage, 271da177e4SLinus Torvalds .sync_page = block_sync_page, 281da177e4SLinus Torvalds .set_page_dirty = __set_page_dirty_nobuffers, 291da177e4SLinus Torvalds }; 301da177e4SLinus Torvalds 311da177e4SLinus Torvalds static struct backing_dev_info swap_backing_dev_info = { 321da177e4SLinus Torvalds .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 331da177e4SLinus Torvalds .unplug_io_fn = swap_unplug_io_fn, 341da177e4SLinus Torvalds }; 351da177e4SLinus Torvalds 361da177e4SLinus Torvalds struct address_space swapper_space = { 371da177e4SLinus Torvalds .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), 381da177e4SLinus Torvalds .tree_lock = RW_LOCK_UNLOCKED, 391da177e4SLinus Torvalds .a_ops = &swap_aops, 401da177e4SLinus Torvalds .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), 411da177e4SLinus Torvalds .backing_dev_info = &swap_backing_dev_info, 421da177e4SLinus Torvalds }; 431da177e4SLinus Torvalds EXPORT_SYMBOL(swapper_space); 441da177e4SLinus Torvalds 451da177e4SLinus Torvalds #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) 461da177e4SLinus Torvalds 471da177e4SLinus Torvalds static struct { 481da177e4SLinus Torvalds unsigned long add_total; 491da177e4SLinus Torvalds unsigned long del_total; 501da177e4SLinus Torvalds unsigned long find_success; 511da177e4SLinus Torvalds unsigned long find_total; 521da177e4SLinus Torvalds unsigned long noent_race; 531da177e4SLinus Torvalds unsigned long exist_race; 541da177e4SLinus Torvalds } swap_cache_info; 551da177e4SLinus Torvalds 561da177e4SLinus Torvalds void show_swap_cache_info(void) 571da177e4SLinus Torvalds { 581da177e4SLinus Torvalds printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n", 591da177e4SLinus Torvalds swap_cache_info.add_total, swap_cache_info.del_total, 601da177e4SLinus Torvalds swap_cache_info.find_success, swap_cache_info.find_total, 611da177e4SLinus Torvalds swap_cache_info.noent_race, swap_cache_info.exist_race); 621da177e4SLinus Torvalds printk("Free swap = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10)); 631da177e4SLinus Torvalds printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10)); 641da177e4SLinus Torvalds } 651da177e4SLinus Torvalds 661da177e4SLinus Torvalds /* 671da177e4SLinus Torvalds * __add_to_swap_cache resembles add_to_page_cache on swapper_space, 681da177e4SLinus Torvalds * but sets SwapCache flag and private instead of mapping and index. 691da177e4SLinus Torvalds */ 709de75d11SVictor Fusco static int __add_to_swap_cache(struct page *page, swp_entry_t entry, 71dd0fc66fSAl Viro gfp_t gfp_mask) 721da177e4SLinus Torvalds { 731da177e4SLinus Torvalds int error; 741da177e4SLinus Torvalds 751da177e4SLinus Torvalds BUG_ON(PageSwapCache(page)); 761da177e4SLinus Torvalds BUG_ON(PagePrivate(page)); 771da177e4SLinus Torvalds error = radix_tree_preload(gfp_mask); 781da177e4SLinus Torvalds if (!error) { 791da177e4SLinus Torvalds write_lock_irq(&swapper_space.tree_lock); 801da177e4SLinus Torvalds error = radix_tree_insert(&swapper_space.page_tree, 811da177e4SLinus Torvalds entry.val, page); 821da177e4SLinus Torvalds if (!error) { 831da177e4SLinus Torvalds page_cache_get(page); 841da177e4SLinus Torvalds SetPageLocked(page); 851da177e4SLinus Torvalds SetPageSwapCache(page); 864c21e2f2SHugh Dickins set_page_private(page, entry.val); 871da177e4SLinus Torvalds total_swapcache_pages++; 881da177e4SLinus Torvalds pagecache_acct(1); 891da177e4SLinus Torvalds } 901da177e4SLinus Torvalds write_unlock_irq(&swapper_space.tree_lock); 911da177e4SLinus Torvalds radix_tree_preload_end(); 921da177e4SLinus Torvalds } 931da177e4SLinus Torvalds return error; 941da177e4SLinus Torvalds } 951da177e4SLinus Torvalds 961da177e4SLinus Torvalds static int add_to_swap_cache(struct page *page, swp_entry_t entry) 971da177e4SLinus Torvalds { 981da177e4SLinus Torvalds int error; 991da177e4SLinus Torvalds 1001da177e4SLinus Torvalds if (!swap_duplicate(entry)) { 1011da177e4SLinus Torvalds INC_CACHE_INFO(noent_race); 1021da177e4SLinus Torvalds return -ENOENT; 1031da177e4SLinus Torvalds } 1041da177e4SLinus Torvalds error = __add_to_swap_cache(page, entry, GFP_KERNEL); 1051da177e4SLinus Torvalds /* 1061da177e4SLinus Torvalds * Anon pages are already on the LRU, we don't run lru_cache_add here. 1071da177e4SLinus Torvalds */ 1081da177e4SLinus Torvalds if (error) { 1091da177e4SLinus Torvalds swap_free(entry); 1101da177e4SLinus Torvalds if (error == -EEXIST) 1111da177e4SLinus Torvalds INC_CACHE_INFO(exist_race); 1121da177e4SLinus Torvalds return error; 1131da177e4SLinus Torvalds } 1141da177e4SLinus Torvalds INC_CACHE_INFO(add_total); 1151da177e4SLinus Torvalds return 0; 1161da177e4SLinus Torvalds } 1171da177e4SLinus Torvalds 1181da177e4SLinus Torvalds /* 1191da177e4SLinus Torvalds * This must be called only on pages that have 1201da177e4SLinus Torvalds * been verified to be in the swap cache. 1211da177e4SLinus Torvalds */ 1221da177e4SLinus Torvalds void __delete_from_swap_cache(struct page *page) 1231da177e4SLinus Torvalds { 1241da177e4SLinus Torvalds BUG_ON(!PageLocked(page)); 1251da177e4SLinus Torvalds BUG_ON(!PageSwapCache(page)); 1261da177e4SLinus Torvalds BUG_ON(PageWriteback(page)); 1273279ffd9SHugh Dickins BUG_ON(PagePrivate(page)); 1281da177e4SLinus Torvalds 1294c21e2f2SHugh Dickins radix_tree_delete(&swapper_space.page_tree, page_private(page)); 1304c21e2f2SHugh Dickins set_page_private(page, 0); 1311da177e4SLinus Torvalds ClearPageSwapCache(page); 1321da177e4SLinus Torvalds total_swapcache_pages--; 1331da177e4SLinus Torvalds pagecache_acct(-1); 1341da177e4SLinus Torvalds INC_CACHE_INFO(del_total); 1351da177e4SLinus Torvalds } 1361da177e4SLinus Torvalds 1371da177e4SLinus Torvalds /** 1381da177e4SLinus Torvalds * add_to_swap - allocate swap space for a page 1391da177e4SLinus Torvalds * @page: page we want to move to swap 1401da177e4SLinus Torvalds * 1411da177e4SLinus Torvalds * Allocate swap space for the page and add the page to the 1421da177e4SLinus Torvalds * swap cache. Caller needs to hold the page lock. 1431da177e4SLinus Torvalds */ 1441da177e4SLinus Torvalds int add_to_swap(struct page * page) 1451da177e4SLinus Torvalds { 1461da177e4SLinus Torvalds swp_entry_t entry; 1471da177e4SLinus Torvalds int err; 1481da177e4SLinus Torvalds 1491da177e4SLinus Torvalds if (!PageLocked(page)) 1501da177e4SLinus Torvalds BUG(); 1511da177e4SLinus Torvalds 1521da177e4SLinus Torvalds for (;;) { 1531da177e4SLinus Torvalds entry = get_swap_page(); 1541da177e4SLinus Torvalds if (!entry.val) 1551da177e4SLinus Torvalds return 0; 1561da177e4SLinus Torvalds 157bd53b714SNick Piggin /* 158bd53b714SNick Piggin * Radix-tree node allocations from PF_MEMALLOC contexts could 159bd53b714SNick Piggin * completely exhaust the page allocator. __GFP_NOMEMALLOC 160bd53b714SNick Piggin * stops emergency reserves from being allocated. 1611da177e4SLinus Torvalds * 162bd53b714SNick Piggin * TODO: this could cause a theoretical memory reclaim 163bd53b714SNick Piggin * deadlock in the swap out path. 1641da177e4SLinus Torvalds */ 1651da177e4SLinus Torvalds /* 1661da177e4SLinus Torvalds * Add it to the swap cache and mark it dirty 1671da177e4SLinus Torvalds */ 168bd53b714SNick Piggin err = __add_to_swap_cache(page, entry, 169bd53b714SNick Piggin GFP_ATOMIC|__GFP_NOMEMALLOC|__GFP_NOWARN); 1701da177e4SLinus Torvalds 1711da177e4SLinus Torvalds switch (err) { 1721da177e4SLinus Torvalds case 0: /* Success */ 1731da177e4SLinus Torvalds SetPageUptodate(page); 1741da177e4SLinus Torvalds SetPageDirty(page); 1751da177e4SLinus Torvalds INC_CACHE_INFO(add_total); 1761da177e4SLinus Torvalds return 1; 1771da177e4SLinus Torvalds case -EEXIST: 1781da177e4SLinus Torvalds /* Raced with "speculative" read_swap_cache_async */ 1791da177e4SLinus Torvalds INC_CACHE_INFO(exist_race); 1801da177e4SLinus Torvalds swap_free(entry); 1811da177e4SLinus Torvalds continue; 1821da177e4SLinus Torvalds default: 1831da177e4SLinus Torvalds /* -ENOMEM radix-tree allocation failure */ 1841da177e4SLinus Torvalds swap_free(entry); 1851da177e4SLinus Torvalds return 0; 1861da177e4SLinus Torvalds } 1871da177e4SLinus Torvalds } 1881da177e4SLinus Torvalds } 1891da177e4SLinus Torvalds 1901da177e4SLinus Torvalds /* 1911da177e4SLinus Torvalds * This must be called only on pages that have 1921da177e4SLinus Torvalds * been verified to be in the swap cache and locked. 1931da177e4SLinus Torvalds * It will never put the page into the free list, 1941da177e4SLinus Torvalds * the caller has a reference on the page. 1951da177e4SLinus Torvalds */ 1961da177e4SLinus Torvalds void delete_from_swap_cache(struct page *page) 1971da177e4SLinus Torvalds { 1981da177e4SLinus Torvalds swp_entry_t entry; 1991da177e4SLinus Torvalds 2004c21e2f2SHugh Dickins entry.val = page_private(page); 2011da177e4SLinus Torvalds 2021da177e4SLinus Torvalds write_lock_irq(&swapper_space.tree_lock); 2031da177e4SLinus Torvalds __delete_from_swap_cache(page); 2041da177e4SLinus Torvalds write_unlock_irq(&swapper_space.tree_lock); 2051da177e4SLinus Torvalds 2061da177e4SLinus Torvalds swap_free(entry); 2071da177e4SLinus Torvalds page_cache_release(page); 2081da177e4SLinus Torvalds } 2091da177e4SLinus Torvalds 2101da177e4SLinus Torvalds /* 2111da177e4SLinus Torvalds * Strange swizzling function only for use by shmem_writepage 2121da177e4SLinus Torvalds */ 2131da177e4SLinus Torvalds int move_to_swap_cache(struct page *page, swp_entry_t entry) 2141da177e4SLinus Torvalds { 2151da177e4SLinus Torvalds int err = __add_to_swap_cache(page, entry, GFP_ATOMIC); 2161da177e4SLinus Torvalds if (!err) { 2171da177e4SLinus Torvalds remove_from_page_cache(page); 2181da177e4SLinus Torvalds page_cache_release(page); /* pagecache ref */ 2191da177e4SLinus Torvalds if (!swap_duplicate(entry)) 2201da177e4SLinus Torvalds BUG(); 2211da177e4SLinus Torvalds SetPageDirty(page); 2221da177e4SLinus Torvalds INC_CACHE_INFO(add_total); 2231da177e4SLinus Torvalds } else if (err == -EEXIST) 2241da177e4SLinus Torvalds INC_CACHE_INFO(exist_race); 2251da177e4SLinus Torvalds return err; 2261da177e4SLinus Torvalds } 2271da177e4SLinus Torvalds 2281da177e4SLinus Torvalds /* 2291da177e4SLinus Torvalds * Strange swizzling function for shmem_getpage (and shmem_unuse) 2301da177e4SLinus Torvalds */ 2311da177e4SLinus Torvalds int move_from_swap_cache(struct page *page, unsigned long index, 2321da177e4SLinus Torvalds struct address_space *mapping) 2331da177e4SLinus Torvalds { 2341da177e4SLinus Torvalds int err = add_to_page_cache(page, mapping, index, GFP_ATOMIC); 2351da177e4SLinus Torvalds if (!err) { 2361da177e4SLinus Torvalds delete_from_swap_cache(page); 2371da177e4SLinus Torvalds /* shift page from clean_pages to dirty_pages list */ 2381da177e4SLinus Torvalds ClearPageDirty(page); 2391da177e4SLinus Torvalds set_page_dirty(page); 2401da177e4SLinus Torvalds } 2411da177e4SLinus Torvalds return err; 2421da177e4SLinus Torvalds } 2431da177e4SLinus Torvalds 2441da177e4SLinus Torvalds /* 2451da177e4SLinus Torvalds * If we are the only user, then try to free up the swap cache. 2461da177e4SLinus Torvalds * 2471da177e4SLinus Torvalds * Its ok to check for PageSwapCache without the page lock 2481da177e4SLinus Torvalds * here because we are going to recheck again inside 2491da177e4SLinus Torvalds * exclusive_swap_page() _with_ the lock. 2501da177e4SLinus Torvalds * - Marcelo 2511da177e4SLinus Torvalds */ 2521da177e4SLinus Torvalds static inline void free_swap_cache(struct page *page) 2531da177e4SLinus Torvalds { 2541da177e4SLinus Torvalds if (PageSwapCache(page) && !TestSetPageLocked(page)) { 2551da177e4SLinus Torvalds remove_exclusive_swap_page(page); 2561da177e4SLinus Torvalds unlock_page(page); 2571da177e4SLinus Torvalds } 2581da177e4SLinus Torvalds } 2591da177e4SLinus Torvalds 2601da177e4SLinus Torvalds /* 2611da177e4SLinus Torvalds * Perform a free_page(), also freeing any swap cache associated with 262*b8072f09SHugh Dickins * this page if it is the last user of the page. 2631da177e4SLinus Torvalds */ 2641da177e4SLinus Torvalds void free_page_and_swap_cache(struct page *page) 2651da177e4SLinus Torvalds { 2661da177e4SLinus Torvalds free_swap_cache(page); 2671da177e4SLinus Torvalds page_cache_release(page); 2681da177e4SLinus Torvalds } 2691da177e4SLinus Torvalds 2701da177e4SLinus Torvalds /* 2711da177e4SLinus Torvalds * Passed an array of pages, drop them all from swapcache and then release 2721da177e4SLinus Torvalds * them. They are removed from the LRU and freed if this is their last use. 2731da177e4SLinus Torvalds */ 2741da177e4SLinus Torvalds void free_pages_and_swap_cache(struct page **pages, int nr) 2751da177e4SLinus Torvalds { 2761da177e4SLinus Torvalds int chunk = 16; 2771da177e4SLinus Torvalds struct page **pagep = pages; 2781da177e4SLinus Torvalds 2791da177e4SLinus Torvalds lru_add_drain(); 2801da177e4SLinus Torvalds while (nr) { 2811da177e4SLinus Torvalds int todo = min(chunk, nr); 2821da177e4SLinus Torvalds int i; 2831da177e4SLinus Torvalds 2841da177e4SLinus Torvalds for (i = 0; i < todo; i++) 2851da177e4SLinus Torvalds free_swap_cache(pagep[i]); 2861da177e4SLinus Torvalds release_pages(pagep, todo, 0); 2871da177e4SLinus Torvalds pagep += todo; 2881da177e4SLinus Torvalds nr -= todo; 2891da177e4SLinus Torvalds } 2901da177e4SLinus Torvalds } 2911da177e4SLinus Torvalds 2921da177e4SLinus Torvalds /* 2931da177e4SLinus Torvalds * Lookup a swap entry in the swap cache. A found page will be returned 2941da177e4SLinus Torvalds * unlocked and with its refcount incremented - we rely on the kernel 2951da177e4SLinus Torvalds * lock getting page table operations atomic even if we drop the page 2961da177e4SLinus Torvalds * lock before returning. 2971da177e4SLinus Torvalds */ 2981da177e4SLinus Torvalds struct page * lookup_swap_cache(swp_entry_t entry) 2991da177e4SLinus Torvalds { 3001da177e4SLinus Torvalds struct page *page; 3011da177e4SLinus Torvalds 3021da177e4SLinus Torvalds page = find_get_page(&swapper_space, entry.val); 3031da177e4SLinus Torvalds 3041da177e4SLinus Torvalds if (page) 3051da177e4SLinus Torvalds INC_CACHE_INFO(find_success); 3061da177e4SLinus Torvalds 3071da177e4SLinus Torvalds INC_CACHE_INFO(find_total); 3081da177e4SLinus Torvalds return page; 3091da177e4SLinus Torvalds } 3101da177e4SLinus Torvalds 3111da177e4SLinus Torvalds /* 3121da177e4SLinus Torvalds * Locate a page of swap in physical memory, reserving swap cache space 3131da177e4SLinus Torvalds * and reading the disk if it is not already cached. 3141da177e4SLinus Torvalds * A failure return means that either the page allocation failed or that 3151da177e4SLinus Torvalds * the swap entry is no longer in use. 3161da177e4SLinus Torvalds */ 3171da177e4SLinus Torvalds struct page *read_swap_cache_async(swp_entry_t entry, 3181da177e4SLinus Torvalds struct vm_area_struct *vma, unsigned long addr) 3191da177e4SLinus Torvalds { 3201da177e4SLinus Torvalds struct page *found_page, *new_page = NULL; 3211da177e4SLinus Torvalds int err; 3221da177e4SLinus Torvalds 3231da177e4SLinus Torvalds do { 3241da177e4SLinus Torvalds /* 3251da177e4SLinus Torvalds * First check the swap cache. Since this is normally 3261da177e4SLinus Torvalds * called after lookup_swap_cache() failed, re-calling 3271da177e4SLinus Torvalds * that would confuse statistics. 3281da177e4SLinus Torvalds */ 3291da177e4SLinus Torvalds found_page = find_get_page(&swapper_space, entry.val); 3301da177e4SLinus Torvalds if (found_page) 3311da177e4SLinus Torvalds break; 3321da177e4SLinus Torvalds 3331da177e4SLinus Torvalds /* 3341da177e4SLinus Torvalds * Get a new page to read into from swap. 3351da177e4SLinus Torvalds */ 3361da177e4SLinus Torvalds if (!new_page) { 3371da177e4SLinus Torvalds new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr); 3381da177e4SLinus Torvalds if (!new_page) 3391da177e4SLinus Torvalds break; /* Out of memory */ 3401da177e4SLinus Torvalds } 3411da177e4SLinus Torvalds 3421da177e4SLinus Torvalds /* 3431da177e4SLinus Torvalds * Associate the page with swap entry in the swap cache. 3441da177e4SLinus Torvalds * May fail (-ENOENT) if swap entry has been freed since 3451da177e4SLinus Torvalds * our caller observed it. May fail (-EEXIST) if there 3461da177e4SLinus Torvalds * is already a page associated with this entry in the 3471da177e4SLinus Torvalds * swap cache: added by a racing read_swap_cache_async, 3481da177e4SLinus Torvalds * or by try_to_swap_out (or shmem_writepage) re-using 3491da177e4SLinus Torvalds * the just freed swap entry for an existing page. 3501da177e4SLinus Torvalds * May fail (-ENOMEM) if radix-tree node allocation failed. 3511da177e4SLinus Torvalds */ 3521da177e4SLinus Torvalds err = add_to_swap_cache(new_page, entry); 3531da177e4SLinus Torvalds if (!err) { 3541da177e4SLinus Torvalds /* 3551da177e4SLinus Torvalds * Initiate read into locked page and return. 3561da177e4SLinus Torvalds */ 3571da177e4SLinus Torvalds lru_cache_add_active(new_page); 3581da177e4SLinus Torvalds swap_readpage(NULL, new_page); 3591da177e4SLinus Torvalds return new_page; 3601da177e4SLinus Torvalds } 3611da177e4SLinus Torvalds } while (err != -ENOENT && err != -ENOMEM); 3621da177e4SLinus Torvalds 3631da177e4SLinus Torvalds if (new_page) 3641da177e4SLinus Torvalds page_cache_release(new_page); 3651da177e4SLinus Torvalds return found_page; 3661da177e4SLinus Torvalds } 367