// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/mm/swap_state.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 *
 *  Rewritten to use page cache, (C) 1998 Stephen Tweedie
 */
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/migrate.h>
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>

#include <asm/pgtable.h>

/*
 * swapper_space is a fiction, retained to simplify the path through
 * vmscan's shrink_page_list.
 */
static const struct address_space_operations swap_aops = {
	.writepage	= swap_writepage,
	.set_page_dirty	= swap_set_page_dirty,
#ifdef CONFIG_MIGRATION
	.migratepage	= migrate_page,
#endif
};

struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly;
static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly;
static bool enable_vma_readahead __read_mostly = true;

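/*
 * Per-VMA swap readahead state is packed into vma->swap_readahead_info,
 * a single atomic_long: the page-aligned address of the last readahead
 * (SWAP_RA_ADDR), the readahead window size (SWAP_RA_WIN) and a hit
 * counter (SWAP_RA_HITS) in the low-order bits.
 */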
#define SWAP_RA_WIN_SHIFT	(PAGE_SHIFT / 2)
#define SWAP_RA_HITS_MASK	((1UL << SWAP_RA_WIN_SHIFT) - 1)
#define SWAP_RA_HITS_MAX	SWAP_RA_HITS_MASK
#define SWAP_RA_WIN_MASK	(~PAGE_MASK & ~SWAP_RA_HITS_MASK)

#define SWAP_RA_HITS(v)		((v) & SWAP_RA_HITS_MASK)
#define SWAP_RA_WIN(v)		(((v) & SWAP_RA_WIN_MASK) >> SWAP_RA_WIN_SHIFT)
#define SWAP_RA_ADDR(v)		((v) & PAGE_MASK)

#define SWAP_RA_VAL(addr, win, hits)				\
	(((addr) & PAGE_MASK) |					\
	 (((win) << SWAP_RA_WIN_SHIFT) & SWAP_RA_WIN_MASK) |	\
	 ((hits) & SWAP_RA_HITS_MASK))

/* Initial readahead hits is 4 to start up with a small window */
#define GET_SWAP_RA_VAL(vma)					\
	(atomic_long_read(&(vma)->swap_readahead_info) ? : 4)

#define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)
#define ADD_CACHE_INFO(x, nr)	do { swap_cache_info.x += (nr); } while (0)

static struct {
	unsigned long add_total;
	unsigned long del_total;
	unsigned long find_success;
	unsigned long find_total;
} swap_cache_info;

unsigned long total_swapcache_pages(void)
{
	unsigned int i, j, nr;
	unsigned long ret = 0;
	struct address_space *spaces;
	struct swap_info_struct *si;

	for (i = 0; i < MAX_SWAPFILES; i++) {
		swp_entry_t entry = swp_entry(i, 1);

		/* Avoid get_swap_device() warning for a bad swap entry */
		if (!swp_swap_info(entry))
			continue;
		/* Prevent swapoff from freeing swapper_spaces */
		si = get_swap_device(entry);
		if (!si)
			continue;
		nr = nr_swapper_spaces[i];
		spaces = swapper_spaces[i];
		for (j = 0; j < nr; j++)
			ret += spaces[j].nrpages;
		put_swap_device(si);
	}
	return ret;
}

static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);

void show_swap_cache_info(void)
{
	printk("%lu pages in swap cache\n", total_swapcache_pages());
	printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n",
		swap_cache_info.add_total, swap_cache_info.del_total,
		swap_cache_info.find_success, swap_cache_info.find_total);
	printk("Free swap  = %ldkB\n",
		get_nr_swap_pages() << (PAGE_SHIFT - 10));
	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
}

/*
 * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
 * but sets SwapCache flag and private instead of mapping and index.
 */
int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp)
{
	struct address_space *address_space = swap_address_space(entry);
	pgoff_t idx = swp_offset(entry);
	XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page));
	unsigned long i, nr = hpage_nr_pages(page);

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageSwapCache(page), page);
	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);

	page_ref_add(page, nr);
	SetPageSwapCache(page);

	do {
		xas_lock_irq(&xas);
		xas_create_range(&xas);
		if (xas_error(&xas))
			goto unlock;
		for (i = 0; i < nr; i++) {
			VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
			set_page_private(page + i, entry.val + i);
			xas_store(&xas, page);
			xas_next(&xas);
		}
		address_space->nrpages += nr;
		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
		ADD_CACHE_INFO(add_total, nr);
unlock:
		xas_unlock_irq(&xas);
	} while (xas_nomem(&xas, gfp));

	if (!xas_error(&xas))
		return 0;

	ClearPageSwapCache(page);
	page_ref_sub(page, nr);
	return xas_error(&xas);
}

/*
 * This must be called only on pages that have
 * been verified to be in the swap cache.
 */
void __delete_from_swap_cache(struct page *page, swp_entry_t entry)
{
	struct address_space *address_space = swap_address_space(entry);
	int i, nr = hpage_nr_pages(page);
	pgoff_t idx = swp_offset(entry);
	XA_STATE(xas, &address_space->i_pages, idx);

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
	VM_BUG_ON_PAGE(PageWriteback(page), page);

	for (i = 0; i < nr; i++) {
		void *entry = xas_store(&xas, NULL);
		VM_BUG_ON_PAGE(entry != page, entry);
		set_page_private(page + i, 0);
		xas_next(&xas);
	}
	ClearPageSwapCache(page);
	address_space->nrpages -= nr;
	__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
	ADD_CACHE_INFO(del_total, nr);
}

/**
 * add_to_swap - allocate swap space for a page
 * @page: page we want to move to swap
 *
 * Allocate swap space for the page and add the page to the
 * swap cache.  Caller needs to hold the page lock.
 */
int add_to_swap(struct page *page)
{
	swp_entry_t entry;
	int err;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageUptodate(page), page);

	entry = get_swap_page(page);
	if (!entry.val)
		return 0;

	/*
	 * XArray node allocations from PF_MEMALLOC contexts could
	 * completely exhaust the page allocator. __GFP_NOMEMALLOC
	 * stops emergency reserves from being allocated.
	 *
	 * TODO: this could cause a theoretical memory reclaim
	 * deadlock in the swap out path.
	 */
	/*
	 * Add it to the swap cache.
	 */
	err = add_to_swap_cache(page, entry,
			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
	if (err)
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		goto fail;
	/*
	 * Normally the page will be dirtied in unmap because its pte should be
	 * dirty. A special case is MADV_FREE page. The page's pte could have
	 * dirty bit cleared but the page's SwapBacked bit is still set because
	 * clearing the dirty bit and SwapBacked bit is not protected by a
	 * lock. For such page, unmap will not set dirty bit for it, so page
	 * reclaim will not write the page out. This can cause data corruption
	 * when the page is swapped in later. Always setting the dirty bit for
	 * the page solves the problem.
	 */
	set_page_dirty(page);

	return 1;

fail:
	put_swap_page(page, entry);
	return 0;
}

/*
 * This must be called only on pages that have
 * been verified to be in the swap cache and locked.
 * It will never put the page into the free list,
 * the caller has a reference on the page.
 */
void delete_from_swap_cache(struct page *page)
{
	swp_entry_t entry = { .val = page_private(page) };
	struct address_space *address_space = swap_address_space(entry);

	xa_lock_irq(&address_space->i_pages);
	__delete_from_swap_cache(page, entry);
	xa_unlock_irq(&address_space->i_pages);

	put_swap_page(page, entry);
	page_ref_sub(page, hpage_nr_pages(page));
}

/*
 * If we are the only user, then try to free up the swap cache.
 *
 * It's ok to check for PageSwapCache without the page lock
 * here because we are going to recheck again inside
 * try_to_free_swap() _with_ the lock.
 *					- Marcelo
 */
static inline void free_swap_cache(struct page *page)
{
	if (PageSwapCache(page) && !page_mapped(page) && trylock_page(page)) {
		try_to_free_swap(page);
		unlock_page(page);
	}
}

/*
 * Perform a free_page(), also freeing any swap cache associated with
 * this page if it is the last user of the page.
 */
void free_page_and_swap_cache(struct page *page)
{
	free_swap_cache(page);
	if (!is_huge_zero_page(page))
		put_page(page);
}

/*
 * Passed an array of pages, drop them all from swapcache and then release
 * them.  They are removed from the LRU and freed if this is their last use.
 */
void free_pages_and_swap_cache(struct page **pages, int nr)
{
	struct page **pagep = pages;
	int i;

	lru_add_drain();
	for (i = 0; i < nr; i++)
		free_swap_cache(pagep[i]);
	release_pages(pagep, nr);
}

static inline bool swap_use_vma_readahead(void)
{
	return READ_ONCE(enable_vma_readahead) && !atomic_read(&nr_rotate_swap);
}

/*
 * Look up a swap entry in the swap cache. A found page will be returned
 * unlocked and with its refcount incremented - we rely on the kernel
 * lock getting page table operations atomic even if we drop the page
 * lock before returning.
 */
struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
			       unsigned long addr)
{
	struct page *page;
	struct swap_info_struct *si;

	si = get_swap_device(entry);
	if (!si)
		return NULL;
	page = find_get_page(swap_address_space(entry), swp_offset(entry));
	put_swap_device(si);

	INC_CACHE_INFO(find_total);
	if (page) {
		bool vma_ra = swap_use_vma_readahead();
		bool readahead;

		INC_CACHE_INFO(find_success);
		/*
		 * At the moment, we don't support PG_readahead for anon THP
		 * so let's bail out rather than confusing the readahead stat.
		 */
		if (unlikely(PageTransCompound(page)))
			return page;

		readahead = TestClearPageReadahead(page);
		if (vma && vma_ra) {
			unsigned long ra_val;
			int win, hits;

			ra_val = GET_SWAP_RA_VAL(vma);
			win = SWAP_RA_WIN(ra_val);
			hits = SWAP_RA_HITS(ra_val);
			if (readahead)
				hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
			atomic_long_set(&vma->swap_readahead_info,
					SWAP_RA_VAL(addr, win, hits));
		}

		if (readahead) {
			count_vm_event(SWAP_RA_HIT);
			if (!vma || !vma_ra)
				atomic_inc(&swapin_readahead_hits);
		}
	}

	return page;
}

struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr,
			bool *new_page_allocated)
{
	struct page *found_page = NULL, *new_page = NULL;
	struct swap_info_struct *si;
	int err;
	*new_page_allocated = false;

	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		si = get_swap_device(entry);
		if (!si)
			break;
		found_page = find_get_page(swap_address_space(entry),
					   swp_offset(entry));
		put_swap_device(si);
		if (found_page)
			break;

		/*
		 * Just skip readahead for an unused swap slot.
		 * During swap_off when swap_slot_cache is disabled,
		 * we have to handle the race between putting
		 * swap entry in swap cache and marking swap slot
		 * as SWAP_HAS_CACHE.  That's done in a later part of this
		 * code, or else swap_off will be aborted if we return NULL.
		 */
		if (!__swp_swapcount(entry) && swap_slot_cache_enabled)
			break;

		/*
		 * Get a new page to read into from swap.
		 */
		if (!new_page) {
			new_page = alloc_page_vma(gfp_mask, vma, addr);
			if (!new_page)
				break;		/* Out of memory */
		}

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (err == -EEXIST) {
			/*
			 * We might race against get_swap_page() and stumble
			 * across a SWAP_HAS_CACHE swap_map entry whose page
			 * has not been brought into the swapcache yet.
			 */
			cond_resched();
			continue;
		} else if (err)		/* swp entry is obsolete ? */
			break;

		/* May fail (-ENOMEM) if XArray node allocation failed. */
		__SetPageLocked(new_page);
		__SetPageSwapBacked(new_page);
		err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
		if (likely(!err)) {
			/* Initiate read into locked page */
			SetPageWorkingset(new_page);
			lru_cache_add_anon(new_page);
			*new_page_allocated = true;
			return new_page;
		}
		__ClearPageLocked(new_page);
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		put_swap_page(new_page, entry);
	} while (err != -ENOMEM);

	if (new_page)
		put_page(new_page);
	return found_page;
}

/*
 * Locate a page of swap in physical memory, reserving swap cache space
 * and reading the disk if it is not already cached.
 * A failure return means that either the page allocation failed or that
 * the swap entry is no longer in use.
 */
struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
		struct vm_area_struct *vma, unsigned long addr, bool do_poll)
{
	bool page_was_allocated;
	struct page *retpage = __read_swap_cache_async(entry, gfp_mask,
			vma, addr, &page_was_allocated);

	if (page_was_allocated)
		swap_readpage(retpage, do_poll);

	return retpage;
}

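/*
 * Work out the next readahead window from the number of readahead hits:
 * "hits + 2" rounded up to a power of two, clamped to max_pages, and
 * never shrunk below half of the previous window.  With no hits, fall
 * back to a single page unless the access is adjacent to the previous
 * offset.
 */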
static unsigned int __swapin_nr_pages(unsigned long prev_offset,
				      unsigned long offset,
				      int hits,
				      int max_pages,
				      int prev_win)
{
	unsigned int pages, last_ra;

	/*
	 * This heuristic has been found to work well on both sequential and
	 * random loads, swapping to hard disk or to SSD: please don't ask
	 * what the "+ 2" means, it just happens to work well, that's all.
	 */
	pages = hits + 2;
	if (pages == 2) {
		/*
		 * We can have no readahead hits to judge by: but must not get
		 * stuck here forever, so check for an adjacent offset instead
		 * (and don't even bother to check whether swap type is same).
		 */
		if (offset != prev_offset + 1 && offset != prev_offset - 1)
			pages = 1;
	} else {
		unsigned int roundup = 4;
		while (roundup < pages)
			roundup <<= 1;
		pages = roundup;
	}

	if (pages > max_pages)
		pages = max_pages;

	/* Don't shrink readahead too fast */
	last_ra = prev_win / 2;
	if (pages < last_ra)
		pages = last_ra;

	return pages;
}

static unsigned long swapin_nr_pages(unsigned long offset)
{
	static unsigned long prev_offset;
	unsigned int hits, pages, max_pages;
	static atomic_t last_readahead_pages;

	max_pages = 1 << READ_ONCE(page_cluster);
	if (max_pages <= 1)
		return 1;

	hits = atomic_xchg(&swapin_readahead_hits, 0);
	pages = __swapin_nr_pages(READ_ONCE(prev_offset), offset, hits,
				  max_pages,
				  atomic_read(&last_readahead_pages));
	if (!hits)
		WRITE_ONCE(prev_offset, offset);
	atomic_set(&last_readahead_pages, pages);

	return pages;
}

/**
 * swap_cluster_readahead - swap in pages in hope we need them soon
 * @entry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vmf: fault information
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * Primitive swap readahead code. We simply read an aligned block of
 * (1 << page_cluster) entries in the swap area. This method is chosen
 * because it doesn't cost us any seek time.  We also make sure to queue
 * the 'original' request together with the readahead ones...
 *
 * This has been extended to use the NUMA policies from the mm triggering
 * the readahead.
 *
 * Caller must hold read mmap_sem if vmf->vma is not NULL.
 */
struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
				struct vm_fault *vmf)
{
	struct page *page;
	unsigned long entry_offset = swp_offset(entry);
	unsigned long offset = entry_offset;
	unsigned long start_offset, end_offset;
	unsigned long mask;
	struct swap_info_struct *si = swp_swap_info(entry);
	struct blk_plug plug;
	bool do_poll = true, page_allocated;
	struct vm_area_struct *vma = vmf->vma;
	unsigned long addr = vmf->address;

	mask = swapin_nr_pages(offset) - 1;
	if (!mask)
		goto skip;

	/* Test swap type to make sure the dereference is safe */
	if (likely(si->flags & (SWP_BLKDEV | SWP_FS))) {
		struct inode *inode = si->swap_file->f_mapping->host;
		if (inode_read_congested(inode))
			goto skip;
	}

	do_poll = false;
	/* Read a page_cluster sized and aligned cluster around offset. */
	start_offset = offset & ~mask;
	end_offset = offset | mask;
	if (!start_offset)	/* First page is swap header. */
		start_offset++;
	if (end_offset >= si->max)
		end_offset = si->max - 1;

	blk_start_plug(&plug);
	for (offset = start_offset; offset <= end_offset ; offset++) {
		/* Ok, do the async read-ahead now */
		page = __read_swap_cache_async(
			swp_entry(swp_type(entry), offset),
			gfp_mask, vma, addr, &page_allocated);
		if (!page)
			continue;
		if (page_allocated) {
			swap_readpage(page, false);
			if (offset != entry_offset) {
				SetPageReadahead(page);
				count_vm_event(SWAP_RA);
			}
		}
		put_page(page);
	}
	blk_finish_plug(&plug);

	lru_add_drain();	/* Push any new pages onto the LRU now */
skip:
	return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
}

int init_swap_address_space(unsigned int type, unsigned long nr_pages)
{
	struct address_space *spaces, *space;
	unsigned int i, nr;

	nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
	spaces = kvcalloc(nr, sizeof(struct address_space), GFP_KERNEL);
	if (!spaces)
		return -ENOMEM;
	for (i = 0; i < nr; i++) {
		space = spaces + i;
		xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
		atomic_set(&space->i_mmap_writable, 0);
		space->a_ops = &swap_aops;
		/* swap cache doesn't use writeback related tags */
		mapping_set_no_writeback_tags(space);
	}
	nr_swapper_spaces[type] = nr;
	swapper_spaces[type] = spaces;

	return 0;
}

void exit_swap_address_space(unsigned int type)
{
	kvfree(swapper_spaces[type]);
	nr_swapper_spaces[type] = 0;
	swapper_spaces[type] = NULL;
}

static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
				     unsigned long faddr,
				     unsigned long lpfn,
				     unsigned long rpfn,
				     unsigned long *start,
				     unsigned long *end)
{
	*start = max3(lpfn, PFN_DOWN(vma->vm_start),
		      PFN_DOWN(faddr & PMD_MASK));
	*end = min3(rpfn, PFN_DOWN(vma->vm_end),
		    PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
}

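/*
 * Fill *ra_info with the readahead window around the faulting address,
 * clamped to the VMA and the containing PMD, and record the PTEs that
 * cover it (copied on !CONFIG_64BIT, where the page table mapping may
 * go away before they are examined).
 */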
static void swap_ra_info(struct vm_fault *vmf,
			struct vma_swap_readahead *ra_info)
{
	struct vm_area_struct *vma = vmf->vma;
	unsigned long ra_val;
	swp_entry_t entry;
	unsigned long faddr, pfn, fpfn;
	unsigned long start, end;
	pte_t *pte, *orig_pte;
	unsigned int max_win, hits, prev_win, win, left;
#ifndef CONFIG_64BIT
	pte_t *tpte;
#endif

	max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
			     SWAP_RA_ORDER_CEILING);
	if (max_win == 1) {
		ra_info->win = 1;
		return;
	}

	faddr = vmf->address;
	orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
	entry = pte_to_swp_entry(*pte);
	if ((unlikely(non_swap_entry(entry)))) {
		pte_unmap(orig_pte);
		return;
	}

	fpfn = PFN_DOWN(faddr);
	ra_val = GET_SWAP_RA_VAL(vma);
	pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
	prev_win = SWAP_RA_WIN(ra_val);
	hits = SWAP_RA_HITS(ra_val);
	ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
					       max_win, prev_win);
	atomic_long_set(&vma->swap_readahead_info,
			SWAP_RA_VAL(faddr, win, 0));

	if (win == 1) {
		pte_unmap(orig_pte);
		return;
	}

	/* Copy the PTEs because the page table may be unmapped */
	if (fpfn == pfn + 1)
		swap_ra_clamp_pfn(vma, faddr, fpfn, fpfn + win, &start, &end);
	else if (pfn == fpfn + 1)
		swap_ra_clamp_pfn(vma, faddr, fpfn - win + 1, fpfn + 1,
				  &start, &end);
	else {
		left = (win - 1) / 2;
		swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
				  &start, &end);
	}
	ra_info->nr_pte = end - start;
	ra_info->offset = fpfn - start;
	pte -= ra_info->offset;
#ifdef CONFIG_64BIT
	ra_info->ptes = pte;
#else
	tpte = ra_info->ptes;
	for (pfn = start; pfn != end; pfn++)
		*tpte++ = *pte++;
#endif
	pte_unmap(orig_pte);
}

/**
 * swap_vma_readahead - swap in pages in hope we need them soon
 * @fentry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vmf: fault information
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * Primitive swap readahead code. We simply read in a few pages whose
 * virtual addresses are around the fault address in the same vma.
 *
 * Caller must hold read mmap_sem if vmf->vma is not NULL.
 *
 */
static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
				       struct vm_fault *vmf)
{
	struct blk_plug plug;
	struct vm_area_struct *vma = vmf->vma;
	struct page *page;
	pte_t *pte, pentry;
	swp_entry_t entry;
	unsigned int i;
	bool page_allocated;
	struct vma_swap_readahead ra_info = {0,};

	swap_ra_info(vmf, &ra_info);
	if (ra_info.win == 1)
		goto skip;

	blk_start_plug(&plug);
	for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
	     i++, pte++) {
		pentry = *pte;
		if (pte_none(pentry))
			continue;
		if (pte_present(pentry))
			continue;
		entry = pte_to_swp_entry(pentry);
		if (unlikely(non_swap_entry(entry)))
			continue;
		page = __read_swap_cache_async(entry, gfp_mask, vma,
					       vmf->address, &page_allocated);
		if (!page)
			continue;
		if (page_allocated) {
			swap_readpage(page, false);
			if (i != ra_info.offset) {
				SetPageReadahead(page);
				count_vm_event(SWAP_RA);
			}
		}
		put_page(page);
	}
	blk_finish_plug(&plug);
	lru_add_drain();
skip:
	return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
				     ra_info.win == 1);
}

/**
 * swapin_readahead - swap in pages in hope we need them soon
 * @entry: swap entry of this memory
 * @gfp_mask: memory allocation flags
 * @vmf: fault information
 *
 * Returns the struct page for entry and addr, after queueing swapin.
 *
 * It's the main entry point for swap readahead. Depending on the
 * configuration, it will read ahead blocks either cluster-based (i.e.,
 * physical disk based) or vma-based (i.e., based on the virtual addresses
 * around the faulting address).
 */
struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
				struct vm_fault *vmf)
{
	return swap_use_vma_readahead() ?
			swap_vma_readahead(entry, gfp_mask, vmf) :
			swap_cluster_readahead(entry, gfp_mask, vmf);
}

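/*
 * Sysfs knob: /sys/kernel/mm/swap/vma_ra_enabled switches between the
 * vma-based and cluster-based readahead policies above.
 */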
#ifdef CONFIG_SYSFS
static ssize_t vma_ra_enabled_show(struct kobject *kobj,
				     struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%s\n", enable_vma_readahead ? "true" : "false");
}
static ssize_t vma_ra_enabled_store(struct kobject *kobj,
				      struct kobj_attribute *attr,
				      const char *buf, size_t count)
{
	if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1))
		enable_vma_readahead = true;
	else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1))
		enable_vma_readahead = false;
	else
		return -EINVAL;

	return count;
}
static struct kobj_attribute vma_ra_enabled_attr =
	__ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show,
	       vma_ra_enabled_store);

static struct attribute *swap_attrs[] = {
	&vma_ra_enabled_attr.attr,
	NULL,
};

static struct attribute_group swap_attr_group = {
	.attrs = swap_attrs,
};

static int __init swap_init_sysfs(void)
{
	int err;
	struct kobject *swap_kobj;

	swap_kobj = kobject_create_and_add("swap", mm_kobj);
	if (!swap_kobj) {
		pr_err("failed to create swap kobject\n");
		return -ENOMEM;
	}
	err = sysfs_create_group(swap_kobj, &swap_attr_group);
	if (err) {
		pr_err("failed to register swap group\n");
		goto delete_obj;
	}
	return 0;

delete_obj:
	kobject_put(swap_kobj);
	return err;
}
subsys_initcall(swap_init_sysfs);
#endif