/*
 * linux/mm/swap_state.c
 *
 * Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 * Swap reorganised 29.12.95, Stephen Tweedie
 *
 * Rewritten to use page cache, (C) 1998 Stephen Tweedie
 */
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/backing-dev.h>

#include <asm/pgtable.h>

/*
 * swapper_space is a fiction, retained to simplify the path through
 * vmscan's shrink_list, to make sync_page look nicer, and to allow
 * future use of radix_tree tags in the swap cache.
 */
static struct address_space_operations swap_aops = {
	.writepage	= swap_writepage,
	.sync_page	= block_sync_page,
	.set_page_dirty	= __set_page_dirty_nobuffers,
};

static struct backing_dev_info swap_backing_dev_info = {
	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
	.unplug_io_fn	= swap_unplug_io_fn,
};

struct address_space swapper_space = {
	.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
	.tree_lock	= RW_LOCK_UNLOCKED,
	.a_ops		= &swap_aops,
	.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
	.backing_dev_info = &swap_backing_dev_info,
};

#define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)

static struct {
	unsigned long add_total;
	unsigned long del_total;
	unsigned long find_success;
	unsigned long find_total;
	unsigned long noent_race;
	unsigned long exist_race;
} swap_cache_info;

void show_swap_cache_info(void)
{
	printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n",
		swap_cache_info.add_total, swap_cache_info.del_total,
		swap_cache_info.find_success, swap_cache_info.find_total,
		swap_cache_info.noent_race, swap_cache_info.exist_race);
	printk("Free swap = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10));
	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
}

/*
 * __add_to_swap_cache resembles add_to_page_cache on swapper_space,
 * but sets SwapCache flag and private instead of mapping and index.
 */
static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
			       gfp_t gfp_mask)
{
	int error;

	BUG_ON(PageSwapCache(page));
	BUG_ON(PagePrivate(page));
	error = radix_tree_preload(gfp_mask);
	if (!error) {
		write_lock_irq(&swapper_space.tree_lock);
		error = radix_tree_insert(&swapper_space.page_tree,
						entry.val, page);
		if (!error) {
			page_cache_get(page);
			SetPageLocked(page);
			SetPageSwapCache(page);
			set_page_private(page, entry.val);
			total_swapcache_pages++;
			pagecache_acct(1);
		}
		write_unlock_irq(&swapper_space.tree_lock);
		radix_tree_preload_end();
	}
	return error;
}

static int add_to_swap_cache(struct page *page, swp_entry_t entry)
{
	int error;

	if (!swap_duplicate(entry)) {
		INC_CACHE_INFO(noent_race);
		return -ENOENT;
	}
	error = __add_to_swap_cache(page, entry, GFP_KERNEL);
	/*
	 * Anon pages are already on the LRU, we don't run lru_cache_add here.
	 */
	if (error) {
		swap_free(entry);
		if (error == -EEXIST)
			INC_CACHE_INFO(exist_race);
		return error;
	}
	INC_CACHE_INFO(add_total);
	return 0;
}
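
/*
 * A rough summary of the contract of the two helpers above, inferred from
 * the code rather than from any authoritative documentation: on success the
 * page sits in swapper_space.page_tree keyed by entry.val, PG_locked and
 * PG_swapcache are set, page->private holds entry.val, and the swap cache
 * owns one extra page reference (plus, for add_to_swap_cache, the
 * swap_duplicate() reference on the entry).  The error codes mirror the
 * races described above read_swap_cache_async() below:
 *
 *	-ENOENT		the swap entry was freed before we could pin it
 *	-EEXIST		another page already owns this entry in the cache
 *	-ENOMEM		radix-tree node allocation failed
 *
 * A hypothetical caller, sketched only to show the reference handling
 * (it mirrors what read_swap_cache_async() does further down):
 *
 *	err = add_to_swap_cache(page, entry);
 *	if (err)
 *		return err;		(swap reference already dropped)
 *	lru_cache_add_active(page);
 *	swap_readpage(NULL, page);	(the read path unlocks the page
 *					 when the I/O completes)
 */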

/*
 * This must be called only on pages that have
 * been verified to be in the swap cache.
 */
void __delete_from_swap_cache(struct page *page)
{
	BUG_ON(!PageLocked(page));
	BUG_ON(!PageSwapCache(page));
	BUG_ON(PageWriteback(page));
	BUG_ON(PagePrivate(page));

	radix_tree_delete(&swapper_space.page_tree, page_private(page));
	set_page_private(page, 0);
	ClearPageSwapCache(page);
	total_swapcache_pages--;
	pagecache_acct(-1);
	INC_CACHE_INFO(del_total);
}

/**
 * add_to_swap - allocate swap space for a page
 * @page: page we want to move to swap
 *
 * Allocate swap space for the page and add the page to the
 * swap cache. Caller needs to hold the page lock.
 */
int add_to_swap(struct page *page)
{
	swp_entry_t entry;
	int err;

	BUG_ON(!PageLocked(page));

	for (;;) {
		entry = get_swap_page();
		if (!entry.val)
			return 0;

		/*
		 * Radix-tree node allocations from PF_MEMALLOC contexts could
		 * completely exhaust the page allocator. __GFP_NOMEMALLOC
		 * stops emergency reserves from being allocated.
		 *
		 * TODO: this could cause a theoretical memory reclaim
		 * deadlock in the swap out path.
		 */
		/*
		 * Add it to the swap cache and mark it dirty
		 */
		err = __add_to_swap_cache(page, entry,
				GFP_ATOMIC|__GFP_NOMEMALLOC|__GFP_NOWARN);

		switch (err) {
		case 0:				/* Success */
			SetPageUptodate(page);
			SetPageDirty(page);
			INC_CACHE_INFO(add_total);
			return 1;
		case -EEXIST:
			/* Raced with "speculative" read_swap_cache_async */
			INC_CACHE_INFO(exist_race);
			swap_free(entry);
			continue;
		default:
			/* -ENOMEM radix-tree allocation failure */
			swap_free(entry);
			return 0;
		}
	}
}

/*
 * This must be called only on pages that have
 * been verified to be in the swap cache and locked.
 * It will never put the page into the free list,
 * the caller has a reference on the page.
 */
void delete_from_swap_cache(struct page *page)
{
	swp_entry_t entry;

	entry.val = page_private(page);

	write_lock_irq(&swapper_space.tree_lock);
	__delete_from_swap_cache(page);
	write_unlock_irq(&swapper_space.tree_lock);

	swap_free(entry);
	page_cache_release(page);
}

/*
 * Strange swizzling function only for use by shmem_writepage
 */
int move_to_swap_cache(struct page *page, swp_entry_t entry)
{
	int err = __add_to_swap_cache(page, entry, GFP_ATOMIC);
	if (!err) {
		remove_from_page_cache(page);
		page_cache_release(page);	/* pagecache ref */
		if (!swap_duplicate(entry))
			BUG();
		SetPageDirty(page);
		INC_CACHE_INFO(add_total);
	} else if (err == -EEXIST)
		INC_CACHE_INFO(exist_race);
	return err;
}

/*
 * Strange swizzling function for shmem_getpage (and shmem_unuse)
 */
int move_from_swap_cache(struct page *page, unsigned long index,
		struct address_space *mapping)
{
	int err = add_to_page_cache(page, mapping, index, GFP_ATOMIC);
	if (!err) {
		delete_from_swap_cache(page);
		/* shift page from clean_pages to dirty_pages list */
		ClearPageDirty(page);
		set_page_dirty(page);
	}
	return err;
}
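
/*
 * The two swizzling helpers above are, roughly, inverses of each other.
 * A simplified sketch of how shmem is expected to use them (not a copy of
 * the real shmem code, which has more locking and error handling):
 *
 *	entry = get_swap_page();			(shmem_writepage)
 *	if (entry.val && move_to_swap_cache(page, entry) == 0)
 *		the page now lives dirty in the swap cache, keyed by
 *		entry.val, and will be written out via swap_writepage();
 *
 *	if (move_from_swap_cache(page, index, mapping) == 0)  (shmem_getpage)
 *		the page is back in the file's page cache at @index and
 *		redirtied so normal writeback picks it up.
 */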

/*
 * If we are the only user, then try to free up the swap cache.
 *
 * It's ok to check for PageSwapCache without the page lock
 * here because we are going to recheck again inside
 * exclusive_swap_page() _with_ the lock.
 *				- Marcelo
 */
static inline void free_swap_cache(struct page *page)
{
	if (PageSwapCache(page) && !TestSetPageLocked(page)) {
		remove_exclusive_swap_page(page);
		unlock_page(page);
	}
}

/*
 * Perform a free_page(), also freeing any swap cache associated with
 * this page if it is the last user of the page.
 */
void free_page_and_swap_cache(struct page *page)
{
	free_swap_cache(page);
	page_cache_release(page);
}

/*
 * Passed an array of pages, drop them all from swapcache and then release
 * them. They are removed from the LRU and freed if this is their last use.
 */
void free_pages_and_swap_cache(struct page **pages, int nr)
{
	int chunk = 16;
	struct page **pagep = pages;

	lru_add_drain();
	while (nr) {
		int todo = min(chunk, nr);
		int i;

		for (i = 0; i < todo; i++)
			free_swap_cache(pagep[i]);
		release_pages(pagep, todo, 0);
		pagep += todo;
		nr -= todo;
	}
}

/*
 * Lookup a swap entry in the swap cache. A found page will be returned
 * unlocked and with its refcount incremented - we rely on the kernel
 * lock getting page table operations atomic even if we drop the page
 * lock before returning.
 */
struct page *lookup_swap_cache(swp_entry_t entry)
{
	struct page *page;

	page = find_get_page(&swapper_space, entry.val);

	if (page)
		INC_CACHE_INFO(find_success);

	INC_CACHE_INFO(find_total);
	return page;
}

/*
 * Locate a page of swap in physical memory, reserving swap cache space
 * and reading the disk if it is not already cached.
 * A failure return means that either the page allocation failed or that
 * the swap entry is no longer in use.
 */
struct page *read_swap_cache_async(swp_entry_t entry,
			struct vm_area_struct *vma, unsigned long addr)
{
	struct page *found_page, *new_page = NULL;
	int err;

	do {
		/*
		 * First check the swap cache. Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(&swapper_space, entry.val);
		if (found_page)
			break;

		/*
		 * Get a new page to read into from swap.
		 */
		if (!new_page) {
			new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
			if (!new_page)
				break;		/* Out of memory */
		}

		/*
		 * Associate the page with swap entry in the swap cache.
		 * May fail (-ENOENT) if swap entry has been freed since
		 * our caller observed it. May fail (-EEXIST) if there
		 * is already a page associated with this entry in the
		 * swap cache: added by a racing read_swap_cache_async,
		 * or by try_to_swap_out (or shmem_writepage) re-using
		 * the just freed swap entry for an existing page.
		 * May fail (-ENOMEM) if radix-tree node allocation failed.
		 */
		err = add_to_swap_cache(new_page, entry);
		if (!err) {
			/*
			 * Initiate read into locked page and return.
			 */
			lru_cache_add_active(new_page);
			swap_readpage(NULL, new_page);
			return new_page;
		}
	} while (err != -ENOENT && err != -ENOMEM);

	if (new_page)
		page_cache_release(new_page);
	return found_page;
}
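
/*
 * For reference, the usual consumer of the two routines above is the page
 * fault path (do_swap_page in mm/memory.c). A simplified sketch of that
 * pattern, with readahead and pte rechecking omitted:
 *
 *	page = lookup_swap_cache(entry);
 *	if (!page)
 *		page = read_swap_cache_async(entry, vma, address);
 *	if (!page)
 *		back off: the allocation failed or the entry was freed;
 *	else
 *		the page is returned with its refcount raised and the
 *		read possibly still in flight, so callers that need the
 *		data lock_page() (or wait_on_page_locked()) first.
 */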