1 /* 2 * linux/mm/page_io.c 3 * 4 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 5 * 6 * Swap reorganised 29.12.95, 7 * Asynchronous swapping added 30.12.95. Stephen Tweedie 8 * Removed race in async swapping. 14.4.1996. Bruno Haible 9 * Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie 10 * Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman 11 */ 12 13 #include <linux/mm.h> 14 #include <linux/kernel_stat.h> 15 #include <linux/gfp.h> 16 #include <linux/pagemap.h> 17 #include <linux/swap.h> 18 #include <linux/bio.h> 19 #include <linux/swapops.h> 20 #include <linux/buffer_head.h> 21 #include <linux/writeback.h> 22 #include <linux/frontswap.h> 23 #include <linux/blkdev.h> 24 #include <linux/uio.h> 25 #include <asm/pgtable.h> 26 27 static struct bio *get_swap_bio(gfp_t gfp_flags, 28 struct page *page, bio_end_io_t end_io) 29 { 30 struct bio *bio; 31 32 bio = bio_alloc(gfp_flags, 1); 33 if (bio) { 34 bio->bi_iter.bi_sector = map_swap_page(page, &bio->bi_bdev); 35 bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9; 36 bio->bi_end_io = end_io; 37 38 bio_add_page(bio, page, PAGE_SIZE, 0); 39 BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE); 40 } 41 return bio; 42 } 43 44 void end_swap_bio_write(struct bio *bio) 45 { 46 struct page *page = bio->bi_io_vec[0].bv_page; 47 48 if (bio->bi_status) { 49 SetPageError(page); 50 /* 51 * We failed to write the page out to swap-space. 52 * Re-dirty the page in order to avoid it being reclaimed. 53 * Also print a dire warning that things will go BAD (tm) 54 * very quickly. 55 * 56 * Also clear PG_reclaim to avoid rotate_reclaimable_page() 57 */ 58 set_page_dirty(page); 59 pr_alert("Write-error on swap-device (%u:%u:%llu)\n", 60 imajor(bio->bi_bdev->bd_inode), 61 iminor(bio->bi_bdev->bd_inode), 62 (unsigned long long)bio->bi_iter.bi_sector); 63 ClearPageReclaim(page); 64 } 65 end_page_writeback(page); 66 bio_put(bio); 67 } 68 69 static void swap_slot_free_notify(struct page *page) 70 { 71 struct swap_info_struct *sis; 72 struct gendisk *disk; 73 74 /* 75 * There is no guarantee that the page is in swap cache - the software 76 * suspend code (at least) uses end_swap_bio_read() against a non- 77 * swapcache page. So we must check PG_swapcache before proceeding with 78 * this optimization. 79 */ 80 if (unlikely(!PageSwapCache(page))) 81 return; 82 83 sis = page_swap_info(page); 84 if (!(sis->flags & SWP_BLKDEV)) 85 return; 86 87 /* 88 * The swap subsystem performs lazy swap slot freeing, 89 * expecting that the page will be swapped out again. 90 * So we can avoid an unnecessary write if the page 91 * isn't redirtied. 92 * This is good for real swap storage because we can 93 * reduce unnecessary I/O and enhance wear-leveling 94 * if an SSD is used as the as swap device. 95 * But if in-memory swap device (eg zram) is used, 96 * this causes a duplicated copy between uncompressed 97 * data in VM-owned memory and compressed data in 98 * zram-owned memory. So let's free zram-owned memory 99 * and make the VM-owned decompressed page *dirty*, 100 * so the page should be swapped out somewhere again if 101 * we again wish to reclaim it. 102 */ 103 disk = sis->bdev->bd_disk; 104 if (disk->fops->swap_slot_free_notify) { 105 swp_entry_t entry; 106 unsigned long offset; 107 108 entry.val = page_private(page); 109 offset = swp_offset(entry); 110 111 SetPageDirty(page); 112 disk->fops->swap_slot_free_notify(sis->bdev, 113 offset); 114 } 115 } 116 117 static void end_swap_bio_read(struct bio *bio) 118 { 119 struct page *page = bio->bi_io_vec[0].bv_page; 120 struct task_struct *waiter = bio->bi_private; 121 122 if (bio->bi_status) { 123 SetPageError(page); 124 ClearPageUptodate(page); 125 pr_alert("Read-error on swap-device (%u:%u:%llu)\n", 126 imajor(bio->bi_bdev->bd_inode), 127 iminor(bio->bi_bdev->bd_inode), 128 (unsigned long long)bio->bi_iter.bi_sector); 129 goto out; 130 } 131 132 SetPageUptodate(page); 133 swap_slot_free_notify(page); 134 out: 135 unlock_page(page); 136 WRITE_ONCE(bio->bi_private, NULL); 137 bio_put(bio); 138 wake_up_process(waiter); 139 } 140 141 int generic_swapfile_activate(struct swap_info_struct *sis, 142 struct file *swap_file, 143 sector_t *span) 144 { 145 struct address_space *mapping = swap_file->f_mapping; 146 struct inode *inode = mapping->host; 147 unsigned blocks_per_page; 148 unsigned long page_no; 149 unsigned blkbits; 150 sector_t probe_block; 151 sector_t last_block; 152 sector_t lowest_block = -1; 153 sector_t highest_block = 0; 154 int nr_extents = 0; 155 int ret; 156 157 blkbits = inode->i_blkbits; 158 blocks_per_page = PAGE_SIZE >> blkbits; 159 160 /* 161 * Map all the blocks into the extent list. This code doesn't try 162 * to be very smart. 163 */ 164 probe_block = 0; 165 page_no = 0; 166 last_block = i_size_read(inode) >> blkbits; 167 while ((probe_block + blocks_per_page) <= last_block && 168 page_no < sis->max) { 169 unsigned block_in_page; 170 sector_t first_block; 171 172 cond_resched(); 173 174 first_block = bmap(inode, probe_block); 175 if (first_block == 0) 176 goto bad_bmap; 177 178 /* 179 * It must be PAGE_SIZE aligned on-disk 180 */ 181 if (first_block & (blocks_per_page - 1)) { 182 probe_block++; 183 goto reprobe; 184 } 185 186 for (block_in_page = 1; block_in_page < blocks_per_page; 187 block_in_page++) { 188 sector_t block; 189 190 block = bmap(inode, probe_block + block_in_page); 191 if (block == 0) 192 goto bad_bmap; 193 if (block != first_block + block_in_page) { 194 /* Discontiguity */ 195 probe_block++; 196 goto reprobe; 197 } 198 } 199 200 first_block >>= (PAGE_SHIFT - blkbits); 201 if (page_no) { /* exclude the header page */ 202 if (first_block < lowest_block) 203 lowest_block = first_block; 204 if (first_block > highest_block) 205 highest_block = first_block; 206 } 207 208 /* 209 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks 210 */ 211 ret = add_swap_extent(sis, page_no, 1, first_block); 212 if (ret < 0) 213 goto out; 214 nr_extents += ret; 215 page_no++; 216 probe_block += blocks_per_page; 217 reprobe: 218 continue; 219 } 220 ret = nr_extents; 221 *span = 1 + highest_block - lowest_block; 222 if (page_no == 0) 223 page_no = 1; /* force Empty message */ 224 sis->max = page_no; 225 sis->pages = page_no - 1; 226 sis->highest_bit = page_no - 1; 227 out: 228 return ret; 229 bad_bmap: 230 pr_err("swapon: swapfile has holes\n"); 231 ret = -EINVAL; 232 goto out; 233 } 234 235 /* 236 * We may have stale swap cache pages in memory: notice 237 * them here and get rid of the unnecessary final write. 238 */ 239 int swap_writepage(struct page *page, struct writeback_control *wbc) 240 { 241 int ret = 0; 242 243 if (try_to_free_swap(page)) { 244 unlock_page(page); 245 goto out; 246 } 247 if (frontswap_store(page) == 0) { 248 set_page_writeback(page); 249 unlock_page(page); 250 end_page_writeback(page); 251 goto out; 252 } 253 ret = __swap_writepage(page, wbc, end_swap_bio_write); 254 out: 255 return ret; 256 } 257 258 static sector_t swap_page_sector(struct page *page) 259 { 260 return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9); 261 } 262 263 int __swap_writepage(struct page *page, struct writeback_control *wbc, 264 bio_end_io_t end_write_func) 265 { 266 struct bio *bio; 267 int ret; 268 struct swap_info_struct *sis = page_swap_info(page); 269 270 VM_BUG_ON_PAGE(!PageSwapCache(page), page); 271 if (sis->flags & SWP_FILE) { 272 struct kiocb kiocb; 273 struct file *swap_file = sis->swap_file; 274 struct address_space *mapping = swap_file->f_mapping; 275 struct bio_vec bv = { 276 .bv_page = page, 277 .bv_len = PAGE_SIZE, 278 .bv_offset = 0 279 }; 280 struct iov_iter from; 281 282 iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE); 283 init_sync_kiocb(&kiocb, swap_file); 284 kiocb.ki_pos = page_file_offset(page); 285 286 set_page_writeback(page); 287 unlock_page(page); 288 ret = mapping->a_ops->direct_IO(&kiocb, &from); 289 if (ret == PAGE_SIZE) { 290 count_vm_event(PSWPOUT); 291 ret = 0; 292 } else { 293 /* 294 * In the case of swap-over-nfs, this can be a 295 * temporary failure if the system has limited 296 * memory for allocating transmit buffers. 297 * Mark the page dirty and avoid 298 * rotate_reclaimable_page but rate-limit the 299 * messages but do not flag PageError like 300 * the normal direct-to-bio case as it could 301 * be temporary. 302 */ 303 set_page_dirty(page); 304 ClearPageReclaim(page); 305 pr_err_ratelimited("Write error on dio swapfile (%llu)\n", 306 page_file_offset(page)); 307 } 308 end_page_writeback(page); 309 return ret; 310 } 311 312 ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc); 313 if (!ret) { 314 count_vm_event(PSWPOUT); 315 return 0; 316 } 317 318 ret = 0; 319 bio = get_swap_bio(GFP_NOIO, page, end_write_func); 320 if (bio == NULL) { 321 set_page_dirty(page); 322 unlock_page(page); 323 ret = -ENOMEM; 324 goto out; 325 } 326 bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); 327 count_vm_event(PSWPOUT); 328 set_page_writeback(page); 329 unlock_page(page); 330 submit_bio(bio); 331 out: 332 return ret; 333 } 334 335 int swap_readpage(struct page *page, bool do_poll) 336 { 337 struct bio *bio; 338 int ret = 0; 339 struct swap_info_struct *sis = page_swap_info(page); 340 blk_qc_t qc; 341 struct block_device *bdev; 342 343 VM_BUG_ON_PAGE(!PageSwapCache(page), page); 344 VM_BUG_ON_PAGE(!PageLocked(page), page); 345 VM_BUG_ON_PAGE(PageUptodate(page), page); 346 if (frontswap_load(page) == 0) { 347 SetPageUptodate(page); 348 unlock_page(page); 349 goto out; 350 } 351 352 if (sis->flags & SWP_FILE) { 353 struct file *swap_file = sis->swap_file; 354 struct address_space *mapping = swap_file->f_mapping; 355 356 ret = mapping->a_ops->readpage(swap_file, page); 357 if (!ret) 358 count_vm_event(PSWPIN); 359 return ret; 360 } 361 362 ret = bdev_read_page(sis->bdev, swap_page_sector(page), page); 363 if (!ret) { 364 if (trylock_page(page)) { 365 swap_slot_free_notify(page); 366 unlock_page(page); 367 } 368 369 count_vm_event(PSWPIN); 370 return 0; 371 } 372 373 ret = 0; 374 bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read); 375 if (bio == NULL) { 376 unlock_page(page); 377 ret = -ENOMEM; 378 goto out; 379 } 380 bdev = bio->bi_bdev; 381 bio->bi_private = current; 382 bio_set_op_attrs(bio, REQ_OP_READ, 0); 383 count_vm_event(PSWPIN); 384 bio_get(bio); 385 qc = submit_bio(bio); 386 while (do_poll) { 387 set_current_state(TASK_UNINTERRUPTIBLE); 388 if (!READ_ONCE(bio->bi_private)) 389 break; 390 391 if (!blk_mq_poll(bdev_get_queue(bdev), qc)) 392 break; 393 } 394 __set_current_state(TASK_RUNNING); 395 bio_put(bio); 396 397 out: 398 return ret; 399 } 400 401 int swap_set_page_dirty(struct page *page) 402 { 403 struct swap_info_struct *sis = page_swap_info(page); 404 405 if (sis->flags & SWP_FILE) { 406 struct address_space *mapping = sis->swap_file->f_mapping; 407 408 VM_BUG_ON_PAGE(!PageSwapCache(page), page); 409 return mapping->a_ops->set_page_dirty(page); 410 } else { 411 return __set_page_dirty_no_writeback(page); 412 } 413 } 414