1 /* 2 * fs/f2fs/data.c 3 * 4 * Copyright (c) 2012 Samsung Electronics Co., Ltd. 5 * http://www.samsung.com/ 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 */ 11 #include <linux/fs.h> 12 #include <linux/f2fs_fs.h> 13 #include <linux/buffer_head.h> 14 #include <linux/mpage.h> 15 #include <linux/writeback.h> 16 #include <linux/backing-dev.h> 17 #include <linux/blkdev.h> 18 #include <linux/bio.h> 19 20 #include "f2fs.h" 21 #include "node.h" 22 #include "segment.h" 23 24 /* 25 * Lock ordering for the change of data block address: 26 * ->data_page 27 * ->node_page 28 * update block addresses in the node page 29 */ 30 static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) 31 { 32 struct f2fs_node *rn; 33 __le32 *addr_array; 34 struct page *node_page = dn->node_page; 35 unsigned int ofs_in_node = dn->ofs_in_node; 36 37 wait_on_page_writeback(node_page); 38 39 rn = (struct f2fs_node *)page_address(node_page); 40 41 /* Get physical address of data block */ 42 addr_array = blkaddr_in_node(rn); 43 addr_array[ofs_in_node] = cpu_to_le32(new_addr); 44 set_page_dirty(node_page); 45 } 46 47 int reserve_new_block(struct dnode_of_data *dn) 48 { 49 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 50 51 if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) 52 return -EPERM; 53 if (!inc_valid_block_count(sbi, dn->inode, 1)) 54 return -ENOSPC; 55 56 __set_data_blkaddr(dn, NEW_ADDR); 57 dn->data_blkaddr = NEW_ADDR; 58 sync_inode_page(dn); 59 return 0; 60 } 61 62 static int check_extent_cache(struct inode *inode, pgoff_t pgofs, 63 struct buffer_head *bh_result) 64 { 65 struct f2fs_inode_info *fi = F2FS_I(inode); 66 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 67 pgoff_t start_fofs, end_fofs; 68 block_t start_blkaddr; 69 70 read_lock(&fi->ext.ext_lock); 71 if (fi->ext.len == 0) { 72 read_unlock(&fi->ext.ext_lock); 73 return 0; 74 } 75 76 sbi->total_hit_ext++; 77 start_fofs = fi->ext.fofs; 78 end_fofs = fi->ext.fofs + fi->ext.len - 1; 79 start_blkaddr = fi->ext.blk_addr; 80 81 if (pgofs >= start_fofs && pgofs <= end_fofs) { 82 unsigned int blkbits = inode->i_sb->s_blocksize_bits; 83 size_t count; 84 85 clear_buffer_new(bh_result); 86 map_bh(bh_result, inode->i_sb, 87 start_blkaddr + pgofs - start_fofs); 88 count = end_fofs - pgofs + 1; 89 if (count < (UINT_MAX >> blkbits)) 90 bh_result->b_size = (count << blkbits); 91 else 92 bh_result->b_size = UINT_MAX; 93 94 sbi->read_hit_ext++; 95 read_unlock(&fi->ext.ext_lock); 96 return 1; 97 } 98 read_unlock(&fi->ext.ext_lock); 99 return 0; 100 } 101 102 void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) 103 { 104 struct f2fs_inode_info *fi = F2FS_I(dn->inode); 105 pgoff_t fofs, start_fofs, end_fofs; 106 block_t start_blkaddr, end_blkaddr; 107 108 BUG_ON(blk_addr == NEW_ADDR); 109 fofs = start_bidx_of_node(ofs_of_node(dn->node_page)) + dn->ofs_in_node; 110 111 /* Update the page address in the parent node */ 112 __set_data_blkaddr(dn, blk_addr); 113 114 write_lock(&fi->ext.ext_lock); 115 116 start_fofs = fi->ext.fofs; 117 end_fofs = fi->ext.fofs + fi->ext.len - 1; 118 start_blkaddr = fi->ext.blk_addr; 119 end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1; 120 121 /* Drop and initialize the matched extent */ 122 if (fi->ext.len == 1 && fofs == start_fofs) 123 fi->ext.len = 0; 124 125 /* Initial extent */ 126 if (fi->ext.len == 0) { 127 if (blk_addr != NULL_ADDR) { 128 fi->ext.fofs = fofs; 129 fi->ext.blk_addr = blk_addr; 130 fi->ext.len = 1; 131 } 132 goto end_update; 133 } 134 135 /* Frone merge */ 136 if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) { 137 fi->ext.fofs--; 138 fi->ext.blk_addr--; 139 fi->ext.len++; 140 goto end_update; 141 } 142 143 /* Back merge */ 144 if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) { 145 fi->ext.len++; 146 goto end_update; 147 } 148 149 /* Split the existing extent */ 150 if (fi->ext.len > 1 && 151 fofs >= start_fofs && fofs <= end_fofs) { 152 if ((end_fofs - fofs) < (fi->ext.len >> 1)) { 153 fi->ext.len = fofs - start_fofs; 154 } else { 155 fi->ext.fofs = fofs + 1; 156 fi->ext.blk_addr = start_blkaddr + 157 fofs - start_fofs + 1; 158 fi->ext.len -= fofs - start_fofs + 1; 159 } 160 goto end_update; 161 } 162 write_unlock(&fi->ext.ext_lock); 163 return; 164 165 end_update: 166 write_unlock(&fi->ext.ext_lock); 167 sync_inode_page(dn); 168 return; 169 } 170 171 struct page *find_data_page(struct inode *inode, pgoff_t index) 172 { 173 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 174 struct address_space *mapping = inode->i_mapping; 175 struct dnode_of_data dn; 176 struct page *page; 177 int err; 178 179 page = find_get_page(mapping, index); 180 if (page && PageUptodate(page)) 181 return page; 182 f2fs_put_page(page, 0); 183 184 set_new_dnode(&dn, inode, NULL, NULL, 0); 185 err = get_dnode_of_data(&dn, index, RDONLY_NODE); 186 if (err) 187 return ERR_PTR(err); 188 f2fs_put_dnode(&dn); 189 190 if (dn.data_blkaddr == NULL_ADDR) 191 return ERR_PTR(-ENOENT); 192 193 /* By fallocate(), there is no cached page, but with NEW_ADDR */ 194 if (dn.data_blkaddr == NEW_ADDR) 195 return ERR_PTR(-EINVAL); 196 197 page = grab_cache_page(mapping, index); 198 if (!page) 199 return ERR_PTR(-ENOMEM); 200 201 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); 202 if (err) { 203 f2fs_put_page(page, 1); 204 return ERR_PTR(err); 205 } 206 unlock_page(page); 207 return page; 208 } 209 210 /* 211 * If it tries to access a hole, return an error. 212 * Because, the callers, functions in dir.c and GC, should be able to know 213 * whether this page exists or not. 214 */ 215 struct page *get_lock_data_page(struct inode *inode, pgoff_t index) 216 { 217 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 218 struct address_space *mapping = inode->i_mapping; 219 struct dnode_of_data dn; 220 struct page *page; 221 int err; 222 223 set_new_dnode(&dn, inode, NULL, NULL, 0); 224 err = get_dnode_of_data(&dn, index, RDONLY_NODE); 225 if (err) 226 return ERR_PTR(err); 227 f2fs_put_dnode(&dn); 228 229 if (dn.data_blkaddr == NULL_ADDR) 230 return ERR_PTR(-ENOENT); 231 232 page = grab_cache_page(mapping, index); 233 if (!page) 234 return ERR_PTR(-ENOMEM); 235 236 if (PageUptodate(page)) 237 return page; 238 239 BUG_ON(dn.data_blkaddr == NEW_ADDR); 240 BUG_ON(dn.data_blkaddr == NULL_ADDR); 241 242 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); 243 if (err) { 244 f2fs_put_page(page, 1); 245 return ERR_PTR(err); 246 } 247 return page; 248 } 249 250 /* 251 * Caller ensures that this data page is never allocated. 252 * A new zero-filled data page is allocated in the page cache. 253 */ 254 struct page *get_new_data_page(struct inode *inode, pgoff_t index, 255 bool new_i_size) 256 { 257 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 258 struct address_space *mapping = inode->i_mapping; 259 struct page *page; 260 struct dnode_of_data dn; 261 int err; 262 263 set_new_dnode(&dn, inode, NULL, NULL, 0); 264 err = get_dnode_of_data(&dn, index, 0); 265 if (err) 266 return ERR_PTR(err); 267 268 if (dn.data_blkaddr == NULL_ADDR) { 269 if (reserve_new_block(&dn)) { 270 f2fs_put_dnode(&dn); 271 return ERR_PTR(-ENOSPC); 272 } 273 } 274 f2fs_put_dnode(&dn); 275 276 page = grab_cache_page(mapping, index); 277 if (!page) 278 return ERR_PTR(-ENOMEM); 279 280 if (PageUptodate(page)) 281 return page; 282 283 if (dn.data_blkaddr == NEW_ADDR) { 284 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 285 } else { 286 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); 287 if (err) { 288 f2fs_put_page(page, 1); 289 return ERR_PTR(err); 290 } 291 } 292 SetPageUptodate(page); 293 294 if (new_i_size && 295 i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { 296 i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); 297 mark_inode_dirty_sync(inode); 298 } 299 return page; 300 } 301 302 static void read_end_io(struct bio *bio, int err) 303 { 304 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 305 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 306 307 do { 308 struct page *page = bvec->bv_page; 309 310 if (--bvec >= bio->bi_io_vec) 311 prefetchw(&bvec->bv_page->flags); 312 313 if (uptodate) { 314 SetPageUptodate(page); 315 } else { 316 ClearPageUptodate(page); 317 SetPageError(page); 318 } 319 unlock_page(page); 320 } while (bvec >= bio->bi_io_vec); 321 kfree(bio->bi_private); 322 bio_put(bio); 323 } 324 325 /* 326 * Fill the locked page with data located in the block address. 327 * Read operation is synchronous, and caller must unlock the page. 328 */ 329 int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, 330 block_t blk_addr, int type) 331 { 332 struct block_device *bdev = sbi->sb->s_bdev; 333 bool sync = (type == READ_SYNC); 334 struct bio *bio; 335 336 /* This page can be already read by other threads */ 337 if (PageUptodate(page)) { 338 if (!sync) 339 unlock_page(page); 340 return 0; 341 } 342 343 down_read(&sbi->bio_sem); 344 345 /* Allocate a new bio */ 346 bio = f2fs_bio_alloc(bdev, 1); 347 348 /* Initialize the bio */ 349 bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); 350 bio->bi_end_io = read_end_io; 351 352 if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { 353 kfree(bio->bi_private); 354 bio_put(bio); 355 up_read(&sbi->bio_sem); 356 return -EFAULT; 357 } 358 359 submit_bio(type, bio); 360 up_read(&sbi->bio_sem); 361 362 /* wait for read completion if sync */ 363 if (sync) { 364 lock_page(page); 365 if (PageError(page)) 366 return -EIO; 367 } 368 return 0; 369 } 370 371 /* 372 * This function should be used by the data read flow only where it 373 * does not check the "create" flag that indicates block allocation. 374 * The reason for this special functionality is to exploit VFS readahead 375 * mechanism. 376 */ 377 static int get_data_block_ro(struct inode *inode, sector_t iblock, 378 struct buffer_head *bh_result, int create) 379 { 380 unsigned int blkbits = inode->i_sb->s_blocksize_bits; 381 unsigned maxblocks = bh_result->b_size >> blkbits; 382 struct dnode_of_data dn; 383 pgoff_t pgofs; 384 int err; 385 386 /* Get the page offset from the block offset(iblock) */ 387 pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); 388 389 if (check_extent_cache(inode, pgofs, bh_result)) 390 return 0; 391 392 /* When reading holes, we need its node page */ 393 set_new_dnode(&dn, inode, NULL, NULL, 0); 394 err = get_dnode_of_data(&dn, pgofs, RDONLY_NODE); 395 if (err) 396 return (err == -ENOENT) ? 0 : err; 397 398 /* It does not support data allocation */ 399 BUG_ON(create); 400 401 if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) { 402 int i; 403 unsigned int end_offset; 404 405 end_offset = IS_INODE(dn.node_page) ? 406 ADDRS_PER_INODE : 407 ADDRS_PER_BLOCK; 408 409 clear_buffer_new(bh_result); 410 411 /* Give more consecutive addresses for the read ahead */ 412 for (i = 0; i < end_offset - dn.ofs_in_node; i++) 413 if (((datablock_addr(dn.node_page, 414 dn.ofs_in_node + i)) 415 != (dn.data_blkaddr + i)) || maxblocks == i) 416 break; 417 map_bh(bh_result, inode->i_sb, dn.data_blkaddr); 418 bh_result->b_size = (i << blkbits); 419 } 420 f2fs_put_dnode(&dn); 421 return 0; 422 } 423 424 static int f2fs_read_data_page(struct file *file, struct page *page) 425 { 426 return mpage_readpage(page, get_data_block_ro); 427 } 428 429 static int f2fs_read_data_pages(struct file *file, 430 struct address_space *mapping, 431 struct list_head *pages, unsigned nr_pages) 432 { 433 return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro); 434 } 435 436 int do_write_data_page(struct page *page) 437 { 438 struct inode *inode = page->mapping->host; 439 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 440 block_t old_blk_addr, new_blk_addr; 441 struct dnode_of_data dn; 442 int err = 0; 443 444 set_new_dnode(&dn, inode, NULL, NULL, 0); 445 err = get_dnode_of_data(&dn, page->index, RDONLY_NODE); 446 if (err) 447 return err; 448 449 old_blk_addr = dn.data_blkaddr; 450 451 /* This page is already truncated */ 452 if (old_blk_addr == NULL_ADDR) 453 goto out_writepage; 454 455 set_page_writeback(page); 456 457 /* 458 * If current allocation needs SSR, 459 * it had better in-place writes for updated data. 460 */ 461 if (old_blk_addr != NEW_ADDR && !is_cold_data(page) && 462 need_inplace_update(inode)) { 463 rewrite_data_page(F2FS_SB(inode->i_sb), page, 464 old_blk_addr); 465 } else { 466 write_data_page(inode, page, &dn, 467 old_blk_addr, &new_blk_addr); 468 update_extent_cache(new_blk_addr, &dn); 469 F2FS_I(inode)->data_version = 470 le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver); 471 } 472 out_writepage: 473 f2fs_put_dnode(&dn); 474 return err; 475 } 476 477 static int f2fs_write_data_page(struct page *page, 478 struct writeback_control *wbc) 479 { 480 struct inode *inode = page->mapping->host; 481 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 482 loff_t i_size = i_size_read(inode); 483 const pgoff_t end_index = ((unsigned long long) i_size) 484 >> PAGE_CACHE_SHIFT; 485 unsigned offset; 486 int err = 0; 487 488 if (page->index < end_index) 489 goto out; 490 491 /* 492 * If the offset is out-of-range of file size, 493 * this page does not have to be written to disk. 494 */ 495 offset = i_size & (PAGE_CACHE_SIZE - 1); 496 if ((page->index >= end_index + 1) || !offset) { 497 if (S_ISDIR(inode->i_mode)) { 498 dec_page_count(sbi, F2FS_DIRTY_DENTS); 499 inode_dec_dirty_dents(inode); 500 } 501 goto unlock_out; 502 } 503 504 zero_user_segment(page, offset, PAGE_CACHE_SIZE); 505 out: 506 if (sbi->por_doing) 507 goto redirty_out; 508 509 if (wbc->for_reclaim && !S_ISDIR(inode->i_mode) && !is_cold_data(page)) 510 goto redirty_out; 511 512 mutex_lock_op(sbi, DATA_WRITE); 513 if (S_ISDIR(inode->i_mode)) { 514 dec_page_count(sbi, F2FS_DIRTY_DENTS); 515 inode_dec_dirty_dents(inode); 516 } 517 err = do_write_data_page(page); 518 if (err && err != -ENOENT) { 519 wbc->pages_skipped++; 520 set_page_dirty(page); 521 } 522 mutex_unlock_op(sbi, DATA_WRITE); 523 524 if (wbc->for_reclaim) 525 f2fs_submit_bio(sbi, DATA, true); 526 527 if (err == -ENOENT) 528 goto unlock_out; 529 530 clear_cold_data(page); 531 unlock_page(page); 532 533 if (!wbc->for_reclaim && !S_ISDIR(inode->i_mode)) 534 f2fs_balance_fs(sbi); 535 return 0; 536 537 unlock_out: 538 unlock_page(page); 539 return (err == -ENOENT) ? 0 : err; 540 541 redirty_out: 542 wbc->pages_skipped++; 543 set_page_dirty(page); 544 return AOP_WRITEPAGE_ACTIVATE; 545 } 546 547 #define MAX_DESIRED_PAGES_WP 4096 548 549 static int f2fs_write_data_pages(struct address_space *mapping, 550 struct writeback_control *wbc) 551 { 552 struct inode *inode = mapping->host; 553 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 554 int ret; 555 long excess_nrtw = 0, desired_nrtw; 556 557 if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) { 558 desired_nrtw = MAX_DESIRED_PAGES_WP; 559 excess_nrtw = desired_nrtw - wbc->nr_to_write; 560 wbc->nr_to_write = desired_nrtw; 561 } 562 563 if (!S_ISDIR(inode->i_mode)) 564 mutex_lock(&sbi->writepages); 565 ret = generic_writepages(mapping, wbc); 566 if (!S_ISDIR(inode->i_mode)) 567 mutex_unlock(&sbi->writepages); 568 f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL)); 569 570 remove_dirty_dir_inode(inode); 571 572 wbc->nr_to_write -= excess_nrtw; 573 return ret; 574 } 575 576 static int f2fs_write_begin(struct file *file, struct address_space *mapping, 577 loff_t pos, unsigned len, unsigned flags, 578 struct page **pagep, void **fsdata) 579 { 580 struct inode *inode = mapping->host; 581 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 582 struct page *page; 583 pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; 584 struct dnode_of_data dn; 585 int err = 0; 586 587 /* for nobh_write_end */ 588 *fsdata = NULL; 589 590 f2fs_balance_fs(sbi); 591 592 page = grab_cache_page_write_begin(mapping, index, flags); 593 if (!page) 594 return -ENOMEM; 595 *pagep = page; 596 597 mutex_lock_op(sbi, DATA_NEW); 598 599 set_new_dnode(&dn, inode, NULL, NULL, 0); 600 err = get_dnode_of_data(&dn, index, 0); 601 if (err) { 602 mutex_unlock_op(sbi, DATA_NEW); 603 f2fs_put_page(page, 1); 604 return err; 605 } 606 607 if (dn.data_blkaddr == NULL_ADDR) { 608 err = reserve_new_block(&dn); 609 if (err) { 610 f2fs_put_dnode(&dn); 611 mutex_unlock_op(sbi, DATA_NEW); 612 f2fs_put_page(page, 1); 613 return err; 614 } 615 } 616 f2fs_put_dnode(&dn); 617 618 mutex_unlock_op(sbi, DATA_NEW); 619 620 if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) 621 return 0; 622 623 if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { 624 unsigned start = pos & (PAGE_CACHE_SIZE - 1); 625 unsigned end = start + len; 626 627 /* Reading beyond i_size is simple: memset to zero */ 628 zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE); 629 return 0; 630 } 631 632 if (dn.data_blkaddr == NEW_ADDR) { 633 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 634 } else { 635 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); 636 if (err) { 637 f2fs_put_page(page, 1); 638 return err; 639 } 640 } 641 SetPageUptodate(page); 642 clear_cold_data(page); 643 return 0; 644 } 645 646 static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, 647 const struct iovec *iov, loff_t offset, unsigned long nr_segs) 648 { 649 struct file *file = iocb->ki_filp; 650 struct inode *inode = file->f_mapping->host; 651 652 if (rw == WRITE) 653 return 0; 654 655 /* Needs synchronization with the cleaner */ 656 return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 657 get_data_block_ro); 658 } 659 660 static void f2fs_invalidate_data_page(struct page *page, unsigned long offset) 661 { 662 struct inode *inode = page->mapping->host; 663 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 664 if (S_ISDIR(inode->i_mode) && PageDirty(page)) { 665 dec_page_count(sbi, F2FS_DIRTY_DENTS); 666 inode_dec_dirty_dents(inode); 667 } 668 ClearPagePrivate(page); 669 } 670 671 static int f2fs_release_data_page(struct page *page, gfp_t wait) 672 { 673 ClearPagePrivate(page); 674 return 0; 675 } 676 677 static int f2fs_set_data_page_dirty(struct page *page) 678 { 679 struct address_space *mapping = page->mapping; 680 struct inode *inode = mapping->host; 681 682 SetPageUptodate(page); 683 if (!PageDirty(page)) { 684 __set_page_dirty_nobuffers(page); 685 set_dirty_dir_page(inode, page); 686 return 1; 687 } 688 return 0; 689 } 690 691 const struct address_space_operations f2fs_dblock_aops = { 692 .readpage = f2fs_read_data_page, 693 .readpages = f2fs_read_data_pages, 694 .writepage = f2fs_write_data_page, 695 .writepages = f2fs_write_data_pages, 696 .write_begin = f2fs_write_begin, 697 .write_end = nobh_write_end, 698 .set_page_dirty = f2fs_set_data_page_dirty, 699 .invalidatepage = f2fs_invalidate_data_page, 700 .releasepage = f2fs_release_data_page, 701 .direct_IO = f2fs_direct_IO, 702 }; 703