// SPDX-License-Identifier: GPL-2.0+
/*
 * NILFS inode operations.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * Written by Ryusuke Konishi.
 *
 */

#include <linux/buffer_head.h>
#include <linux/gfp.h>
#include <linux/mpage.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/uio.h>
#include <linux/fiemap.h>
#include <linux/random.h>
#include "nilfs.h"
#include "btnode.h"
#include "segment.h"
#include "page.h"
#include "mdt.h"
#include "cpfile.h"
#include "ifile.h"

/**
 * struct nilfs_iget_args - arguments used during comparison between inodes
 * @ino: inode number
 * @cno: checkpoint number
 * @root: pointer on NILFS root object (mounted checkpoint)
 * @type: inode type
 */
struct nilfs_iget_args {
	u64 ino;
	__u64 cno;
	struct nilfs_root *root;
	unsigned int type;
};

static int nilfs_iget_test(struct inode *inode, void *opaque);

void nilfs_inode_add_blocks(struct inode *inode, int n)
{
	struct nilfs_root *root = NILFS_I(inode)->i_root;

	inode_add_bytes(inode, i_blocksize(inode) * n);
	if (root)
		atomic64_add(n, &root->blocks_count);
}

void nilfs_inode_sub_blocks(struct inode *inode, int n)
{
	struct nilfs_root *root = NILFS_I(inode)->i_root;

	inode_sub_bytes(inode, i_blocksize(inode) * n);
	if (root)
		atomic64_sub(n, &root->blocks_count);
}

/**
 * nilfs_get_block() - get a file block on the filesystem (callback function)
 * @inode: inode struct of the target file
 * @blkoff: file block number
 * @bh_result: buffer head to be mapped on
 * @create: whether to allocate the block if it has not been allocated yet
 *
 * This function does not issue the actual read request for the specified
 * data block; that is done by the VFS.
 */
int nilfs_get_block(struct inode *inode, sector_t blkoff,
		    struct buffer_head *bh_result, int create)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	__u64 blknum = 0;
	int err = 0, ret;
	unsigned int maxblocks = bh_result->b_size >> inode->i_blkbits;

	down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	if (ret >= 0) {	/* found */
		map_bh(bh_result, inode->i_sb, blknum);
		if (ret > 0)
			bh_result->b_size = (ret << inode->i_blkbits);
		goto out;
	}
	/* data block was not found */
	if (ret == -ENOENT && create) {
		struct nilfs_transaction_info ti;

		bh_result->b_blocknr = 0;
		err = nilfs_transaction_begin(inode->i_sb, &ti, 1);
		if (unlikely(err))
			goto out;
		err = nilfs_bmap_insert(ii->i_bmap, blkoff,
					(unsigned long)bh_result);
		if (unlikely(err != 0)) {
			if (err == -EEXIST) {
				/*
				 * The get_block() function could be called
				 * from multiple callers for an inode.
				 * However, the page having this block must
				 * be locked in this case.
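				 * Since that page is locked, a genuine
				 * concurrent insertion should not occur, so
				 * -EEXIST is reported as a race below and
				 * converted to -EAGAIN.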
				 */
				nilfs_warn(inode->i_sb,
					   "%s (ino=%lu): a race condition while inserting a data block at offset=%llu",
					   __func__, inode->i_ino,
					   (unsigned long long)blkoff);
				err = -EAGAIN;
			}
			nilfs_transaction_abort(inode->i_sb);
			goto out;
		}
		nilfs_mark_inode_dirty_sync(inode);
		nilfs_transaction_commit(inode->i_sb); /* never fails */
		/* Error handling should be detailed */
		set_buffer_new(bh_result);
		set_buffer_delay(bh_result);
		map_bh(bh_result, inode->i_sb, 0);
		/* Disk block number must be changed to proper value */

	} else if (ret == -ENOENT) {
		/*
		 * not found is not an error (e.g. a hole); must return
		 * without the mapped state flag.
		 */
		;
	} else {
		err = ret;
	}

out:
	return err;
}

/**
 * nilfs_read_folio() - implement read_folio() method of nilfs_aops {}
 * address_space_operations.
 * @file: file struct of the file to be read
 * @folio: the folio to be read
 */
static int nilfs_read_folio(struct file *file, struct folio *folio)
{
	return mpage_read_folio(folio, nilfs_get_block);
}

static void nilfs_readahead(struct readahead_control *rac)
{
	mpage_readahead(rac, nilfs_get_block);
}

static int nilfs_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	int err = 0;

	if (sb_rdonly(inode->i_sb)) {
		nilfs_clear_dirty_pages(mapping);
		return -EROFS;
	}

	if (wbc->sync_mode == WB_SYNC_ALL)
		err = nilfs_construct_dsync_segment(inode->i_sb, inode,
						    wbc->range_start,
						    wbc->range_end);
	return err;
}

static int nilfs_writepage(struct page *page, struct writeback_control *wbc)
{
	struct folio *folio = page_folio(page);
	struct inode *inode = folio->mapping->host;
	int err;

	if (sb_rdonly(inode->i_sb)) {
		/*
		 * It means that the filesystem was remounted in read-only
		 * mode because of error or metadata corruption. But we
		 * have dirty pages that try to be flushed in background.
		 * So, here we simply discard this dirty page.
		 */
		nilfs_clear_folio_dirty(folio);
		folio_unlock(folio);
		return -EROFS;
	}

	folio_redirty_for_writepage(wbc, folio);
	folio_unlock(folio);

	if (wbc->sync_mode == WB_SYNC_ALL) {
		err = nilfs_construct_segment(inode->i_sb);
		if (unlikely(err))
			return err;
	} else if (wbc->for_reclaim)
		nilfs_flush_segment(inode->i_sb, inode->i_ino);

	return 0;
}

static bool nilfs_dirty_folio(struct address_space *mapping,
			      struct folio *folio)
{
	struct inode *inode = mapping->host;
	struct buffer_head *head;
	unsigned int nr_dirty = 0;
	bool ret = filemap_dirty_folio(mapping, folio);

	/*
	 * The page may not be locked, e.g. if called from try_to_unmap_one()
	 */
	spin_lock(&mapping->i_private_lock);
	head = folio_buffers(folio);
	if (head) {
		struct buffer_head *bh = head;

		do {
			/* Do not mark hole blocks dirty */
			if (buffer_dirty(bh) || !buffer_mapped(bh))
				continue;

			set_buffer_dirty(bh);
			nr_dirty++;
		} while (bh = bh->b_this_page, bh != head);
	} else if (ret) {
		nr_dirty = 1 << (folio_shift(folio) - inode->i_blkbits);
	}
	spin_unlock(&mapping->i_private_lock);

	if (nr_dirty)
		nilfs_set_file_dirty(inode, nr_dirty);
	return ret;
}

void nilfs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;

	if (to > inode->i_size) {
		truncate_pagecache(inode, inode->i_size);
		nilfs_truncate(inode);
	}
}

static int nilfs_write_begin(struct file *file, struct address_space *mapping,
			     loff_t pos, unsigned len,
			     struct folio **foliop, void **fsdata)

{
	struct inode *inode = mapping->host;
	int err = nilfs_transaction_begin(inode->i_sb, NULL, 1);

	if (unlikely(err))
		return err;

	err = block_write_begin(mapping, pos, len, foliop, nilfs_get_block);
	if (unlikely(err)) {
		nilfs_write_failed(mapping, pos + len);
		nilfs_transaction_abort(inode->i_sb);
	}
	return err;
}

static int nilfs_write_end(struct file *file, struct address_space *mapping,
			   loff_t pos, unsigned len, unsigned copied,
			   struct folio *folio, void *fsdata)
{
	struct inode *inode = mapping->host;
	unsigned int start = pos & (PAGE_SIZE - 1);
	unsigned int nr_dirty;
	int err;

	nr_dirty = nilfs_page_count_clean_buffers(&folio->page, start,
						  start + copied);
	copied = generic_write_end(file, mapping, pos, len, copied, folio,
				   fsdata);
	nilfs_set_file_dirty(inode, nr_dirty);
	err = nilfs_transaction_commit(inode->i_sb);
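	/*
	 * "err ? : copied" uses the GNU conditional-omission extension:
	 * return the commit error if it is nonzero, otherwise the number
	 * of bytes copied.
	 */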
	return err ? : copied;
}

static ssize_t
nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	if (iov_iter_rw(iter) == WRITE)
		return 0;

	/* Needs synchronization with the cleaner */
	return blockdev_direct_IO(iocb, inode, iter, nilfs_get_block);
}

const struct address_space_operations nilfs_aops = {
	.writepage = nilfs_writepage,
	.read_folio = nilfs_read_folio,
	.writepages = nilfs_writepages,
	.dirty_folio = nilfs_dirty_folio,
	.readahead = nilfs_readahead,
	.write_begin = nilfs_write_begin,
	.write_end = nilfs_write_end,
	.invalidate_folio = block_invalidate_folio,
	.direct_IO = nilfs_direct_IO,
	.is_partially_uptodate = block_is_partially_uptodate,
};

static int nilfs_insert_inode_locked(struct inode *inode,
				     struct nilfs_root *root,
				     unsigned long ino)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
	};

	return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
}

struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
{
	struct super_block *sb = dir->i_sb;
	struct inode *inode;
	struct nilfs_inode_info *ii;
	struct nilfs_root *root;
	struct buffer_head *bh;
	int err = -ENOMEM;
	ino_t ino;

	inode = new_inode(sb);
	if (unlikely(!inode))
		goto failed;

	mapping_set_gfp_mask(inode->i_mapping,
			     mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));

	root = NILFS_I(dir)->i_root;
	ii = NILFS_I(inode);
	ii->i_state = BIT(NILFS_I_NEW);
	ii->i_type = NILFS_I_TYPE_NORMAL;
	ii->i_root = root;

	err = nilfs_ifile_create_inode(root->ifile, &ino, &bh);
	if (unlikely(err))
		goto failed_ifile_create_inode;
	/* reference count of i_bh inherits from nilfs_mdt_read_block() */
	ii->i_bh = bh;

	atomic64_inc(&root->inodes_count);
	inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
	inode->i_ino = ino;
	simple_inode_init_ts(inode);

	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
		err = nilfs_bmap_read(ii->i_bmap, NULL);
		if (err < 0)
			goto failed_after_creation;

		set_bit(NILFS_I_BMAP, &ii->i_state);
		/* No lock is needed; iget() ensures it. */
	}

	ii->i_flags = nilfs_mask_flags(
		mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED);

	/* ii->i_file_acl = 0; */
	/* ii->i_dir_acl = 0; */
	ii->i_dir_start_lookup = 0;
	nilfs_set_inode_flags(inode);
	inode->i_generation = get_random_u32();
	if (nilfs_insert_inode_locked(inode, root, ino) < 0) {
		err = -EIO;
		goto failed_after_creation;
	}

	err = nilfs_init_acl(inode, dir);
	if (unlikely(err))
		/*
		 * Never occur. When supporting nilfs_init_acl(),
		 * proper cancellation of above jobs should be considered.
		 */
		goto failed_after_creation;

	return inode;

failed_after_creation:
	clear_nlink(inode);
	if (inode->i_state & I_NEW)
		unlock_new_inode(inode);
	iput(inode);  /*
		       * raw_inode will be deleted through
		       * nilfs_evict_inode().
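		       * because clear_nlink() above has already dropped
		       * the link count to zero.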
		       */
	goto failed;

failed_ifile_create_inode:
	make_bad_inode(inode);
	iput(inode);
failed:
	return ERR_PTR(err);
}

void nilfs_set_inode_flags(struct inode *inode)
{
	unsigned int flags = NILFS_I(inode)->i_flags;
	unsigned int new_fl = 0;

	if (flags & FS_SYNC_FL)
		new_fl |= S_SYNC;
	if (flags & FS_APPEND_FL)
		new_fl |= S_APPEND;
	if (flags & FS_IMMUTABLE_FL)
		new_fl |= S_IMMUTABLE;
	if (flags & FS_NOATIME_FL)
		new_fl |= S_NOATIME;
	if (flags & FS_DIRSYNC_FL)
		new_fl |= S_DIRSYNC;
	inode_set_flags(inode, new_fl, S_SYNC | S_APPEND | S_IMMUTABLE |
			S_NOATIME | S_DIRSYNC);
}

int nilfs_read_inode_common(struct inode *inode,
			    struct nilfs_inode *raw_inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int err;

	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
	i_uid_write(inode, le32_to_cpu(raw_inode->i_uid));
	i_gid_write(inode, le32_to_cpu(raw_inode->i_gid));
	set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
	inode->i_size = le64_to_cpu(raw_inode->i_size);
	inode_set_atime(inode, le64_to_cpu(raw_inode->i_mtime),
			le32_to_cpu(raw_inode->i_mtime_nsec));
	inode_set_ctime(inode, le64_to_cpu(raw_inode->i_ctime),
			le32_to_cpu(raw_inode->i_ctime_nsec));
	inode_set_mtime(inode, le64_to_cpu(raw_inode->i_mtime),
			le32_to_cpu(raw_inode->i_mtime_nsec));
	if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode))
		return -EIO; /* this inode is for metadata and corrupted */
	if (inode->i_nlink == 0)
		return -ESTALE; /* this inode is deleted */

	inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
	ii->i_flags = le32_to_cpu(raw_inode->i_flags);
#if 0
	ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
	ii->i_dir_acl = S_ISREG(inode->i_mode) ?
		0 : le32_to_cpu(raw_inode->i_dir_acl);
#endif
	ii->i_dir_start_lookup = 0;
	inode->i_generation = le32_to_cpu(raw_inode->i_generation);

	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
	    S_ISLNK(inode->i_mode)) {
		err = nilfs_bmap_read(ii->i_bmap, raw_inode);
		if (err < 0)
			return err;
		set_bit(NILFS_I_BMAP, &ii->i_state);
		/* No lock is needed; iget() ensures it. */
	}
	return 0;
}

static int __nilfs_read_inode(struct super_block *sb,
			      struct nilfs_root *root, unsigned long ino,
			      struct inode *inode)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct buffer_head *bh;
	struct nilfs_inode *raw_inode;
	int err;

	down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh);
	if (unlikely(err))
		goto bad_inode;

	raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh);

	err = nilfs_read_inode_common(inode, raw_inode);
	if (err)
		goto failed_unmap;

	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &nilfs_file_inode_operations;
		inode->i_fop = &nilfs_file_operations;
		inode->i_mapping->a_ops = &nilfs_aops;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &nilfs_dir_inode_operations;
		inode->i_fop = &nilfs_dir_operations;
		inode->i_mapping->a_ops = &nilfs_aops;
	} else if (S_ISLNK(inode->i_mode)) {
		inode->i_op = &nilfs_symlink_inode_operations;
		inode_nohighmem(inode);
		inode->i_mapping->a_ops = &nilfs_aops;
	} else {
		inode->i_op = &nilfs_special_inode_operations;
		init_special_inode(
			inode, inode->i_mode,
			huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
	}
	nilfs_ifile_unmap_inode(raw_inode);
	brelse(bh);
	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	nilfs_set_inode_flags(inode);
	mapping_set_gfp_mask(inode->i_mapping,
			     mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
	return 0;

failed_unmap:
	nilfs_ifile_unmap_inode(raw_inode);
	brelse(bh);

bad_inode:
	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	return err;
}

static int nilfs_iget_test(struct inode *inode, void *opaque)
{
	struct nilfs_iget_args *args = opaque;
	struct nilfs_inode_info *ii;

	if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root)
		return 0;

	ii = NILFS_I(inode);
	if (ii->i_type != args->type)
		return 0;

	return !(args->type & NILFS_I_TYPE_GC) || args->cno == ii->i_cno;
}

static int nilfs_iget_set(struct inode *inode, void *opaque)
{
	struct nilfs_iget_args *args = opaque;

	inode->i_ino = args->ino;
	NILFS_I(inode)->i_cno = args->cno;
	NILFS_I(inode)->i_root = args->root;
	NILFS_I(inode)->i_type = args->type;
	if (args->root && args->ino == NILFS_ROOT_INO)
		nilfs_get_root(args->root);
	return 0;
}

struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
			    unsigned long ino)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
	};

	return ilookup5(sb, ino, nilfs_iget_test, &args);
}

struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
				unsigned long ino)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
	};

	return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
}

struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root,
			 unsigned long ino)
{
	struct inode *inode;
	int err;

	inode = nilfs_iget_locked(sb, root, ino);
	if (unlikely(!inode))
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;

	err = __nilfs_read_inode(sb, root, ino, inode);
	if (unlikely(err)) {
		iget_failed(inode);
		return ERR_PTR(err);
	}
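	/* Reading succeeded; clear I_NEW and wake up any waiters. */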
	unlock_new_inode(inode);
	return inode;
}

struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
				__u64 cno)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = NULL, .cno = cno, .type = NILFS_I_TYPE_GC
	};
	struct inode *inode;
	int err;

	inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
	if (unlikely(!inode))
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;

	err = nilfs_init_gcinode(inode);
	if (unlikely(err)) {
		iget_failed(inode);
		return ERR_PTR(err);
	}
	unlock_new_inode(inode);
	return inode;
}

/**
 * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode
 * @inode: inode object
 *
 * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode,
 * or does nothing if the inode already has it. This function allocates
 * an additional inode to maintain the page cache of B-tree nodes one-on-one.
 *
 * Return Value: On success, 0 is returned. On errors, one of the following
 * negative error codes is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 */
int nilfs_attach_btree_node_cache(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct inode *btnc_inode;
	struct nilfs_iget_args args;

	if (ii->i_assoc_inode)
		return 0;

	args.ino = inode->i_ino;
	args.root = ii->i_root;
	args.cno = ii->i_cno;
	args.type = ii->i_type | NILFS_I_TYPE_BTNC;

	btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
				  nilfs_iget_set, &args);
	if (unlikely(!btnc_inode))
		return -ENOMEM;
	if (btnc_inode->i_state & I_NEW) {
		nilfs_init_btnc_inode(btnc_inode);
		unlock_new_inode(btnc_inode);
	}
	NILFS_I(btnc_inode)->i_assoc_inode = inode;
	NILFS_I(btnc_inode)->i_bmap = ii->i_bmap;
	ii->i_assoc_inode = btnc_inode;

	return 0;
}

/**
 * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode
 * @inode: inode object
 *
 * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its
 * holder inode bound to @inode, or does nothing if @inode doesn't have it.
 */
void nilfs_detach_btree_node_cache(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct inode *btnc_inode = ii->i_assoc_inode;

	if (btnc_inode) {
		NILFS_I(btnc_inode)->i_assoc_inode = NULL;
		ii->i_assoc_inode = NULL;
		iput(btnc_inode);
	}
}

/**
 * nilfs_iget_for_shadow - obtain inode for shadow mapping
 * @inode: inode object that uses shadow mapping
 *
 * nilfs_iget_for_shadow() allocates a pair of inodes that holds page
 * caches for shadow mapping. The page cache for data pages is set up
 * in one inode and the one for b-tree node pages is set up in the
 * other inode, which is attached to the former inode.
 *
 * Return Value: On success, a pointer to the inode for data pages is
 * returned. On errors, one of the following negative error codes is
 * returned in a pointer type.
 *
 * %-ENOMEM - Insufficient memory available.
 */
struct inode *nilfs_iget_for_shadow(struct inode *inode)
{
	struct nilfs_iget_args args = {
		.ino = inode->i_ino, .root = NULL, .cno = 0,
		.type = NILFS_I_TYPE_SHADOW
	};
	struct inode *s_inode;
	int err;

	s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
			       nilfs_iget_set, &args);
	if (unlikely(!s_inode))
		return ERR_PTR(-ENOMEM);
	if (!(s_inode->i_state & I_NEW))
		return inode;

	NILFS_I(s_inode)->i_flags = 0;
	memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap));
	mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS);

	err = nilfs_attach_btree_node_cache(s_inode);
	if (unlikely(err)) {
		iget_failed(s_inode);
		return ERR_PTR(err);
	}
	unlock_new_inode(s_inode);
	return s_inode;
}

/**
 * nilfs_write_inode_common - export common inode information to on-disk inode
 * @inode: inode object
 * @raw_inode: on-disk inode
 *
 * This function writes standard information from the on-memory inode @inode
 * to @raw_inode on ifile, cpfile or a super root block. Since inode bmap
 * data is not exported, nilfs_bmap_write() must be called separately during
 * log writing.
 */
void nilfs_write_inode_common(struct inode *inode,
			      struct nilfs_inode *raw_inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);

	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
	raw_inode->i_uid = cpu_to_le32(i_uid_read(inode));
	raw_inode->i_gid = cpu_to_le32(i_gid_read(inode));
	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
	raw_inode->i_size = cpu_to_le64(inode->i_size);
	raw_inode->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
	raw_inode->i_mtime = cpu_to_le64(inode_get_mtime_sec(inode));
	raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
	raw_inode->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode));
	raw_inode->i_blocks = cpu_to_le64(inode->i_blocks);

	raw_inode->i_flags = cpu_to_le32(ii->i_flags);
	raw_inode->i_generation = cpu_to_le32(inode->i_generation);

	/*
	 * When extending inode, nilfs->ns_inode_size should be checked
	 * for substitutions of appended fields.
	 */
}

void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags)
{
	ino_t ino = inode->i_ino;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct inode *ifile = ii->i_root->ifile;
	struct nilfs_inode *raw_inode;

	raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh);

	if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state))
		memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size);
	if (flags & I_DIRTY_DATASYNC)
		set_bit(NILFS_I_INODE_SYNC, &ii->i_state);

	nilfs_write_inode_common(inode, raw_inode);

	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		raw_inode->i_device_code =
			cpu_to_le64(huge_encode_dev(inode->i_rdev));

	nilfs_ifile_unmap_inode(raw_inode);
}

#define NILFS_MAX_TRUNCATE_BLOCKS	16384  /* 64MB for 4KB block */

static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
				unsigned long from)
{
	__u64 b;
	int ret;

	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
		return;
repeat:
	ret = nilfs_bmap_last_key(ii->i_bmap, &b);
	if (ret == -ENOENT)
		return;
	else if (ret < 0)
		goto failed;

	if (b < from)
		return;

	b -= min_t(__u64, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
	ret = nilfs_bmap_truncate(ii->i_bmap, b);
	nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb);
	if (!ret || (ret == -ENOMEM &&
		     nilfs_bmap_truncate(ii->i_bmap, b) == 0))
		goto repeat;

failed:
	nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%lu)",
		   ret, ii->vfs_inode.i_ino);
}

void nilfs_truncate(struct inode *inode)
{
	unsigned long blkoff;
	unsigned int blocksize;
	struct nilfs_transaction_info ti;
	struct super_block *sb = inode->i_sb;
	struct nilfs_inode_info *ii = NILFS_I(inode);

	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
		return;
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
		return;

	blocksize = sb->s_blocksize;
	blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits;
	nilfs_transaction_begin(sb, &ti, 0); /* never fails */

	block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block);

	nilfs_truncate_bmap(ii, blkoff);

	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
	if (IS_SYNC(inode))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);

	nilfs_mark_inode_dirty(inode);
	nilfs_set_file_dirty(inode, 0);
	nilfs_transaction_commit(sb);
	/*
	 * May construct a logical segment and may fail in sync mode.
	 * But truncate has no return value.
	 */
}

static void nilfs_clear_inode(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);

	/*
	 * Free resources allocated in nilfs_read_inode(), here.
	 */
	BUG_ON(!list_empty(&ii->i_dirty));
	brelse(ii->i_bh);
	ii->i_bh = NULL;

	if (nilfs_is_metadata_file_inode(inode))
		nilfs_mdt_clear(inode);

	if (test_bit(NILFS_I_BMAP, &ii->i_state))
		nilfs_bmap_clear(ii->i_bmap);

	if (!(ii->i_type & NILFS_I_TYPE_BTNC))
		nilfs_detach_btree_node_cache(inode);

	if (ii->i_root && inode->i_ino == NILFS_ROOT_INO)
		nilfs_put_root(ii->i_root);
}

void nilfs_evict_inode(struct inode *inode)
{
	struct nilfs_transaction_info ti;
	struct super_block *sb = inode->i_sb;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs;
	int ret;

	if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
		truncate_inode_pages_final(&inode->i_data);
		clear_inode(inode);
		nilfs_clear_inode(inode);
		return;
	}
	nilfs_transaction_begin(sb, &ti, 0); /* never fails */

	truncate_inode_pages_final(&inode->i_data);

	nilfs = sb->s_fs_info;
	if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) {
		/*
		 * If this inode is about to be disposed after the file system
		 * has been degraded to read-only due to file system corruption
		 * or after the writer has been detached, do not make any
		 * changes that cause writes, just clear it.
		 * Do this check after read-locking ns_segctor_sem by
		 * nilfs_transaction_begin() in order to avoid a race with
		 * the writer detach operation.
		 */
		clear_inode(inode);
		nilfs_clear_inode(inode);
		nilfs_transaction_abort(sb);
		return;
	}

	/* TODO: some of the following operations may fail. */
	nilfs_truncate_bmap(ii, 0);
	nilfs_mark_inode_dirty(inode);
	clear_inode(inode);

	ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
	if (!ret)
		atomic64_dec(&ii->i_root->inodes_count);

	nilfs_clear_inode(inode);

	if (IS_SYNC(inode))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);
	nilfs_transaction_commit(sb);
	/*
	 * May construct a logical segment and may fail in sync mode.
	 * But delete_inode has no return value.
	 */
}

int nilfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
		  struct iattr *iattr)
{
	struct nilfs_transaction_info ti;
	struct inode *inode = d_inode(dentry);
	struct super_block *sb = inode->i_sb;
	int err;

	err = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
	if (err)
		return err;

	err = nilfs_transaction_begin(sb, &ti, 0);
	if (unlikely(err))
		return err;

	if ((iattr->ia_valid & ATTR_SIZE) &&
	    iattr->ia_size != i_size_read(inode)) {
		inode_dio_wait(inode);
		truncate_setsize(inode, iattr->ia_size);
		nilfs_truncate(inode);
	}

	setattr_copy(&nop_mnt_idmap, inode, iattr);
	mark_inode_dirty(inode);

	if (iattr->ia_valid & ATTR_MODE) {
		err = nilfs_acl_chmod(inode);
		if (unlikely(err))
			goto out_err;
	}

	return nilfs_transaction_commit(sb);

out_err:
	nilfs_transaction_abort(sb);
	return err;
}

int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
		     int mask)
{
	struct nilfs_root *root = NILFS_I(inode)->i_root;

	if ((mask & MAY_WRITE) && root &&
	    root->cno != NILFS_CPTREE_CURRENT_CNO)
		return -EROFS; /* snapshot is not writable */

	return generic_permission(&nop_mnt_idmap, inode, mask);
}

int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
{
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int err;

	spin_lock(&nilfs->ns_inode_lock);
	if (ii->i_bh == NULL || unlikely(!buffer_uptodate(ii->i_bh))) {
		spin_unlock(&nilfs->ns_inode_lock);
		err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
						  inode->i_ino, pbh);
		if (unlikely(err))
			return err;
		spin_lock(&nilfs->ns_inode_lock);
		if (ii->i_bh == NULL)
			ii->i_bh = *pbh;
		else if (unlikely(!buffer_uptodate(ii->i_bh))) {
			__brelse(ii->i_bh);
			ii->i_bh = *pbh;
		} else {
			brelse(*pbh);
			*pbh = ii->i_bh;
		}
	} else
		*pbh = ii->i_bh;

	get_bh(*pbh);
	spin_unlock(&nilfs->ns_inode_lock);
	return 0;
}

int nilfs_inode_dirty(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	int ret = 0;

	if (!list_empty(&ii->i_dirty)) {
		spin_lock(&nilfs->ns_inode_lock);
		ret = test_bit(NILFS_I_DIRTY, &ii->i_state) ||
			test_bit(NILFS_I_BUSY, &ii->i_state);
		spin_unlock(&nilfs->ns_inode_lock);
	}
	return ret;
}

int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;

	atomic_add(nr_dirty, &nilfs->ns_ndirtyblks);

	if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state))
		return 0;

	spin_lock(&nilfs->ns_inode_lock);
	if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
	    !test_bit(NILFS_I_BUSY, &ii->i_state)) {
		/*
		 * Because this routine may race with nilfs_dispose_list(),
		 * we have to check NILFS_I_QUEUED here, too.
		 */
		if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
			/*
			 * This will happen when somebody is freeing
			 * this inode.
			 */
			nilfs_warn(inode->i_sb,
				   "cannot set file dirty (ino=%lu): the file is being freed",
				   inode->i_ino);
			spin_unlock(&nilfs->ns_inode_lock);
			return -EINVAL; /*
					 * NILFS_I_DIRTY may remain for
					 * freeing inode.
					 */
		}
		list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
		set_bit(NILFS_I_QUEUED, &ii->i_state);
	}
	spin_unlock(&nilfs->ns_inode_lock);
	return 0;
}

int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
{
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	struct buffer_head *ibh;
	int err;

	/*
	 * Do not dirty inodes after the log writer has been detached
	 * and its nilfs_root struct has been freed.
	 */
	if (unlikely(nilfs_purging(nilfs)))
		return 0;

	err = nilfs_load_inode_block(inode, &ibh);
	if (unlikely(err)) {
		nilfs_warn(inode->i_sb,
			   "cannot mark inode dirty (ino=%lu): error %d loading inode block",
			   inode->i_ino, err);
		return err;
	}
	nilfs_update_inode(inode, ibh, flags);
	mark_buffer_dirty(ibh);
	nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile);
	brelse(ibh);
	return 0;
}

/**
 * nilfs_dirty_inode - reflect changes on given inode to an inode block.
 * @inode: inode of the file to be registered.
 * @flags: flags to determine the dirty state of the inode
 *
 * nilfs_dirty_inode() loads an inode block containing the specified
 * @inode and copies data from a nilfs_inode to a corresponding inode
 * entry in the inode block. This operation is excluded from the segment
 * construction. This function can be called both as a single operation
 * and as a part of indivisible file operations.
 */
void nilfs_dirty_inode(struct inode *inode, int flags)
{
	struct nilfs_transaction_info ti;
	struct nilfs_mdt_info *mdi = NILFS_MDT(inode);

	if (is_bad_inode(inode)) {
		nilfs_warn(inode->i_sb,
			   "tried to mark bad_inode dirty. ignored.");
		dump_stack();
		return;
	}
	if (mdi) {
		nilfs_mdt_mark_dirty(inode);
		return;
	}
	nilfs_transaction_begin(inode->i_sb, &ti, 0);
	__nilfs_mark_inode_dirty(inode, flags);
	nilfs_transaction_commit(inode->i_sb); /* never fails */
}

int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 __u64 start, __u64 len)
{
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	__u64 logical = 0, phys = 0, size = 0;
	__u32 flags = 0;
	loff_t isize;
	sector_t blkoff, end_blkoff;
	sector_t delalloc_blkoff;
	unsigned long delalloc_blklen;
	unsigned int blkbits = inode->i_blkbits;
	int ret, n;

	ret = fiemap_prep(inode, fieinfo, start, &len, 0);
	if (ret)
		return ret;

	inode_lock(inode);

	isize = i_size_read(inode);

	blkoff = start >> blkbits;
	end_blkoff = (start + len - 1) >> blkbits;

	delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff,
							&delalloc_blkoff);

	do {
		__u64 blkphy;
		unsigned int maxblocks;

		if (delalloc_blklen && blkoff == delalloc_blkoff) {
			if (size) {
				/* End of the current extent */
				ret = fiemap_fill_next_extent(
					fieinfo, logical, phys, size, flags);
				if (ret)
					break;
			}
			if (blkoff > end_blkoff)
				break;

			flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC;
			logical = blkoff << blkbits;
			phys = 0;
			size = delalloc_blklen << blkbits;

			blkoff = delalloc_blkoff + delalloc_blklen;
			delalloc_blklen = nilfs_find_uncommitted_extent(
				inode, blkoff, &delalloc_blkoff);
			continue;
		}

		/*
		 * Limit the number of blocks that we look up so as
		 * not to get into the next delayed allocation extent.
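		 * (maxblocks is clamped below to the distance from the
		 * current block offset to that extent, when one was found.)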
		 */
		maxblocks = INT_MAX;
		if (delalloc_blklen)
			maxblocks = min_t(sector_t, delalloc_blkoff - blkoff,
					  maxblocks);
		blkphy = 0;

		down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
		n = nilfs_bmap_lookup_contig(
			NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks);
		up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);

		if (n < 0) {
			int past_eof;

			if (unlikely(n != -ENOENT))
				break; /* error */

			/* HOLE */
			blkoff++;
			past_eof = ((blkoff << blkbits) >= isize);

			if (size) {
				/* End of the current extent */

				if (past_eof)
					flags |= FIEMAP_EXTENT_LAST;

				ret = fiemap_fill_next_extent(
					fieinfo, logical, phys, size, flags);
				if (ret)
					break;
				size = 0;
			}
			if (blkoff > end_blkoff || past_eof)
				break;
		} else {
			if (size) {
				if (phys && blkphy << blkbits == phys + size) {
					/* The current extent goes on */
					size += n << blkbits;
				} else {
					/* Terminate the current extent */
					ret = fiemap_fill_next_extent(
						fieinfo, logical, phys, size,
						flags);
					if (ret || blkoff > end_blkoff)
						break;

					/* Start another extent */
					flags = FIEMAP_EXTENT_MERGED;
					logical = blkoff << blkbits;
					phys = blkphy << blkbits;
					size = n << blkbits;
				}
			} else {
				/* Start a new extent */
				flags = FIEMAP_EXTENT_MERGED;
				logical = blkoff << blkbits;
				phys = blkphy << blkbits;
				size = n << blkbits;
			}
			blkoff += n;
		}
		cond_resched();
	} while (true);

	/* If ret is 1 then we just hit the end of the extent array */
	if (ret == 1)
		ret = 0;

	inode_unlock(inode);
	return ret;
}