// SPDX-License-Identifier: GPL-2.0+
/*
 * NILFS inode operations.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * Written by Ryusuke Konishi.
 *
 */

#include <linux/buffer_head.h>
#include <linux/gfp.h>
#include <linux/mpage.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/uio.h>
#include <linux/fiemap.h>
#include <linux/random.h>
#include "nilfs.h"
#include "btnode.h"
#include "segment.h"
#include "page.h"
#include "mdt.h"
#include "cpfile.h"
#include "ifile.h"

/**
 * struct nilfs_iget_args - arguments used during comparison between inodes
 * @ino: inode number
 * @cno: checkpoint number
 * @root: pointer on NILFS root object (mounted checkpoint)
 * @type: inode type
 */
struct nilfs_iget_args {
	u64 ino;
	__u64 cno;
	struct nilfs_root *root;
	unsigned int type;
};

static int nilfs_iget_test(struct inode *inode, void *opaque);

void nilfs_inode_add_blocks(struct inode *inode, int n)
{
	struct nilfs_root *root = NILFS_I(inode)->i_root;

	inode_add_bytes(inode, i_blocksize(inode) * n);
	if (root)
		atomic64_add(n, &root->blocks_count);
}

void nilfs_inode_sub_blocks(struct inode *inode, int n)
{
	struct nilfs_root *root = NILFS_I(inode)->i_root;

	inode_sub_bytes(inode, i_blocksize(inode) * n);
	if (root)
		atomic64_sub(n, &root->blocks_count);
}

/**
 * nilfs_get_block() - get a file block on the filesystem (callback function)
 * @inode: inode struct of the target file
 * @blkoff: file block number
 * @bh_result: buffer head to be mapped on
 * @create: indicate whether allocating the block or not when it has not
 *	been allocated yet.
 *
 * This function does not issue actual read request of the specified data
 * block. It is done by VFS.
 *
 * Return: 0 on success, or a negative error code on failure.
 */
int nilfs_get_block(struct inode *inode, sector_t blkoff,
		    struct buffer_head *bh_result, int create)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	__u64 blknum = 0;
	int err = 0, ret;
	unsigned int maxblocks = bh_result->b_size >> inode->i_blkbits;

	down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	if (ret >= 0) {	/* found */
		map_bh(bh_result, inode->i_sb, blknum);
		if (ret > 0)
			bh_result->b_size = (ret << inode->i_blkbits);
		goto out;
	}
	/* data block was not found */
	if (ret == -ENOENT && create) {
		struct nilfs_transaction_info ti;

		bh_result->b_blocknr = 0;
		err = nilfs_transaction_begin(inode->i_sb, &ti, 1);
		if (unlikely(err))
			goto out;
		err = nilfs_bmap_insert(ii->i_bmap, blkoff,
					(unsigned long)bh_result);
		if (unlikely(err != 0)) {
			if (err == -EEXIST) {
				/*
				 * The get_block() function could be called
				 * from multiple callers for an inode.
				 * However, the page having this block must
				 * be locked in this case.
				 */
				nilfs_warn(inode->i_sb,
					   "%s (ino=%lu): a race condition while inserting a data block at offset=%llu",
					   __func__, inode->i_ino,
					   (unsigned long long)blkoff);
				err = -EAGAIN;
			}
			nilfs_transaction_abort(inode->i_sb);
			goto out;
		}
		nilfs_mark_inode_dirty_sync(inode);
		nilfs_transaction_commit(inode->i_sb); /* never fails */
		/* Error handling should be detailed */
		set_buffer_new(bh_result);
		set_buffer_delay(bh_result);
		map_bh(bh_result, inode->i_sb, 0);
		/* Disk block number must be changed to proper value */

	} else if (ret == -ENOENT) {
		/*
		 * not found is not error (e.g. hole); must return without
		 * the mapped state flag.
		 */
		;
	} else {
		err = ret;
	}

 out:
	return err;
}

/**
 * nilfs_read_folio() - implement read_folio() method of nilfs_aops {}
 * address_space_operations.
 * @file: file struct of the file to be read
 * @folio: the folio to be read
 *
 * Return: 0 on success, or a negative error code on failure.
 */
static int nilfs_read_folio(struct file *file, struct folio *folio)
{
	return mpage_read_folio(folio, nilfs_get_block);
}

static void nilfs_readahead(struct readahead_control *rac)
{
	mpage_readahead(rac, nilfs_get_block);
}

static int nilfs_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	int err = 0;

	if (sb_rdonly(inode->i_sb)) {
		nilfs_clear_dirty_pages(mapping);
		return -EROFS;
	}

	if (wbc->sync_mode == WB_SYNC_ALL)
		err = nilfs_construct_dsync_segment(inode->i_sb, inode,
						    wbc->range_start,
						    wbc->range_end);
	return err;
}

static bool nilfs_dirty_folio(struct address_space *mapping,
			      struct folio *folio)
{
	struct inode *inode = mapping->host;
	struct buffer_head *head;
	unsigned int nr_dirty = 0;
	bool ret = filemap_dirty_folio(mapping, folio);

	/*
	 * The page may not be locked, eg if called from try_to_unmap_one()
	 */
	spin_lock(&mapping->i_private_lock);
	head = folio_buffers(folio);
	if (head) {
		struct buffer_head *bh = head;

		do {
			/* Do not mark hole blocks dirty */
			if (buffer_dirty(bh) || !buffer_mapped(bh))
				continue;

			set_buffer_dirty(bh);
			nr_dirty++;
		} while (bh = bh->b_this_page, bh != head);
	} else if (ret) {
		nr_dirty = 1 << (folio_shift(folio) - inode->i_blkbits);
	}
	spin_unlock(&mapping->i_private_lock);

	if (nr_dirty)
		nilfs_set_file_dirty(inode, nr_dirty);
	return ret;
}

void nilfs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;

	if (to > inode->i_size) {
		truncate_pagecache(inode, inode->i_size);
		nilfs_truncate(inode);
	}
}

static int nilfs_write_begin(const struct kiocb *iocb,
			     struct address_space *mapping,
			     loff_t pos, unsigned len,
			     struct folio **foliop, void **fsdata)
{
	struct inode *inode = mapping->host;
	int err = nilfs_transaction_begin(inode->i_sb, NULL, 1);

	if (unlikely(err))
		return err;

	err = block_write_begin(mapping, pos, len, foliop, nilfs_get_block);
	if (unlikely(err)) {
		nilfs_write_failed(mapping, pos + len);
		nilfs_transaction_abort(inode->i_sb);
	}
	return err;
}

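/*
 * Descriptive note (added): nilfs_write_end() counts the buffers newly
 * dirtied by this write with nilfs_page_count_clean_buffers(), completes
 * the write through generic_write_end(), registers the dirty buffer count
 * with the segment constructor via nilfs_set_file_dirty(), and commits the
 * transaction opened in nilfs_write_begin().  It returns the number of
 * bytes copied, or a negative error code if the transaction commit fails.
 */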
static int nilfs_write_end(const struct kiocb *iocb,
			   struct address_space *mapping,
			   loff_t pos, unsigned len, unsigned copied,
			   struct folio *folio, void *fsdata)
{
	struct inode *inode = mapping->host;
	unsigned int start = pos & (PAGE_SIZE - 1);
	unsigned int nr_dirty;
	int err;

	nr_dirty = nilfs_page_count_clean_buffers(folio, start,
						  start + copied);
	copied = generic_write_end(iocb, mapping, pos, len, copied, folio,
				   fsdata);
	nilfs_set_file_dirty(inode, nr_dirty);
	err = nilfs_transaction_commit(inode->i_sb);
	return err ? : copied;
}

static ssize_t
nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	if (iov_iter_rw(iter) == WRITE)
		return 0;

	/* Needs synchronization with the cleaner */
	return blockdev_direct_IO(iocb, inode, iter, nilfs_get_block);
}

const struct address_space_operations nilfs_aops = {
	.read_folio		= nilfs_read_folio,
	.writepages		= nilfs_writepages,
	.dirty_folio		= nilfs_dirty_folio,
	.readahead		= nilfs_readahead,
	.write_begin		= nilfs_write_begin,
	.write_end		= nilfs_write_end,
	.invalidate_folio	= block_invalidate_folio,
	.direct_IO		= nilfs_direct_IO,
	.migrate_folio		= buffer_migrate_folio_norefs,
	.is_partially_uptodate	= block_is_partially_uptodate,
};

const struct address_space_operations nilfs_buffer_cache_aops = {
	.invalidate_folio	= block_invalidate_folio,
};

static int nilfs_insert_inode_locked(struct inode *inode,
				     struct nilfs_root *root,
				     unsigned long ino)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
	};

	return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
}

struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
{
	struct super_block *sb = dir->i_sb;
	struct inode *inode;
	struct nilfs_inode_info *ii;
	struct nilfs_root *root;
	struct buffer_head *bh;
	int err = -ENOMEM;
	ino_t ino;

	inode = new_inode(sb);
	if (unlikely(!inode))
		goto failed;

	mapping_set_gfp_mask(inode->i_mapping,
			     mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));

	root = NILFS_I(dir)->i_root;
	ii = NILFS_I(inode);
	ii->i_state = BIT(NILFS_I_NEW);
	ii->i_type = NILFS_I_TYPE_NORMAL;
	ii->i_root = root;

	err = nilfs_ifile_create_inode(root->ifile, &ino, &bh);
	if (unlikely(err))
		goto failed_ifile_create_inode;
	/* reference count of i_bh inherits from nilfs_mdt_read_block() */
	ii->i_bh = bh;

	atomic64_inc(&root->inodes_count);
	inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
	inode->i_ino = ino;
	simple_inode_init_ts(inode);

	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
		err = nilfs_bmap_read(ii->i_bmap, NULL);
		if (err < 0)
			goto failed_after_creation;

		set_bit(NILFS_I_BMAP, &ii->i_state);
		/* No lock is needed; iget() ensures it. */
	}

	ii->i_flags = nilfs_mask_flags(
		mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED);

	/* ii->i_file_acl = 0; */
	/* ii->i_dir_acl = 0; */
	ii->i_dir_start_lookup = 0;
	nilfs_set_inode_flags(inode);
	inode->i_generation = get_random_u32();
	if (nilfs_insert_inode_locked(inode, root, ino) < 0) {
		err = -EIO;
		goto failed_after_creation;
	}

	err = nilfs_init_acl(inode, dir);
	if (unlikely(err))
		/*
		 * Never occur. When supporting nilfs_init_acl(),
		 * proper cancellation of above jobs should be considered.
		 */
		goto failed_after_creation;

	return inode;

 failed_after_creation:
	clear_nlink(inode);
	if (inode->i_state & I_NEW)
		unlock_new_inode(inode);
	iput(inode);  /*
		       * raw_inode will be deleted through
		       * nilfs_evict_inode().
		       */
	goto failed;

 failed_ifile_create_inode:
	make_bad_inode(inode);
	iput(inode);
 failed:
	return ERR_PTR(err);
}

void nilfs_set_inode_flags(struct inode *inode)
{
	unsigned int flags = NILFS_I(inode)->i_flags;
	unsigned int new_fl = 0;

	if (flags & FS_SYNC_FL)
		new_fl |= S_SYNC;
	if (flags & FS_APPEND_FL)
		new_fl |= S_APPEND;
	if (flags & FS_IMMUTABLE_FL)
		new_fl |= S_IMMUTABLE;
	if (flags & FS_NOATIME_FL)
		new_fl |= S_NOATIME;
	if (flags & FS_DIRSYNC_FL)
		new_fl |= S_DIRSYNC;
	inode_set_flags(inode, new_fl, S_SYNC | S_APPEND | S_IMMUTABLE |
			S_NOATIME | S_DIRSYNC);
}

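/*
 * Descriptive note (added): nilfs_read_inode_common() copies the mode,
 * ownership, link count, size, timestamps, block count, flags and
 * generation number from the on-disk inode @raw_inode into the in-memory
 * inode @inode, and reads the bmap data for regular files, directories and
 * symlinks.  It returns 0 on success, -EIO if a metadata file inode is
 * corrupted, -ESTALE if the inode has a zero link count (deleted), or a
 * negative error code from nilfs_bmap_read().
 */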
int nilfs_read_inode_common(struct inode *inode,
			    struct nilfs_inode *raw_inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int err;

	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
	i_uid_write(inode, le32_to_cpu(raw_inode->i_uid));
	i_gid_write(inode, le32_to_cpu(raw_inode->i_gid));
	set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
	inode->i_size = le64_to_cpu(raw_inode->i_size);
	inode_set_atime(inode, le64_to_cpu(raw_inode->i_mtime),
			le32_to_cpu(raw_inode->i_mtime_nsec));
	inode_set_ctime(inode, le64_to_cpu(raw_inode->i_ctime),
			le32_to_cpu(raw_inode->i_ctime_nsec));
	inode_set_mtime(inode, le64_to_cpu(raw_inode->i_mtime),
			le32_to_cpu(raw_inode->i_mtime_nsec));
	if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode))
		return -EIO; /* this inode is for metadata and corrupted */
	if (inode->i_nlink == 0)
		return -ESTALE; /* this inode is deleted */

	inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
	ii->i_flags = le32_to_cpu(raw_inode->i_flags);
#if 0
	ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
	ii->i_dir_acl = S_ISREG(inode->i_mode) ?
		0 : le32_to_cpu(raw_inode->i_dir_acl);
#endif
	ii->i_dir_start_lookup = 0;
	inode->i_generation = le32_to_cpu(raw_inode->i_generation);

	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
	    S_ISLNK(inode->i_mode)) {
		err = nilfs_bmap_read(ii->i_bmap, raw_inode);
		if (err < 0)
			return err;
		set_bit(NILFS_I_BMAP, &ii->i_state);
		/* No lock is needed; iget() ensures it. */
	}
	return 0;
}

static int __nilfs_read_inode(struct super_block *sb,
			      struct nilfs_root *root, unsigned long ino,
			      struct inode *inode)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct buffer_head *bh;
	struct nilfs_inode *raw_inode;
	int err;

	down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh);
	if (unlikely(err))
		goto bad_inode;

	raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh);

	err = nilfs_read_inode_common(inode, raw_inode);
	if (err)
		goto failed_unmap;

	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &nilfs_file_inode_operations;
		inode->i_fop = &nilfs_file_operations;
		inode->i_mapping->a_ops = &nilfs_aops;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &nilfs_dir_inode_operations;
		inode->i_fop = &nilfs_dir_operations;
		inode->i_mapping->a_ops = &nilfs_aops;
	} else if (S_ISLNK(inode->i_mode)) {
		inode->i_op = &nilfs_symlink_inode_operations;
		inode_nohighmem(inode);
		inode->i_mapping->a_ops = &nilfs_aops;
	} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
		   S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
		inode->i_op = &nilfs_special_inode_operations;
		init_special_inode(
			inode, inode->i_mode,
			huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
	} else {
		nilfs_error(sb,
			    "invalid file type bits in mode 0%o for inode %lu",
			    inode->i_mode, ino);
		err = -EIO;
		goto failed_unmap;
	}
	nilfs_ifile_unmap_inode(raw_inode);
	brelse(bh);
	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	nilfs_set_inode_flags(inode);
	mapping_set_gfp_mask(inode->i_mapping,
			     mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
	return 0;

 failed_unmap:
	nilfs_ifile_unmap_inode(raw_inode);
	brelse(bh);

 bad_inode:
	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	return err;
}

static int nilfs_iget_test(struct inode *inode, void *opaque)
{
	struct nilfs_iget_args *args = opaque;
	struct nilfs_inode_info *ii;

	if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root)
		return 0;

	ii = NILFS_I(inode);
	if (ii->i_type != args->type)
		return 0;

	return !(args->type & NILFS_I_TYPE_GC) || args->cno == ii->i_cno;
}

static int nilfs_iget_set(struct inode *inode, void *opaque)
{
	struct nilfs_iget_args *args = opaque;

	inode->i_ino = args->ino;
	NILFS_I(inode)->i_cno = args->cno;
	NILFS_I(inode)->i_root = args->root;
	NILFS_I(inode)->i_type = args->type;
	if (args->root && args->ino == NILFS_ROOT_INO)
		nilfs_get_root(args->root);
	return 0;
}

struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
			    unsigned long ino)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
	};

	return ilookup5(sb, ino, nilfs_iget_test, &args);
}

struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
				unsigned long ino)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
	};

	return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
}

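/*
 * Descriptive note (added): nilfs_iget() looks the inode up in the inode
 * cache and, if it is not cached, reads it from the ifile of @root through
 * __nilfs_read_inode().  It returns the inode on success, or an ERR_PTR():
 * -ENOMEM if the inode cannot be allocated, -ESTALE if a cached inode has
 * a zero link count, or the error returned by __nilfs_read_inode().
 */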
struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root,
			 unsigned long ino)
{
	struct inode *inode;
	int err;

	inode = nilfs_iget_locked(sb, root, ino);
	if (unlikely(!inode))
		return ERR_PTR(-ENOMEM);

	if (!(inode->i_state & I_NEW)) {
		if (!inode->i_nlink) {
			iput(inode);
			return ERR_PTR(-ESTALE);
		}
		return inode;
	}

	err = __nilfs_read_inode(sb, root, ino, inode);
	if (unlikely(err)) {
		iget_failed(inode);
		return ERR_PTR(err);
	}
	unlock_new_inode(inode);
	return inode;
}

struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
				__u64 cno)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = NULL, .cno = cno, .type = NILFS_I_TYPE_GC
	};
	struct inode *inode;
	int err;

	inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
	if (unlikely(!inode))
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;

	err = nilfs_init_gcinode(inode);
	if (unlikely(err)) {
		iget_failed(inode);
		return ERR_PTR(err);
	}
	unlock_new_inode(inode);
	return inode;
}

/**
 * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode
 * @inode: inode object
 *
 * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode,
 * or does nothing if the inode already has it. This function allocates
 * an additional inode to maintain page cache of B-tree nodes one-on-one.
 *
 * Return: 0 on success, or %-ENOMEM if memory is insufficient.
 */
int nilfs_attach_btree_node_cache(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct inode *btnc_inode;
	struct nilfs_iget_args args;

	if (ii->i_assoc_inode)
		return 0;

	args.ino = inode->i_ino;
	args.root = ii->i_root;
	args.cno = ii->i_cno;
	args.type = ii->i_type | NILFS_I_TYPE_BTNC;

	btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
				  nilfs_iget_set, &args);
	if (unlikely(!btnc_inode))
		return -ENOMEM;
	if (btnc_inode->i_state & I_NEW) {
		nilfs_init_btnc_inode(btnc_inode);
		unlock_new_inode(btnc_inode);
	}
	NILFS_I(btnc_inode)->i_assoc_inode = inode;
	NILFS_I(btnc_inode)->i_bmap = ii->i_bmap;
	ii->i_assoc_inode = btnc_inode;

	return 0;
}

/**
 * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode
 * @inode: inode object
 *
 * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its
 * holder inode bound to @inode, or does nothing if @inode doesn't have it.
 */
void nilfs_detach_btree_node_cache(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct inode *btnc_inode = ii->i_assoc_inode;

	if (btnc_inode) {
		NILFS_I(btnc_inode)->i_assoc_inode = NULL;
		ii->i_assoc_inode = NULL;
		iput(btnc_inode);
	}
}

/**
 * nilfs_iget_for_shadow - obtain inode for shadow mapping
 * @inode: inode object that uses shadow mapping
 *
 * nilfs_iget_for_shadow() allocates a pair of inodes that holds page
 * caches for shadow mapping. The page cache for data pages is set up
 * in one inode and the one for b-tree node pages is set up in the
 * other inode, which is attached to the former inode.
 *
 * Return: a pointer to the inode for data pages on success, or %-ENOMEM
 * if memory is insufficient.
 */
struct inode *nilfs_iget_for_shadow(struct inode *inode)
{
	struct nilfs_iget_args args = {
		.ino = inode->i_ino, .root = NULL, .cno = 0,
		.type = NILFS_I_TYPE_SHADOW
	};
	struct inode *s_inode;
	int err;

	s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
			       nilfs_iget_set, &args);
	if (unlikely(!s_inode))
		return ERR_PTR(-ENOMEM);
	if (!(s_inode->i_state & I_NEW))
		return inode;

	NILFS_I(s_inode)->i_flags = 0;
	memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap));
	mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS);
	s_inode->i_mapping->a_ops = &nilfs_buffer_cache_aops;

	err = nilfs_attach_btree_node_cache(s_inode);
	if (unlikely(err)) {
		iget_failed(s_inode);
		return ERR_PTR(err);
	}
	unlock_new_inode(s_inode);
	return s_inode;
}

/**
 * nilfs_write_inode_common - export common inode information to on-disk inode
 * @inode: inode object
 * @raw_inode: on-disk inode
 *
 * This function writes standard information from the on-memory inode @inode
 * to @raw_inode on ifile, cpfile or a super root block. Since inode bmap
 * data is not exported, nilfs_bmap_write() must be called separately during
 * log writing.
 */
void nilfs_write_inode_common(struct inode *inode,
			      struct nilfs_inode *raw_inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);

	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
	raw_inode->i_uid = cpu_to_le32(i_uid_read(inode));
	raw_inode->i_gid = cpu_to_le32(i_gid_read(inode));
	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
	raw_inode->i_size = cpu_to_le64(inode->i_size);
	raw_inode->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
	raw_inode->i_mtime = cpu_to_le64(inode_get_mtime_sec(inode));
	raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
	raw_inode->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode));
	raw_inode->i_blocks = cpu_to_le64(inode->i_blocks);

	raw_inode->i_flags = cpu_to_le32(ii->i_flags);
	raw_inode->i_generation = cpu_to_le32(inode->i_generation);

	/*
	 * When extending inode, nilfs->ns_inode_size should be checked
	 * for substitutions of appended fields.
	 */
}

void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags)
{
	ino_t ino = inode->i_ino;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct inode *ifile = ii->i_root->ifile;
	struct nilfs_inode *raw_inode;

	raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh);

	if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state))
		memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size);
	if (flags & I_DIRTY_DATASYNC)
		set_bit(NILFS_I_INODE_SYNC, &ii->i_state);

	nilfs_write_inode_common(inode, raw_inode);

	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		raw_inode->i_device_code =
			cpu_to_le64(huge_encode_dev(inode->i_rdev));

	nilfs_ifile_unmap_inode(raw_inode);
}

#define NILFS_MAX_TRUNCATE_BLOCKS	16384  /* 64MB for 4KB block */

static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
				unsigned long from)
{
	__u64 b;
	int ret;

	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
		return;
repeat:
	ret = nilfs_bmap_last_key(ii->i_bmap, &b);
	if (ret == -ENOENT)
		return;
	else if (ret < 0)
		goto failed;

	if (b < from)
		return;

	b -= min_t(__u64, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
	ret = nilfs_bmap_truncate(ii->i_bmap, b);
	nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb);
	if (!ret || (ret == -ENOMEM &&
		     nilfs_bmap_truncate(ii->i_bmap, b) == 0))
		goto repeat;

failed:
	nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%lu)",
		   ret, ii->vfs_inode.i_ino);
}

void nilfs_truncate(struct inode *inode)
{
	unsigned long blkoff;
	unsigned int blocksize;
	struct nilfs_transaction_info ti;
	struct super_block *sb = inode->i_sb;
	struct nilfs_inode_info *ii = NILFS_I(inode);

	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
		return;
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
		return;

	blocksize = sb->s_blocksize;
	blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits;
	nilfs_transaction_begin(sb, &ti, 0); /* never fails */

	block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block);

	nilfs_truncate_bmap(ii, blkoff);

	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
	if (IS_SYNC(inode))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);

	nilfs_mark_inode_dirty(inode);
	nilfs_set_file_dirty(inode, 0);
	nilfs_transaction_commit(sb);
	/*
	 * May construct a logical segment and may fail in sync mode.
	 * But truncate has no return value.
	 */
}

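/*
 * Descriptive note (added): nilfs_clear_inode() releases the in-memory
 * state attached to an inode that is being destroyed: the cached inode
 * block buffer, metadata file private data, bmap data, the associated
 * B-tree node cache inode, and the reference to the NILFS root object.
 * It is used as a helper by nilfs_evict_inode().
 */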
static void nilfs_clear_inode(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);

	/*
	 * Free resources allocated in nilfs_read_inode(), here.
	 */
	BUG_ON(!list_empty(&ii->i_dirty));
	brelse(ii->i_bh);
	ii->i_bh = NULL;

	if (nilfs_is_metadata_file_inode(inode))
		nilfs_mdt_clear(inode);

	if (test_bit(NILFS_I_BMAP, &ii->i_state))
		nilfs_bmap_clear(ii->i_bmap);

	if (!(ii->i_type & NILFS_I_TYPE_BTNC))
		nilfs_detach_btree_node_cache(inode);

	if (ii->i_root && inode->i_ino == NILFS_ROOT_INO)
		nilfs_put_root(ii->i_root);
}

void nilfs_evict_inode(struct inode *inode)
{
	struct nilfs_transaction_info ti;
	struct super_block *sb = inode->i_sb;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs;
	int ret;

	if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
		truncate_inode_pages_final(&inode->i_data);
		clear_inode(inode);
		nilfs_clear_inode(inode);
		return;
	}
	nilfs_transaction_begin(sb, &ti, 0); /* never fails */

	truncate_inode_pages_final(&inode->i_data);

	nilfs = sb->s_fs_info;
	if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) {
		/*
		 * If this inode is about to be disposed after the file system
		 * has been degraded to read-only due to file system corruption
		 * or after the writer has been detached, do not make any
		 * changes that cause writes, just clear it.
		 * Do this check after read-locking ns_segctor_sem by
		 * nilfs_transaction_begin() in order to avoid a race with
		 * the writer detach operation.
		 */
		clear_inode(inode);
		nilfs_clear_inode(inode);
		nilfs_transaction_abort(sb);
		return;
	}

	/* TODO: some of the following operations may fail. */
	nilfs_truncate_bmap(ii, 0);
	nilfs_mark_inode_dirty(inode);
	clear_inode(inode);

	ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
	if (!ret)
		atomic64_dec(&ii->i_root->inodes_count);

	nilfs_clear_inode(inode);

	if (IS_SYNC(inode))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);
	nilfs_transaction_commit(sb);
	/*
	 * May construct a logical segment and may fail in sync mode.
	 * But delete_inode has no return value.
	 */
}

int nilfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
		  struct iattr *iattr)
{
	struct nilfs_transaction_info ti;
	struct inode *inode = d_inode(dentry);
	struct super_block *sb = inode->i_sb;
	int err;

	err = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
	if (err)
		return err;

	err = nilfs_transaction_begin(sb, &ti, 0);
	if (unlikely(err))
		return err;

	if ((iattr->ia_valid & ATTR_SIZE) &&
	    iattr->ia_size != i_size_read(inode)) {
		inode_dio_wait(inode);
		truncate_setsize(inode, iattr->ia_size);
		nilfs_truncate(inode);
	}

	setattr_copy(&nop_mnt_idmap, inode, iattr);
	mark_inode_dirty(inode);

	if (iattr->ia_valid & ATTR_MODE) {
		err = nilfs_acl_chmod(inode);
		if (unlikely(err))
			goto out_err;
	}

	return nilfs_transaction_commit(sb);

out_err:
	nilfs_transaction_abort(sb);
	return err;
}

int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
		     int mask)
{
	struct nilfs_root *root = NILFS_I(inode)->i_root;

	if ((mask & MAY_WRITE) && root &&
	    root->cno != NILFS_CPTREE_CURRENT_CNO)
		return -EROFS; /* snapshot is not writable */

	return generic_permission(&nop_mnt_idmap, inode, mask);
}

int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
{
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int err;

	spin_lock(&nilfs->ns_inode_lock);
	if (ii->i_bh == NULL || unlikely(!buffer_uptodate(ii->i_bh))) {
		spin_unlock(&nilfs->ns_inode_lock);
		err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
						  inode->i_ino, pbh);
		if (unlikely(err))
			return err;
		spin_lock(&nilfs->ns_inode_lock);
		if (ii->i_bh == NULL)
			ii->i_bh = *pbh;
		else if (unlikely(!buffer_uptodate(ii->i_bh))) {
			__brelse(ii->i_bh);
			ii->i_bh = *pbh;
		} else {
			brelse(*pbh);
			*pbh = ii->i_bh;
		}
	} else
		*pbh = ii->i_bh;

	get_bh(*pbh);
	spin_unlock(&nilfs->ns_inode_lock);
	return 0;
}

int nilfs_inode_dirty(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	int ret = 0;

	if (!list_empty(&ii->i_dirty)) {
		spin_lock(&nilfs->ns_inode_lock);
		ret = test_bit(NILFS_I_DIRTY, &ii->i_state) ||
			test_bit(NILFS_I_BUSY, &ii->i_state);
		spin_unlock(&nilfs->ns_inode_lock);
	}
	return ret;
}

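/*
 * Descriptive note (added): nilfs_set_file_dirty() adds @nr_dirty to the
 * filesystem-wide dirty block counter and, if the inode is not already
 * queued for the segment constructor, moves it to the dirty files list so
 * that its blocks are written out by the next log write.  It returns 0 on
 * success, or -EINVAL if the inode is being freed and can no longer be
 * grabbed.
 */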
int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;

	atomic_add(nr_dirty, &nilfs->ns_ndirtyblks);

	if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state))
		return 0;

	spin_lock(&nilfs->ns_inode_lock);
	if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
	    !test_bit(NILFS_I_BUSY, &ii->i_state)) {
		/*
		 * Because this routine may race with nilfs_dispose_list(),
		 * we have to check NILFS_I_QUEUED here, too.
		 */
		if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
			/*
			 * This will happen when somebody is freeing
			 * this inode.
			 */
			nilfs_warn(inode->i_sb,
				   "cannot set file dirty (ino=%lu): the file is being freed",
				   inode->i_ino);
			spin_unlock(&nilfs->ns_inode_lock);
			return -EINVAL; /*
					 * NILFS_I_DIRTY may remain for
					 * freeing inode.
					 */
		}
		list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
		set_bit(NILFS_I_QUEUED, &ii->i_state);
	}
	spin_unlock(&nilfs->ns_inode_lock);
	return 0;
}

int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
{
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	struct buffer_head *ibh;
	int err;

	/*
	 * Do not dirty inodes after the log writer has been detached
	 * and its nilfs_root struct has been freed.
	 */
	if (unlikely(nilfs_purging(nilfs)))
		return 0;

	err = nilfs_load_inode_block(inode, &ibh);
	if (unlikely(err)) {
		nilfs_warn(inode->i_sb,
			   "cannot mark inode dirty (ino=%lu): error %d loading inode block",
			   inode->i_ino, err);
		return err;
	}
	nilfs_update_inode(inode, ibh, flags);
	mark_buffer_dirty(ibh);
	nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile);
	brelse(ibh);
	return 0;
}

/**
 * nilfs_dirty_inode - reflect changes on given inode to an inode block.
 * @inode: inode of the file to be registered.
 * @flags: flags to determine the dirty state of the inode
 *
 * nilfs_dirty_inode() loads an inode block containing the specified
 * @inode and copies data from a nilfs_inode to a corresponding inode
 * entry in the inode block. This operation is excluded from the segment
 * construction. This function can be called both as a single operation
 * and as a part of indivisible file operations.
 */
void nilfs_dirty_inode(struct inode *inode, int flags)
{
	struct nilfs_transaction_info ti;
	struct nilfs_mdt_info *mdi = NILFS_MDT(inode);

	if (is_bad_inode(inode)) {
		nilfs_warn(inode->i_sb,
			   "tried to mark bad_inode dirty. ignored.");
ignored."); 1090 dump_stack(); 1091 return; 1092 } 1093 if (mdi) { 1094 nilfs_mdt_mark_dirty(inode); 1095 return; 1096 } 1097 nilfs_transaction_begin(inode->i_sb, &ti, 0); 1098 __nilfs_mark_inode_dirty(inode, flags); 1099 nilfs_transaction_commit(inode->i_sb); /* never fails */ 1100 } 1101 1102 int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 1103 __u64 start, __u64 len) 1104 { 1105 struct the_nilfs *nilfs = inode->i_sb->s_fs_info; 1106 __u64 logical = 0, phys = 0, size = 0; 1107 __u32 flags = 0; 1108 loff_t isize; 1109 sector_t blkoff, end_blkoff; 1110 sector_t delalloc_blkoff; 1111 unsigned long delalloc_blklen; 1112 unsigned int blkbits = inode->i_blkbits; 1113 int ret, n; 1114 1115 ret = fiemap_prep(inode, fieinfo, start, &len, 0); 1116 if (ret) 1117 return ret; 1118 1119 inode_lock(inode); 1120 1121 isize = i_size_read(inode); 1122 1123 blkoff = start >> blkbits; 1124 end_blkoff = (start + len - 1) >> blkbits; 1125 1126 delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff, 1127 &delalloc_blkoff); 1128 1129 do { 1130 __u64 blkphy; 1131 unsigned int maxblocks; 1132 1133 if (delalloc_blklen && blkoff == delalloc_blkoff) { 1134 if (size) { 1135 /* End of the current extent */ 1136 ret = fiemap_fill_next_extent( 1137 fieinfo, logical, phys, size, flags); 1138 if (ret) 1139 break; 1140 } 1141 if (blkoff > end_blkoff) 1142 break; 1143 1144 flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC; 1145 logical = blkoff << blkbits; 1146 phys = 0; 1147 size = delalloc_blklen << blkbits; 1148 1149 blkoff = delalloc_blkoff + delalloc_blklen; 1150 delalloc_blklen = nilfs_find_uncommitted_extent( 1151 inode, blkoff, &delalloc_blkoff); 1152 continue; 1153 } 1154 1155 /* 1156 * Limit the number of blocks that we look up so as 1157 * not to get into the next delayed allocation extent. 
		 */
		maxblocks = INT_MAX;
		if (delalloc_blklen)
			maxblocks = min_t(sector_t, delalloc_blkoff - blkoff,
					  maxblocks);
		blkphy = 0;

		down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
		n = nilfs_bmap_lookup_contig(
			NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks);
		up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);

		if (n < 0) {
			int past_eof;

			if (unlikely(n != -ENOENT))
				break; /* error */

			/* HOLE */
			blkoff++;
			past_eof = ((blkoff << blkbits) >= isize);

			if (size) {
				/* End of the current extent */

				if (past_eof)
					flags |= FIEMAP_EXTENT_LAST;

				ret = fiemap_fill_next_extent(
					fieinfo, logical, phys, size, flags);
				if (ret)
					break;
				size = 0;
			}
			if (blkoff > end_blkoff || past_eof)
				break;
		} else {
			if (size) {
				if (phys && blkphy << blkbits == phys + size) {
					/* The current extent goes on */
					size += (u64)n << blkbits;
				} else {
					/* Terminate the current extent */
					ret = fiemap_fill_next_extent(
						fieinfo, logical, phys, size,
						flags);
					if (ret || blkoff > end_blkoff)
						break;

					/* Start another extent */
					flags = FIEMAP_EXTENT_MERGED;
					logical = blkoff << blkbits;
					phys = blkphy << blkbits;
					size = (u64)n << blkbits;
				}
			} else {
				/* Start a new extent */
				flags = FIEMAP_EXTENT_MERGED;
				logical = blkoff << blkbits;
				phys = blkphy << blkbits;
				size = (u64)n << blkbits;
			}
			blkoff += n;
		}
		cond_resched();
	} while (true);

	/* If ret is 1 then we just hit the end of the extent array */
	if (ret == 1)
		ret = 0;

	inode_unlock(inode);
	return ret;
}