// SPDX-License-Identifier: GPL-2.0+
/*
 * NILFS inode operations.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * Written by Ryusuke Konishi.
 *
 */

#include <linux/buffer_head.h>
#include <linux/gfp.h>
#include <linux/mpage.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/uio.h>
#include <linux/fiemap.h>
#include <linux/random.h>
#include "nilfs.h"
#include "btnode.h"
#include "segment.h"
#include "page.h"
#include "mdt.h"
#include "cpfile.h"
#include "ifile.h"

/**
 * struct nilfs_iget_args - arguments used during comparison between inodes
 * @ino: inode number
 * @cno: checkpoint number
 * @root: pointer to NILFS root object (mounted checkpoint)
 * @type: inode type
 */
struct nilfs_iget_args {
	u64 ino;
	__u64 cno;
	struct nilfs_root *root;
	unsigned int type;
};

static int nilfs_iget_test(struct inode *inode, void *opaque);

void nilfs_inode_add_blocks(struct inode *inode, int n)
{
	struct nilfs_root *root = NILFS_I(inode)->i_root;

	inode_add_bytes(inode, i_blocksize(inode) * n);
	if (root)
		atomic64_add(n, &root->blocks_count);
}

void nilfs_inode_sub_blocks(struct inode *inode, int n)
{
	struct nilfs_root *root = NILFS_I(inode)->i_root;

	inode_sub_bytes(inode, i_blocksize(inode) * n);
	if (root)
		atomic64_sub(n, &root->blocks_count);
}

/**
 * nilfs_get_block() - get a file block on the filesystem (callback function)
 * @inode: inode struct of the target file
 * @blkoff: file block number
 * @bh_result: buffer head to be mapped on
 * @create: whether to allocate the block if it has not been allocated yet
 *
 * This function does not issue an actual read request for the specified
 * data block; that is done by the VFS.
 */
int nilfs_get_block(struct inode *inode, sector_t blkoff,
		    struct buffer_head *bh_result, int create)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	__u64 blknum = 0;
	int err = 0, ret;
	unsigned int maxblocks = bh_result->b_size >> inode->i_blkbits;

	down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	if (ret >= 0) {	/* found */
		map_bh(bh_result, inode->i_sb, blknum);
		if (ret > 0)
			bh_result->b_size = (ret << inode->i_blkbits);
		goto out;
	}
	/* data block was not found */
	if (ret == -ENOENT && create) {
		struct nilfs_transaction_info ti;

		bh_result->b_blocknr = 0;
		err = nilfs_transaction_begin(inode->i_sb, &ti, 1);
		if (unlikely(err))
			goto out;
		err = nilfs_bmap_insert(ii->i_bmap, blkoff,
					(unsigned long)bh_result);
		if (unlikely(err != 0)) {
			if (err == -EEXIST) {
				/*
				 * The get_block() function could be called
				 * from multiple callers for an inode.
				 * However, the page having this block must
				 * be locked in this case.
				 */
				nilfs_warn(inode->i_sb,
					   "%s (ino=%lu): a race condition while inserting a data block at offset=%llu",
					   __func__, inode->i_ino,
					   (unsigned long long)blkoff);
				err = -EAGAIN;
			}
			nilfs_transaction_abort(inode->i_sb);
			goto out;
		}
		nilfs_mark_inode_dirty_sync(inode);
		nilfs_transaction_commit(inode->i_sb); /* never fails */
		/* Error handling should be detailed */
		set_buffer_new(bh_result);
		set_buffer_delay(bh_result);
		map_bh(bh_result, inode->i_sb, 0);
		/* Disk block number must be changed to proper value */

	} else if (ret == -ENOENT) {
		/*
		 * A missing block is not an error (e.g. a hole); we must
		 * return without setting the mapped state flag.
		 */
		;
	} else {
		err = ret;
	}

out:
	return err;
}
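
/*
 * Illustrative sketch (not part of this file): one way a caller could
 * probe the mapping state of a single file block through the
 * nilfs_get_block() callback above.  A temporary on-stack buffer_head
 * is used the same way the generic helper generic_block_bmap() uses
 * one.  The helper name nilfs_example_block_mapped() is made up for
 * this example; passing create=0 means a hole is reported as
 * "not mapped" rather than allocated.
 */
static int __maybe_unused nilfs_example_block_mapped(struct inode *inode,
						     sector_t blkoff)
{
	struct buffer_head bh = { .b_size = i_blocksize(inode) };
	int err;

	err = nilfs_get_block(inode, blkoff, &bh, 0 /* do not create */);
	if (err)
		return err;
	return buffer_mapped(&bh) ? 1 : 0;
}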

/**
 * nilfs_read_folio() - implement the read_folio() address space operation
 * @file: file struct of the file to be read
 * @folio: the folio to be read
 */
static int nilfs_read_folio(struct file *file, struct folio *folio)
{
	return mpage_read_folio(folio, nilfs_get_block);
}

static void nilfs_readahead(struct readahead_control *rac)
{
	mpage_readahead(rac, nilfs_get_block);
}

static int nilfs_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	int err = 0;

	if (sb_rdonly(inode->i_sb)) {
		nilfs_clear_dirty_pages(mapping);
		return -EROFS;
	}

	if (wbc->sync_mode == WB_SYNC_ALL)
		err = nilfs_construct_dsync_segment(inode->i_sb, inode,
						    wbc->range_start,
						    wbc->range_end);
	return err;
}

static bool nilfs_dirty_folio(struct address_space *mapping,
			      struct folio *folio)
{
	struct inode *inode = mapping->host;
	struct buffer_head *head;
	unsigned int nr_dirty = 0;
	bool ret = filemap_dirty_folio(mapping, folio);

	/*
	 * The folio may not be locked, e.g. when called from
	 * try_to_unmap_one().
	 */
	spin_lock(&mapping->i_private_lock);
	head = folio_buffers(folio);
	if (head) {
		struct buffer_head *bh = head;

		do {
			/* Do not mark hole blocks dirty */
			if (buffer_dirty(bh) || !buffer_mapped(bh))
				continue;

			set_buffer_dirty(bh);
			nr_dirty++;
		} while (bh = bh->b_this_page, bh != head);
	} else if (ret) {
		nr_dirty = 1 << (folio_shift(folio) - inode->i_blkbits);
	}
	spin_unlock(&mapping->i_private_lock);

	if (nr_dirty)
		nilfs_set_file_dirty(inode, nr_dirty);
	return ret;
}

void nilfs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;

	if (to > inode->i_size) {
		truncate_pagecache(inode, inode->i_size);
		nilfs_truncate(inode);
	}
}

static int nilfs_write_begin(struct file *file, struct address_space *mapping,
			     loff_t pos, unsigned len,
			     struct folio **foliop, void **fsdata)
{
	struct inode *inode = mapping->host;
	int err = nilfs_transaction_begin(inode->i_sb, NULL, 1);

	if (unlikely(err))
		return err;

	err = block_write_begin(mapping, pos, len, foliop, nilfs_get_block);
	if (unlikely(err)) {
		nilfs_write_failed(mapping, pos + len);
		nilfs_transaction_abort(inode->i_sb);
	}
	return err;
}

static int nilfs_write_end(struct file *file, struct address_space *mapping,
			   loff_t pos, unsigned len, unsigned copied,
			   struct folio *folio, void *fsdata)
{
	struct inode *inode = mapping->host;
	unsigned int start = pos & (PAGE_SIZE - 1);
	unsigned int nr_dirty;
	int err;

	nr_dirty = nilfs_page_count_clean_buffers(folio, start,
						  start + copied);
	copied = generic_write_end(file, mapping, pos, len, copied, folio,
				   fsdata);
	nilfs_set_file_dirty(inode, nr_dirty);
	err = nilfs_transaction_commit(inode->i_sb);
	return err ? : copied;
}

static ssize_t
nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	if (iov_iter_rw(iter) == WRITE)
		return 0;

	/* Needs synchronization with the cleaner */
	return blockdev_direct_IO(iocb, inode, iter, nilfs_get_block);
}

const struct address_space_operations nilfs_aops = {
	.read_folio		= nilfs_read_folio,
	.writepages		= nilfs_writepages,
	.dirty_folio		= nilfs_dirty_folio,
	.readahead		= nilfs_readahead,
	.write_begin		= nilfs_write_begin,
	.write_end		= nilfs_write_end,
	.invalidate_folio	= block_invalidate_folio,
	.direct_IO		= nilfs_direct_IO,
	.migrate_folio		= buffer_migrate_folio_norefs,
	.is_partially_uptodate	= block_is_partially_uptodate,
};

static int nilfs_insert_inode_locked(struct inode *inode,
				     struct nilfs_root *root,
				     unsigned long ino)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
	};

	return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
}

struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
{
	struct super_block *sb = dir->i_sb;
	struct inode *inode;
	struct nilfs_inode_info *ii;
	struct nilfs_root *root;
	struct buffer_head *bh;
	int err = -ENOMEM;
	ino_t ino;

	inode = new_inode(sb);
	if (unlikely(!inode))
		goto failed;

	mapping_set_gfp_mask(inode->i_mapping,
			   mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));

	root = NILFS_I(dir)->i_root;
	ii = NILFS_I(inode);
	ii->i_state = BIT(NILFS_I_NEW);
	ii->i_type = NILFS_I_TYPE_NORMAL;
	ii->i_root = root;

	err = nilfs_ifile_create_inode(root->ifile, &ino, &bh);
	if (unlikely(err))
		goto failed_ifile_create_inode;
	/* reference count of i_bh inherits from nilfs_mdt_read_block() */
	ii->i_bh = bh;

	atomic64_inc(&root->inodes_count);
	inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
	inode->i_ino = ino;
	simple_inode_init_ts(inode);

	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
		err = nilfs_bmap_read(ii->i_bmap, NULL);
		if (err < 0)
			goto failed_after_creation;

		set_bit(NILFS_I_BMAP, &ii->i_state);
		/* No lock is needed; iget() ensures it. */
	}

	ii->i_flags = nilfs_mask_flags(
		mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED);

	/* ii->i_file_acl = 0; */
	/* ii->i_dir_acl = 0; */
	ii->i_dir_start_lookup = 0;
	nilfs_set_inode_flags(inode);
	inode->i_generation = get_random_u32();
	if (nilfs_insert_inode_locked(inode, root, ino) < 0) {
		err = -EIO;
		goto failed_after_creation;
	}

	err = nilfs_init_acl(inode, dir);
	if (unlikely(err))
		/*
		 * Never occurs.  When nilfs_init_acl() is actually
		 * implemented, proper cancellation of the jobs above
		 * must be considered.
		 */
		goto failed_after_creation;

	return inode;

failed_after_creation:
	clear_nlink(inode);
	if (inode->i_state & I_NEW)
		unlock_new_inode(inode);
	iput(inode);  /*
		       * raw_inode will be deleted through
		       * nilfs_evict_inode().
		       */
	goto failed;

failed_ifile_create_inode:
	make_bad_inode(inode);
	iput(inode);
failed:
	return ERR_PTR(err);
}
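
/*
 * Illustrative sketch (condensed and hypothetical; the real callers
 * live in namei.c): a directory operation wraps nilfs_new_inode() in a
 * transaction and either commits or aborts the whole thing.  The name
 * example_nilfs_create_file() and the simplified error handling are
 * assumptions made for this sketch; nilfs_add_link() is the regular
 * directory-entry helper declared in nilfs.h.
 */
static int __maybe_unused example_nilfs_create_file(struct inode *dir,
						    struct dentry *dentry,
						    umode_t mode)
{
	struct nilfs_transaction_info ti;
	struct inode *inode;
	int err;

	err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
	if (err)
		return err;

	inode = nilfs_new_inode(dir, S_IFREG | mode);
	if (IS_ERR(inode)) {
		nilfs_transaction_abort(dir->i_sb);
		return PTR_ERR(inode);
	}
	inode->i_op = &nilfs_file_inode_operations;
	inode->i_fop = &nilfs_file_operations;
	inode->i_mapping->a_ops = &nilfs_aops;
	nilfs_mark_inode_dirty(inode);

	err = nilfs_add_link(dentry, inode);
	if (!err) {
		d_instantiate_new(dentry, inode);
		return nilfs_transaction_commit(dir->i_sb);
	}
	/* Undo: drop the link count and let eviction delete the inode */
	inode_dec_link_count(inode);
	unlock_new_inode(inode);
	iput(inode);
	nilfs_transaction_abort(dir->i_sb);
	return err;
}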

void nilfs_set_inode_flags(struct inode *inode)
{
	unsigned int flags = NILFS_I(inode)->i_flags;
	unsigned int new_fl = 0;

	if (flags & FS_SYNC_FL)
		new_fl |= S_SYNC;
	if (flags & FS_APPEND_FL)
		new_fl |= S_APPEND;
	if (flags & FS_IMMUTABLE_FL)
		new_fl |= S_IMMUTABLE;
	if (flags & FS_NOATIME_FL)
		new_fl |= S_NOATIME;
	if (flags & FS_DIRSYNC_FL)
		new_fl |= S_DIRSYNC;
	inode_set_flags(inode, new_fl, S_SYNC | S_APPEND | S_IMMUTABLE |
			S_NOATIME | S_DIRSYNC);
}

int nilfs_read_inode_common(struct inode *inode,
			    struct nilfs_inode *raw_inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int err;

	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
	i_uid_write(inode, le32_to_cpu(raw_inode->i_uid));
	i_gid_write(inode, le32_to_cpu(raw_inode->i_gid));
	set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
	inode->i_size = le64_to_cpu(raw_inode->i_size);
	/* The on-disk inode has no atime field; initialize atime from mtime */
	inode_set_atime(inode, le64_to_cpu(raw_inode->i_mtime),
			le32_to_cpu(raw_inode->i_mtime_nsec));
	inode_set_ctime(inode, le64_to_cpu(raw_inode->i_ctime),
			le32_to_cpu(raw_inode->i_ctime_nsec));
	inode_set_mtime(inode, le64_to_cpu(raw_inode->i_mtime),
			le32_to_cpu(raw_inode->i_mtime_nsec));
	if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode))
		return -EIO; /* this inode is for metadata and corrupted */
	if (inode->i_nlink == 0)
		return -ESTALE; /* this inode is deleted */

	inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
	ii->i_flags = le32_to_cpu(raw_inode->i_flags);
#if 0
	ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
	ii->i_dir_acl = S_ISREG(inode->i_mode) ?
		0 : le32_to_cpu(raw_inode->i_dir_acl);
#endif
	ii->i_dir_start_lookup = 0;
	inode->i_generation = le32_to_cpu(raw_inode->i_generation);

	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
	    S_ISLNK(inode->i_mode)) {
		err = nilfs_bmap_read(ii->i_bmap, raw_inode);
		if (err < 0)
			return err;
		set_bit(NILFS_I_BMAP, &ii->i_state);
		/* No lock is needed; iget() ensures it. */
	}
	return 0;
}

static int __nilfs_read_inode(struct super_block *sb,
			      struct nilfs_root *root, unsigned long ino,
			      struct inode *inode)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct buffer_head *bh;
	struct nilfs_inode *raw_inode;
	int err;

	down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh);
	if (unlikely(err))
		goto bad_inode;

	raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh);

	err = nilfs_read_inode_common(inode, raw_inode);
	if (err)
		goto failed_unmap;

	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &nilfs_file_inode_operations;
		inode->i_fop = &nilfs_file_operations;
		inode->i_mapping->a_ops = &nilfs_aops;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &nilfs_dir_inode_operations;
		inode->i_fop = &nilfs_dir_operations;
		inode->i_mapping->a_ops = &nilfs_aops;
	} else if (S_ISLNK(inode->i_mode)) {
		inode->i_op = &nilfs_symlink_inode_operations;
		inode_nohighmem(inode);
		inode->i_mapping->a_ops = &nilfs_aops;
	} else {
		inode->i_op = &nilfs_special_inode_operations;
		init_special_inode(
			inode, inode->i_mode,
			huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
	}
	nilfs_ifile_unmap_inode(raw_inode);
	brelse(bh);
	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	nilfs_set_inode_flags(inode);
	mapping_set_gfp_mask(inode->i_mapping,
			   mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
	return 0;

failed_unmap:
	nilfs_ifile_unmap_inode(raw_inode);
	brelse(bh);

bad_inode:
	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	return err;
}

static int nilfs_iget_test(struct inode *inode, void *opaque)
{
	struct nilfs_iget_args *args = opaque;
	struct nilfs_inode_info *ii;

	if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root)
		return 0;

	ii = NILFS_I(inode);
	if (ii->i_type != args->type)
		return 0;

	return !(args->type & NILFS_I_TYPE_GC) || args->cno == ii->i_cno;
}

static int nilfs_iget_set(struct inode *inode, void *opaque)
{
	struct nilfs_iget_args *args = opaque;

	inode->i_ino = args->ino;
	NILFS_I(inode)->i_cno = args->cno;
	NILFS_I(inode)->i_root = args->root;
	NILFS_I(inode)->i_type = args->type;
	if (args->root && args->ino == NILFS_ROOT_INO)
		nilfs_get_root(args->root);
	return 0;
}

struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
			    unsigned long ino)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
	};

	return ilookup5(sb, ino, nilfs_iget_test, &args);
}

struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
				unsigned long ino)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL
	};

	return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
}

struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root,
			 unsigned long ino)
{
	struct inode *inode;
	int err;

	inode = nilfs_iget_locked(sb, root, ino);
	if (unlikely(!inode))
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;

	err = __nilfs_read_inode(sb, root, ino, inode);
	if (unlikely(err)) {
		iget_failed(inode);
		return ERR_PTR(err);
	}
	unlock_new_inode(inode);
	return inode;
}
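
/*
 * Illustrative sketch (hypothetical helper): resolving an inode number
 * inside the checkpoint a directory belongs to.  This mirrors the
 * pattern used by the lookup code in namei.c: the nilfs_root of the
 * parent directory selects the checkpoint, and nilfs_iget() returns
 * either a cached inode or a freshly read one.
 */
static struct inode *__maybe_unused
example_nilfs_iget_sibling(struct inode *dir, unsigned long ino)
{
	/* Use the same checkpoint (root) as the parent directory */
	return nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino);
}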

struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino,
				__u64 cno)
{
	struct nilfs_iget_args args = {
		.ino = ino, .root = NULL, .cno = cno, .type = NILFS_I_TYPE_GC
	};
	struct inode *inode;
	int err;

	inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args);
	if (unlikely(!inode))
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;

	err = nilfs_init_gcinode(inode);
	if (unlikely(err)) {
		iget_failed(inode);
		return ERR_PTR(err);
	}
	unlock_new_inode(inode);
	return inode;
}

/**
 * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode
 * @inode: inode object
 *
 * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode,
 * or does nothing if the inode already has it.  This function allocates
 * an additional inode to maintain the page cache of B-tree nodes one-on-one.
 *
 * Return Value: On success, 0 is returned.  On errors, one of the following
 * negative error codes is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 */
int nilfs_attach_btree_node_cache(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct inode *btnc_inode;
	struct nilfs_iget_args args;

	if (ii->i_assoc_inode)
		return 0;

	args.ino = inode->i_ino;
	args.root = ii->i_root;
	args.cno = ii->i_cno;
	args.type = ii->i_type | NILFS_I_TYPE_BTNC;

	btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
				  nilfs_iget_set, &args);
	if (unlikely(!btnc_inode))
		return -ENOMEM;
	if (btnc_inode->i_state & I_NEW) {
		nilfs_init_btnc_inode(btnc_inode);
		unlock_new_inode(btnc_inode);
	}
	NILFS_I(btnc_inode)->i_assoc_inode = inode;
	NILFS_I(btnc_inode)->i_bmap = ii->i_bmap;
	ii->i_assoc_inode = btnc_inode;

	return 0;
}

/**
 * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode
 * @inode: inode object
 *
 * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its
 * holder inode bound to @inode, or does nothing if @inode doesn't have it.
 */
void nilfs_detach_btree_node_cache(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct inode *btnc_inode = ii->i_assoc_inode;

	if (btnc_inode) {
		NILFS_I(btnc_inode)->i_assoc_inode = NULL;
		ii->i_assoc_inode = NULL;
		iput(btnc_inode);
	}
}

/**
 * nilfs_iget_for_shadow - obtain inode for shadow mapping
 * @inode: inode object that uses shadow mapping
 *
 * nilfs_iget_for_shadow() allocates a pair of inodes that holds page
 * caches for shadow mapping.  The page cache for data pages is set up
 * in one inode and the one for b-tree node pages is set up in the
 * other inode, which is attached to the former inode.
 *
 * Return Value: On success, a pointer to the inode for data pages is
 * returned.  On errors, one of the following negative error codes is
 * returned as an error pointer.
 *
 * %-ENOMEM - Insufficient memory available.
 */
struct inode *nilfs_iget_for_shadow(struct inode *inode)
{
	struct nilfs_iget_args args = {
		.ino = inode->i_ino, .root = NULL, .cno = 0,
		.type = NILFS_I_TYPE_SHADOW
	};
	struct inode *s_inode;
	int err;

	s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test,
			       nilfs_iget_set, &args);
	if (unlikely(!s_inode))
		return ERR_PTR(-ENOMEM);
	if (!(s_inode->i_state & I_NEW))
		return s_inode;

	NILFS_I(s_inode)->i_flags = 0;
	memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap));
	mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS);

	err = nilfs_attach_btree_node_cache(s_inode);
	if (unlikely(err)) {
		iget_failed(s_inode);
		return ERR_PTR(err);
	}
	unlock_new_inode(s_inode);
	return s_inode;
}

/**
 * nilfs_write_inode_common - export common inode information to on-disk inode
 * @inode:     inode object
 * @raw_inode: on-disk inode
 *
 * This function writes standard information from the on-memory inode @inode
 * to @raw_inode on ifile, cpfile or a super root block.  Since inode bmap
 * data is not exported, nilfs_bmap_write() must be called separately during
 * log writing.
 */
void nilfs_write_inode_common(struct inode *inode,
			      struct nilfs_inode *raw_inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);

	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
	raw_inode->i_uid = cpu_to_le32(i_uid_read(inode));
	raw_inode->i_gid = cpu_to_le32(i_gid_read(inode));
	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
	raw_inode->i_size = cpu_to_le64(inode->i_size);
	raw_inode->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
	raw_inode->i_mtime = cpu_to_le64(inode_get_mtime_sec(inode));
	raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
	raw_inode->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode));
	raw_inode->i_blocks = cpu_to_le64(inode->i_blocks);

	raw_inode->i_flags = cpu_to_le32(ii->i_flags);
	raw_inode->i_generation = cpu_to_le32(inode->i_generation);

	/*
	 * When extending the on-disk inode, nilfs->ns_inode_size should be
	 * checked before writing to any appended field.
	 */
}
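
/*
 * Illustrative sketch (assumed caller): a full on-disk inode image is
 * produced by combining nilfs_write_inode_common() with
 * nilfs_bmap_write(), which exports the bmap root data that the common
 * helper above deliberately leaves out.  The helper name
 * example_nilfs_export_inode() is made up; the real pairing happens in
 * the log writer (segment.c).
 */
static void __maybe_unused example_nilfs_export_inode(struct inode *inode,
						      struct nilfs_inode *raw_inode)
{
	nilfs_write_inode_common(inode, raw_inode);

	if (test_bit(NILFS_I_BMAP, &NILFS_I(inode)->i_state))
		nilfs_bmap_write(NILFS_I(inode)->i_bmap, raw_inode);
}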

void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags)
{
	ino_t ino = inode->i_ino;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct inode *ifile = ii->i_root->ifile;
	struct nilfs_inode *raw_inode;

	raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh);

	if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state))
		memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size);
	if (flags & I_DIRTY_DATASYNC)
		set_bit(NILFS_I_INODE_SYNC, &ii->i_state);

	nilfs_write_inode_common(inode, raw_inode);

	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		raw_inode->i_device_code =
			cpu_to_le64(huge_encode_dev(inode->i_rdev));

	nilfs_ifile_unmap_inode(raw_inode);
}

#define NILFS_MAX_TRUNCATE_BLOCKS	16384  /* 64MB for 4KB block */

static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
				unsigned long from)
{
	__u64 b;
	int ret;

	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
		return;
repeat:
	ret = nilfs_bmap_last_key(ii->i_bmap, &b);
	if (ret == -ENOENT)
		return;
	else if (ret < 0)
		goto failed;

	if (b < from)
		return;

	b -= min_t(__u64, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
	ret = nilfs_bmap_truncate(ii->i_bmap, b);
	nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb);
	if (!ret || (ret == -ENOMEM &&
		     nilfs_bmap_truncate(ii->i_bmap, b) == 0))
		goto repeat;

failed:
	nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%lu)",
		   ret, ii->vfs_inode.i_ino);
}

void nilfs_truncate(struct inode *inode)
{
	unsigned long blkoff;
	unsigned int blocksize;
	struct nilfs_transaction_info ti;
	struct super_block *sb = inode->i_sb;
	struct nilfs_inode_info *ii = NILFS_I(inode);

	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
		return;
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
		return;

	blocksize = sb->s_blocksize;
	blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits;
	nilfs_transaction_begin(sb, &ti, 0); /* never fails */

	block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block);

	nilfs_truncate_bmap(ii, blkoff);

	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
	if (IS_SYNC(inode))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);

	nilfs_mark_inode_dirty(inode);
	nilfs_set_file_dirty(inode, 0);
	nilfs_transaction_commit(sb);
	/*
	 * May construct a logical segment and may fail in sync mode.
	 * But truncate has no return value.
	 */
}

static void nilfs_clear_inode(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);

	/*
	 * Free the resources allocated in nilfs_read_inode() here.
	 */
	BUG_ON(!list_empty(&ii->i_dirty));
	brelse(ii->i_bh);
	ii->i_bh = NULL;

	if (nilfs_is_metadata_file_inode(inode))
		nilfs_mdt_clear(inode);

	if (test_bit(NILFS_I_BMAP, &ii->i_state))
		nilfs_bmap_clear(ii->i_bmap);

	if (!(ii->i_type & NILFS_I_TYPE_BTNC))
		nilfs_detach_btree_node_cache(inode);

	if (ii->i_root && inode->i_ino == NILFS_ROOT_INO)
		nilfs_put_root(ii->i_root);
}

void nilfs_evict_inode(struct inode *inode)
{
	struct nilfs_transaction_info ti;
	struct super_block *sb = inode->i_sb;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs;
	int ret;

	if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
		truncate_inode_pages_final(&inode->i_data);
		clear_inode(inode);
		nilfs_clear_inode(inode);
		return;
	}
	nilfs_transaction_begin(sb, &ti, 0); /* never fails */

	truncate_inode_pages_final(&inode->i_data);

	nilfs = sb->s_fs_info;
	if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) {
		/*
		 * If this inode is about to be disposed after the file system
		 * has been degraded to read-only due to file system corruption
		 * or after the writer has been detached, do not make any
		 * changes that cause writes, just clear it.
		 * Do this check after read-locking ns_segctor_sem by
		 * nilfs_transaction_begin() in order to avoid a race with
		 * the writer detach operation.
		 */
		clear_inode(inode);
		nilfs_clear_inode(inode);
		nilfs_transaction_abort(sb);
		return;
	}

	/* TODO: some of the following operations may fail. */
	nilfs_truncate_bmap(ii, 0);
	nilfs_mark_inode_dirty(inode);
	clear_inode(inode);

	ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
	if (!ret)
		atomic64_dec(&ii->i_root->inodes_count);

	nilfs_clear_inode(inode);

	if (IS_SYNC(inode))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);
	nilfs_transaction_commit(sb);
	/*
	 * May construct a logical segment and may fail in sync mode.
	 * But delete_inode has no return value.
	 */
}

int nilfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
		  struct iattr *iattr)
{
	struct nilfs_transaction_info ti;
	struct inode *inode = d_inode(dentry);
	struct super_block *sb = inode->i_sb;
	int err;

	err = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
	if (err)
		return err;

	err = nilfs_transaction_begin(sb, &ti, 0);
	if (unlikely(err))
		return err;

	if ((iattr->ia_valid & ATTR_SIZE) &&
	    iattr->ia_size != i_size_read(inode)) {
		inode_dio_wait(inode);
		truncate_setsize(inode, iattr->ia_size);
		nilfs_truncate(inode);
	}

	setattr_copy(&nop_mnt_idmap, inode, iattr);
	mark_inode_dirty(inode);

	if (iattr->ia_valid & ATTR_MODE) {
		err = nilfs_acl_chmod(inode);
		if (unlikely(err))
			goto out_err;
	}

	return nilfs_transaction_commit(sb);

out_err:
	nilfs_transaction_abort(sb);
	return err;
}

int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
		     int mask)
{
	struct nilfs_root *root = NILFS_I(inode)->i_root;

	if ((mask & MAY_WRITE) && root &&
	    root->cno != NILFS_CPTREE_CURRENT_CNO)
		return -EROFS; /* snapshot is not writable */

	return generic_permission(&nop_mnt_idmap, inode, mask);
}

int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
{
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int err;

	spin_lock(&nilfs->ns_inode_lock);
	if (ii->i_bh == NULL || unlikely(!buffer_uptodate(ii->i_bh))) {
		spin_unlock(&nilfs->ns_inode_lock);
		err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
						  inode->i_ino, pbh);
		if (unlikely(err))
			return err;
		spin_lock(&nilfs->ns_inode_lock);
		if (ii->i_bh == NULL)
			ii->i_bh = *pbh;
		else if (unlikely(!buffer_uptodate(ii->i_bh))) {
			__brelse(ii->i_bh);
			ii->i_bh = *pbh;
		} else {
			brelse(*pbh);
			*pbh = ii->i_bh;
		}
	} else
		*pbh = ii->i_bh;

	get_bh(*pbh);
	spin_unlock(&nilfs->ns_inode_lock);
	return 0;
}

int nilfs_inode_dirty(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	int ret = 0;

	if (!list_empty(&ii->i_dirty)) {
		spin_lock(&nilfs->ns_inode_lock);
		ret = test_bit(NILFS_I_DIRTY, &ii->i_state) ||
			test_bit(NILFS_I_BUSY, &ii->i_state);
		spin_unlock(&nilfs->ns_inode_lock);
	}
	return ret;
}

int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;

	atomic_add(nr_dirty, &nilfs->ns_ndirtyblks);

	if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state))
		return 0;

	spin_lock(&nilfs->ns_inode_lock);
	if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
	    !test_bit(NILFS_I_BUSY, &ii->i_state)) {
		/*
		 * Because this routine may race with nilfs_dispose_list(),
		 * we have to check NILFS_I_QUEUED here, too.
		 */
		if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
			/*
			 * This will happen when somebody is freeing
			 * this inode.
			 */
			nilfs_warn(inode->i_sb,
				   "cannot set file dirty (ino=%lu): the file is being freed",
				   inode->i_ino);
			spin_unlock(&nilfs->ns_inode_lock);
			return -EINVAL; /*
					 * NILFS_I_DIRTY may remain set for
					 * an inode being freed.
					 */
		}
		list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
		set_bit(NILFS_I_QUEUED, &ii->i_state);
	}
	spin_unlock(&nilfs->ns_inode_lock);
	return 0;
}
ignored."); 1072 dump_stack(); 1073 return; 1074 } 1075 if (mdi) { 1076 nilfs_mdt_mark_dirty(inode); 1077 return; 1078 } 1079 nilfs_transaction_begin(inode->i_sb, &ti, 0); 1080 __nilfs_mark_inode_dirty(inode, flags); 1081 nilfs_transaction_commit(inode->i_sb); /* never fails */ 1082 } 1083 1084 int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 1085 __u64 start, __u64 len) 1086 { 1087 struct the_nilfs *nilfs = inode->i_sb->s_fs_info; 1088 __u64 logical = 0, phys = 0, size = 0; 1089 __u32 flags = 0; 1090 loff_t isize; 1091 sector_t blkoff, end_blkoff; 1092 sector_t delalloc_blkoff; 1093 unsigned long delalloc_blklen; 1094 unsigned int blkbits = inode->i_blkbits; 1095 int ret, n; 1096 1097 ret = fiemap_prep(inode, fieinfo, start, &len, 0); 1098 if (ret) 1099 return ret; 1100 1101 inode_lock(inode); 1102 1103 isize = i_size_read(inode); 1104 1105 blkoff = start >> blkbits; 1106 end_blkoff = (start + len - 1) >> blkbits; 1107 1108 delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff, 1109 &delalloc_blkoff); 1110 1111 do { 1112 __u64 blkphy; 1113 unsigned int maxblocks; 1114 1115 if (delalloc_blklen && blkoff == delalloc_blkoff) { 1116 if (size) { 1117 /* End of the current extent */ 1118 ret = fiemap_fill_next_extent( 1119 fieinfo, logical, phys, size, flags); 1120 if (ret) 1121 break; 1122 } 1123 if (blkoff > end_blkoff) 1124 break; 1125 1126 flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC; 1127 logical = blkoff << blkbits; 1128 phys = 0; 1129 size = delalloc_blklen << blkbits; 1130 1131 blkoff = delalloc_blkoff + delalloc_blklen; 1132 delalloc_blklen = nilfs_find_uncommitted_extent( 1133 inode, blkoff, &delalloc_blkoff); 1134 continue; 1135 } 1136 1137 /* 1138 * Limit the number of blocks that we look up so as 1139 * not to get into the next delayed allocation extent. 

int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 __u64 start, __u64 len)
{
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	__u64 logical = 0, phys = 0, size = 0;
	__u32 flags = 0;
	loff_t isize;
	sector_t blkoff, end_blkoff;
	sector_t delalloc_blkoff;
	unsigned long delalloc_blklen;
	unsigned int blkbits = inode->i_blkbits;
	int ret, n;

	ret = fiemap_prep(inode, fieinfo, start, &len, 0);
	if (ret)
		return ret;

	inode_lock(inode);

	isize = i_size_read(inode);

	blkoff = start >> blkbits;
	end_blkoff = (start + len - 1) >> blkbits;

	delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff,
							&delalloc_blkoff);

	do {
		__u64 blkphy;
		unsigned int maxblocks;

		if (delalloc_blklen && blkoff == delalloc_blkoff) {
			if (size) {
				/* End of the current extent */
				ret = fiemap_fill_next_extent(
					fieinfo, logical, phys, size, flags);
				if (ret)
					break;
			}
			if (blkoff > end_blkoff)
				break;

			flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC;
			logical = blkoff << blkbits;
			phys = 0;
			size = delalloc_blklen << blkbits;

			blkoff = delalloc_blkoff + delalloc_blklen;
			delalloc_blklen = nilfs_find_uncommitted_extent(
				inode, blkoff, &delalloc_blkoff);
			continue;
		}

		/*
		 * Limit the number of blocks that we look up so as
		 * not to get into the next delayed allocation extent.
		 */
		maxblocks = INT_MAX;
		if (delalloc_blklen)
			maxblocks = min_t(sector_t, delalloc_blkoff - blkoff,
					  maxblocks);
		blkphy = 0;

		down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
		n = nilfs_bmap_lookup_contig(
			NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks);
		up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);

		if (n < 0) {
			int past_eof;

			if (unlikely(n != -ENOENT))
				break; /* error */

			/* HOLE */
			blkoff++;
			past_eof = ((blkoff << blkbits) >= isize);

			if (size) {
				/* End of the current extent */

				if (past_eof)
					flags |= FIEMAP_EXTENT_LAST;

				ret = fiemap_fill_next_extent(
					fieinfo, logical, phys, size, flags);
				if (ret)
					break;
				size = 0;
			}
			if (blkoff > end_blkoff || past_eof)
				break;
		} else {
			if (size) {
				if (phys && blkphy << blkbits == phys + size) {
					/* The current extent goes on */
					size += n << blkbits;
				} else {
					/* Terminate the current extent */
					ret = fiemap_fill_next_extent(
						fieinfo, logical, phys, size,
						flags);
					if (ret || blkoff > end_blkoff)
						break;

					/* Start another extent */
					flags = FIEMAP_EXTENT_MERGED;
					logical = blkoff << blkbits;
					phys = blkphy << blkbits;
					size = n << blkbits;
				}
			} else {
				/* Start a new extent */
				flags = FIEMAP_EXTENT_MERGED;
				logical = blkoff << blkbits;
				phys = blkphy << blkbits;
				size = n << blkbits;
			}
			blkoff += n;
		}
		cond_resched();
	} while (true);

	/* If ret is 1 then we just hit the end of the extent array */
	if (ret == 1)
		ret = 0;

	inode_unlock(inode);
	return ret;
}
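
#if 0	/* Illustrative userspace sketch, kept out of the kernel build */
/*
 * nilfs_fiemap() above services the FS_IOC_FIEMAP ioctl on a NILFS
 * file.  A minimal userspace caller (a separate program, not kernel
 * code) looks roughly like this; the extent count of 32 is an
 * arbitrary choice for the example.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char *argv[])
{
	struct fiemap *fm;
	unsigned int i;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;

	/* Room for up to 32 extents; one call may not cover the file */
	fm = calloc(1, sizeof(*fm) + 32 * sizeof(struct fiemap_extent));
	if (!fm)
		return 1;
	fm->fm_start = 0;
	fm->fm_length = FIEMAP_MAX_OFFSET;
	fm->fm_extent_count = 32;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0)
		for (i = 0; i < fm->fm_mapped_extents; i++)
			printf("logical=%llu phys=%llu len=%llu flags=%#x\n",
			       (unsigned long long)fm->fm_extents[i].fe_logical,
			       (unsigned long long)fm->fm_extents[i].fe_physical,
			       (unsigned long long)fm->fm_extents[i].fe_length,
			       fm->fm_extents[i].fe_flags);
	free(fm);
	close(fd);
	return 0;
}
#endif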