/*
 * linux/fs/inode.c
 *
 * (C) 1997 Linus Torvalds
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/dcache.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/writeback.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/wait.h>
#include <linux/rwsem.h>
#include <linux/hash.h>
#include <linux/swap.h>
#include <linux/security.h>
#include <linux/pagemap.h>
#include <linux/cdev.h>
#include <linux/bootmem.h>
#include <linux/fsnotify.h>
#include <linux/mount.h>
#include <linux/async.h>
#include <linux/posix_acl.h>
#include <linux/ima.h>

/*
 * This is needed for the following functions:
 *  - inode_has_buffers
 *  - invalidate_bdev
 *
 * FIXME: remove all knowledge of the buffer layer from this file
 */
#include <linux/buffer_head.h>

/*
 * New inode.c implementation.
 *
 * This implementation has the basic premise of trying
 * to be extremely low-overhead and SMP-safe, yet be
 * simple enough to be "obviously correct".
 *
 * Famous last words.
 */

/* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */

/* #define INODE_PARANOIA 1 */
/* #define INODE_DEBUG 1 */

/*
 * Inode lookup is no longer as critical as it used to be:
 * most of the lookups are going to be through the dcache.
 */
#define I_HASHBITS	i_hash_shift
#define I_HASHMASK	i_hash_mask

static unsigned int i_hash_mask __read_mostly;
static unsigned int i_hash_shift __read_mostly;

/*
 * Each inode can be on two separate lists. One is
 * the hash list of the inode, used for lookups. The
 * other linked list is the "type" list:
 *  "in_use" - valid inode, i_count > 0, i_nlink > 0
 *  "dirty"  - as "in_use" but also dirty
 *  "unused" - valid inode, i_count = 0
 *
 * A "dirty" list is maintained for each super block,
 * allowing for low-overhead inode sync() operations.
 */

static LIST_HEAD(inode_lru);
static struct hlist_head *inode_hashtable __read_mostly;

/*
 * A simple spinlock to protect the list manipulations.
 *
 * NOTE! You also have to own the lock if you change
 * the i_state of an inode while it is in use..
 */
DEFINE_SPINLOCK(inode_lock);

/*
 * iprune_sem provides exclusion between the kswapd or try_to_free_pages
 * icache shrinking path, and the umount path.  Without this exclusion,
 * by the time prune_icache calls iput for the inode whose pages it has
 * been invalidating, or by the time it calls clear_inode & destroy_inode
 * from its final dispose_list, the struct super_block they refer to
 * (for inode->i_sb->s_op) may already have been freed and reused.
 *
 * We make this an rwsem because the fastpath is icache shrinking. In
 * some cases a filesystem may be doing a significant amount of work in
 * its inode reclaim code, so this should improve parallelism.
 */
static DECLARE_RWSEM(iprune_sem);

/*
 * Statistics gathering..
 */
struct inodes_stat_t inodes_stat;

static DEFINE_PER_CPU(unsigned int, nr_inodes);

static struct kmem_cache *inode_cachep __read_mostly;

static int get_nr_inodes(void)
{
	int i;
	int sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_inodes, i);
	return sum < 0 ? 0 : sum;
}

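/*
 * Note (added for clarity, not from the original source): nr_inodes is a
 * per-CPU counter, incremented in inode_init_always() and decremented in
 * __destroy_inode(), possibly on different CPUs.  A snapshot summed by
 * get_nr_inodes() can therefore transiently come out negative when
 * allocations and frees race across CPUs, which is why the result is
 * clamped to zero rather than trusted blindly.
 */
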
static inline int get_nr_inodes_unused(void)
{
	return inodes_stat.nr_unused;
}

int get_nr_dirty_inodes(void)
{
	/* not actually dirty inodes, but a wild approximation */
	int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
	return nr_dirty > 0 ? nr_dirty : 0;
}

/*
 * Handle nr_inode sysctl
 */
#ifdef CONFIG_SYSCTL
int proc_nr_inodes(ctl_table *table, int write,
		   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	inodes_stat.nr_inodes = get_nr_inodes();
	return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif

static void wake_up_inode(struct inode *inode)
{
	/*
	 * Prevent speculative execution through spin_unlock(&inode_lock);
	 */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_NEW);
}

/**
 * inode_init_always - perform inode structure initialisation
 * @sb: superblock inode belongs to
 * @inode: inode to initialise
 *
 * These are initializations that need to be done on every inode
 * allocation as the fields are not initialised by slab allocation.
 */
int inode_init_always(struct super_block *sb, struct inode *inode)
{
	static const struct address_space_operations empty_aops;
	static const struct inode_operations empty_iops;
	static const struct file_operations empty_fops;
	struct address_space *const mapping = &inode->i_data;

	inode->i_sb = sb;
	inode->i_blkbits = sb->s_blocksize_bits;
	inode->i_flags = 0;
	atomic_set(&inode->i_count, 1);
	inode->i_op = &empty_iops;
	inode->i_fop = &empty_fops;
	inode->i_nlink = 1;
	inode->i_uid = 0;
	inode->i_gid = 0;
	atomic_set(&inode->i_writecount, 0);
	inode->i_size = 0;
	inode->i_blocks = 0;
	inode->i_bytes = 0;
	inode->i_generation = 0;
#ifdef CONFIG_QUOTA
	memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
#endif
	inode->i_pipe = NULL;
	inode->i_bdev = NULL;
	inode->i_cdev = NULL;
	inode->i_rdev = 0;
	inode->dirtied_when = 0;

	if (security_inode_alloc(inode))
		goto out;
	spin_lock_init(&inode->i_lock);
	lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);

	mutex_init(&inode->i_mutex);
	lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);

	init_rwsem(&inode->i_alloc_sem);
	lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);

	mapping->a_ops = &empty_aops;
	mapping->host = inode;
	mapping->flags = 0;
	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
	mapping->assoc_mapping = NULL;
	mapping->backing_dev_info = &default_backing_dev_info;
	mapping->writeback_index = 0;

	/*
	 * If the block_device provides a backing_dev_info for client
	 * inodes then use that.  Otherwise the inode shares the bdev's
	 * backing_dev_info.
	 */
	if (sb->s_bdev) {
		struct backing_dev_info *bdi;

		bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
		mapping->backing_dev_info = bdi;
	}
	inode->i_private = NULL;
	inode->i_mapping = mapping;
#ifdef CONFIG_FS_POSIX_ACL
	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
#endif

#ifdef CONFIG_FSNOTIFY
	inode->i_fsnotify_mask = 0;
#endif

	this_cpu_inc(nr_inodes);

	return 0;
out:
	return -ENOMEM;
}
EXPORT_SYMBOL(inode_init_always);

static struct inode *alloc_inode(struct super_block *sb)
{
	struct inode *inode;

	if (sb->s_op->alloc_inode)
		inode = sb->s_op->alloc_inode(sb);
	else
		inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);

	if (!inode)
		return NULL;

	if (unlikely(inode_init_always(sb, inode))) {
		if (inode->i_sb->s_op->destroy_inode)
			inode->i_sb->s_op->destroy_inode(inode);
		else
			kmem_cache_free(inode_cachep, inode);
		return NULL;
	}

	return inode;
}

void free_inode_nonrcu(struct inode *inode)
{
	kmem_cache_free(inode_cachep, inode);
}
EXPORT_SYMBOL(free_inode_nonrcu);

void __destroy_inode(struct inode *inode)
{
	BUG_ON(inode_has_buffers(inode));
	security_inode_free(inode);
	fsnotify_inode_delete(inode);
#ifdef CONFIG_FS_POSIX_ACL
	if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED)
		posix_acl_release(inode->i_acl);
	if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
		posix_acl_release(inode->i_default_acl);
#endif
	this_cpu_dec(nr_inodes);
}
EXPORT_SYMBOL(__destroy_inode);

static void i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	INIT_LIST_HEAD(&inode->i_dentry);
	kmem_cache_free(inode_cachep, inode);
}

static void destroy_inode(struct inode *inode)
{
	BUG_ON(!list_empty(&inode->i_lru));
	__destroy_inode(inode);
	if (inode->i_sb->s_op->destroy_inode)
		inode->i_sb->s_op->destroy_inode(inode);
	else
		call_rcu(&inode->i_rcu, i_callback);
}

void address_space_init_once(struct address_space *mapping)
{
	memset(mapping, 0, sizeof(*mapping));
	INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
	spin_lock_init(&mapping->tree_lock);
	spin_lock_init(&mapping->i_mmap_lock);
	INIT_LIST_HEAD(&mapping->private_list);
	spin_lock_init(&mapping->private_lock);
	INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
	INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
	mutex_init(&mapping->unmap_mutex);
}
EXPORT_SYMBOL(address_space_init_once);

/*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
 * of the inode, so let the slab be aware of that.
 */
void inode_init_once(struct inode *inode)
{
	memset(inode, 0, sizeof(*inode));
	INIT_HLIST_NODE(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_dentry);
	INIT_LIST_HEAD(&inode->i_devices);
	INIT_LIST_HEAD(&inode->i_wb_list);
	INIT_LIST_HEAD(&inode->i_lru);
	address_space_init_once(&inode->i_data);
	i_size_ordered_init(inode);
#ifdef CONFIG_FSNOTIFY
	INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
#endif
}
EXPORT_SYMBOL(inode_init_once);

static void init_once(void *foo)
{
	struct inode *inode = (struct inode *) foo;

	inode_init_once(inode);
}

/*
 * inode_lock must be held
 */
void __iget(struct inode *inode)
{
	atomic_inc(&inode->i_count);
}

/*
 * get additional reference to inode; caller must already hold one.
 */
void ihold(struct inode *inode)
{
	WARN_ON(atomic_inc_return(&inode->i_count) < 2);
}
EXPORT_SYMBOL(ihold);

static void inode_lru_list_add(struct inode *inode)
{
	if (list_empty(&inode->i_lru)) {
		list_add(&inode->i_lru, &inode_lru);
		inodes_stat.nr_unused++;
	}
}

static void inode_lru_list_del(struct inode *inode)
{
	if (!list_empty(&inode->i_lru)) {
		list_del_init(&inode->i_lru);
		inodes_stat.nr_unused--;
	}
}

static inline void __inode_sb_list_add(struct inode *inode)
{
	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
}

/**
 * inode_sb_list_add - add inode to the superblock list of inodes
 * @inode: inode to add
 */
void inode_sb_list_add(struct inode *inode)
{
	spin_lock(&inode_lock);
	__inode_sb_list_add(inode);
	spin_unlock(&inode_lock);
}
EXPORT_SYMBOL_GPL(inode_sb_list_add);

static inline void __inode_sb_list_del(struct inode *inode)
{
	list_del_init(&inode->i_sb_list);
}

static unsigned long hash(struct super_block *sb, unsigned long hashval)
{
	unsigned long tmp;

	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
			L1_CACHE_BYTES;
	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
	return tmp & I_HASHMASK;
}

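/*
 * Note (added for clarity, not from the original source): hash() folds the
 * superblock pointer into the filesystem-supplied hash value, so the same
 * hashval from different filesystems tends to land in different chains, and
 * the result is masked down to the table size computed at boot.  A lookup
 * only succeeds when the caller passes the same (sb, hashval) pair that was
 * used when the inode was hashed, roughly:
 *
 *	__insert_inode_hash(inode, hashval);
 *	...
 *	inode = ilookup5(sb, hashval, test, data);
 *
 * (illustrative sketch only; "hashval", "test" and "data" stand for whatever
 * key the filesystem chooses.)
 */
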
/**
 * __insert_inode_hash - hash an inode
 * @inode: unhashed inode
 * @hashval: unsigned long value used to locate this object in the
 *	inode_hashtable.
 *
 * Add an inode to the inode hash for this superblock.
 */
void __insert_inode_hash(struct inode *inode, unsigned long hashval)
{
	struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);

	spin_lock(&inode_lock);
	hlist_add_head(&inode->i_hash, b);
	spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(__insert_inode_hash);

/**
 * __remove_inode_hash - remove an inode from the hash
 * @inode: inode to unhash
 *
 * Remove an inode from the inode hash table.
 */
static void __remove_inode_hash(struct inode *inode)
{
	hlist_del_init(&inode->i_hash);
}

/**
 * remove_inode_hash - remove an inode from the hash
 * @inode: inode to unhash
 *
 * Remove an inode from the inode hash table.
 */
void remove_inode_hash(struct inode *inode)
{
	spin_lock(&inode_lock);
	hlist_del_init(&inode->i_hash);
	spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(remove_inode_hash);

void end_writeback(struct inode *inode)
{
	might_sleep();
	BUG_ON(inode->i_data.nrpages);
	BUG_ON(!list_empty(&inode->i_data.private_list));
	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(inode->i_state & I_CLEAR);
	inode_sync_wait(inode);
	/* don't need i_lock here, no concurrent mods to i_state */
	inode->i_state = I_FREEING | I_CLEAR;
}
EXPORT_SYMBOL(end_writeback);

static void evict(struct inode *inode)
{
	const struct super_operations *op = inode->i_sb->s_op;

	if (op->evict_inode) {
		op->evict_inode(inode);
	} else {
		if (inode->i_data.nrpages)
			truncate_inode_pages(&inode->i_data, 0);
		end_writeback(inode);
	}
	if (S_ISBLK(inode->i_mode) && inode->i_bdev)
		bd_forget(inode);
	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
		cd_forget(inode);
}

/*
 * dispose_list - dispose of the contents of a local list
 * @head: the head of the list to free
 *
 * Dispose-list gets a local list with local inodes in it, so it doesn't
 * need to worry about list corruption and SMP locks.
 */
static void dispose_list(struct list_head *head)
{
	while (!list_empty(head)) {
		struct inode *inode;

		inode = list_first_entry(head, struct inode, i_lru);
		list_del_init(&inode->i_lru);

		evict(inode);

		spin_lock(&inode_lock);
		__remove_inode_hash(inode);
		__inode_sb_list_del(inode);
		spin_unlock(&inode_lock);

		wake_up_inode(inode);
		destroy_inode(inode);
	}
}

/**
 * evict_inodes - evict all evictable inodes for a superblock
 * @sb: superblock to operate on
 *
 * Make sure that no inodes with zero refcount are retained.  This is
 * called by superblock shutdown after having MS_ACTIVE flag removed,
 * so any inode reaching zero refcount during or after that call will
 * be immediately evicted.
 */
void evict_inodes(struct super_block *sb)
{
	struct inode *inode, *next;
	LIST_HEAD(dispose);

	down_write(&iprune_sem);

	spin_lock(&inode_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		if (atomic_read(&inode->i_count))
			continue;

		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			WARN_ON(1);
			continue;
		}

		inode->i_state |= I_FREEING;

		/*
		 * Move the inode off the IO lists and LRU once I_FREEING is
		 * set so that it won't get moved back on there if it is dirty.
		 */
		list_move(&inode->i_lru, &dispose);
		list_del_init(&inode->i_wb_list);
		if (!(inode->i_state & (I_DIRTY | I_SYNC)))
			inodes_stat.nr_unused--;
	}
	spin_unlock(&inode_lock);

	dispose_list(&dispose);
	up_write(&iprune_sem);
}

/**
 * invalidate_inodes - attempt to free all inodes on a superblock
 * @sb: superblock to operate on
 * @kill_dirty: flag to guide handling of dirty inodes
 *
 * Attempts to free all inodes for a given superblock.  If there were any
 * busy inodes return a non-zero value, else zero.
 * If @kill_dirty is set, discard dirty inodes too, otherwise treat
 * them as busy.
 */
int invalidate_inodes(struct super_block *sb, bool kill_dirty)
{
	int busy = 0;
	struct inode *inode, *next;
	LIST_HEAD(dispose);

	down_write(&iprune_sem);

	spin_lock(&inode_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
			continue;
		if (inode->i_state & I_DIRTY && !kill_dirty) {
			busy = 1;
			continue;
		}
		if (atomic_read(&inode->i_count)) {
			busy = 1;
			continue;
		}

		inode->i_state |= I_FREEING;

		/*
		 * Move the inode off the IO lists and LRU once I_FREEING is
		 * set so that it won't get moved back on there if it is dirty.
		 */
		list_move(&inode->i_lru, &dispose);
		list_del_init(&inode->i_wb_list);
		if (!(inode->i_state & (I_DIRTY | I_SYNC)))
			inodes_stat.nr_unused--;
	}
	spin_unlock(&inode_lock);

	dispose_list(&dispose);
	up_write(&iprune_sem);

	return busy;
}

static int can_unuse(struct inode *inode)
{
	if (inode->i_state & ~I_REFERENCED)
		return 0;
	if (inode_has_buffers(inode))
		return 0;
	if (atomic_read(&inode->i_count))
		return 0;
	if (inode->i_data.nrpages)
		return 0;
	return 1;
}

/*
 * Scan `nr_to_scan' inodes on the unused list for freeable ones. They are
 * moved to a temporary list and then are freed outside inode_lock by
 * dispose_list().
 *
 * Any inodes which are pinned purely because of attached pagecache have their
 * pagecache removed.  If the inode has metadata buffers attached to
 * mapping->private_list then try to remove them.
 *
 * If the inode has the I_REFERENCED flag set, then it means that it has been
 * used recently - the flag is set in iput_final(). When we encounter such an
 * inode, clear the flag and move it to the back of the LRU so it gets another
 * pass through the LRU before it gets reclaimed. This is necessary because of
 * the fact we are doing lazy LRU updates to minimise lock contention so the
 * LRU does not have strict ordering. Hence we don't want to reclaim inodes
 * with this flag set because they are the inodes that are out of order.
 */
static void prune_icache(int nr_to_scan)
{
	LIST_HEAD(freeable);
	int nr_scanned;
	unsigned long reap = 0;

	down_read(&iprune_sem);
	spin_lock(&inode_lock);
	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
		struct inode *inode;

		if (list_empty(&inode_lru))
			break;

		inode = list_entry(inode_lru.prev, struct inode, i_lru);

		/*
		 * Referenced or dirty inodes are still in use. Give them
		 * another pass through the LRU as we cannot reclaim them now.
		 */
		if (atomic_read(&inode->i_count) ||
		    (inode->i_state & ~I_REFERENCED)) {
			list_del_init(&inode->i_lru);
			inodes_stat.nr_unused--;
			continue;
		}

		/* recently referenced inodes get one more pass */
		if (inode->i_state & I_REFERENCED) {
			list_move(&inode->i_lru, &inode_lru);
			inode->i_state &= ~I_REFERENCED;
			continue;
		}
		if (inode_has_buffers(inode) || inode->i_data.nrpages) {
			__iget(inode);
			spin_unlock(&inode_lock);
			if (remove_inode_buffers(inode))
				reap += invalidate_mapping_pages(&inode->i_data,
								0, -1);
			iput(inode);
			spin_lock(&inode_lock);

			if (inode != list_entry(inode_lru.next,
						struct inode, i_lru))
				continue;	/* wrong inode or list_empty */
			if (!can_unuse(inode))
				continue;
		}
		WARN_ON(inode->i_state & I_NEW);
		inode->i_state |= I_FREEING;

		/*
		 * Move the inode off the IO lists and LRU once I_FREEING is
		 * set so that it won't get moved back on there if it is dirty.
		 */
		list_move(&inode->i_lru, &freeable);
		list_del_init(&inode->i_wb_list);
		inodes_stat.nr_unused--;
	}
	if (current_is_kswapd())
		__count_vm_events(KSWAPD_INODESTEAL, reap);
	else
		__count_vm_events(PGINODESTEAL, reap);
	spin_unlock(&inode_lock);

	dispose_list(&freeable);
	up_read(&iprune_sem);
}

/*
 * shrink_icache_memory() will attempt to reclaim some unused inodes.  Here,
 * "unused" means that no dentries are referring to the inodes: the files are
 * not open and the dcache references to those inodes have already been
 * reclaimed.
 *
 * This function is passed the number of inodes to scan, and it returns the
 * total number of remaining possibly-reclaimable inodes.
 */
static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
	if (nr) {
		/*
		 * Nasty deadlock avoidance.  We may hold various FS locks,
		 * and we don't want to recurse into the FS that called us
		 * in clear_inode() and friends..
		 */
		if (!(gfp_mask & __GFP_FS))
			return -1;
		prune_icache(nr);
	}
	return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
}

static struct shrinker icache_shrinker = {
	.shrink = shrink_icache_memory,
	.seeks = DEFAULT_SEEKS,
};

static void __wait_on_freeing_inode(struct inode *inode);
/*
 * Called with the inode lock held.
 */
static struct inode *find_inode(struct super_block *sb,
				struct hlist_head *head,
				int (*test)(struct inode *, void *),
				void *data)
{
	struct hlist_node *node;
	struct inode *inode = NULL;

repeat:
	hlist_for_each_entry(inode, node, head, i_hash) {
		if (inode->i_sb != sb)
			continue;
		if (!test(inode, data))
			continue;
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		__iget(inode);
		return inode;
	}
	return NULL;
}

/*
 * find_inode_fast is the fast path version of find_inode, see the comment at
 * iget_locked for details.
 */
static struct inode *find_inode_fast(struct super_block *sb,
				struct hlist_head *head, unsigned long ino)
{
	struct hlist_node *node;
	struct inode *inode = NULL;

repeat:
	hlist_for_each_entry(inode, node, head, i_hash) {
		if (inode->i_ino != ino)
			continue;
		if (inode->i_sb != sb)
			continue;
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		__iget(inode);
		return inode;
	}
	return NULL;
}

/*
 * Each cpu owns a range of LAST_INO_BATCH numbers.
 * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
 * to renew the exhausted range.
 *
 * This does not significantly increase overflow rate because every CPU can
 * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
 * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
 * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
 * overflow rate by 2x, which does not seem too significant.
 *
 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
 * error if st_ino won't fit in target struct field. Use 32bit counter
 * here to attempt to avoid that.
 */
#define LAST_INO_BATCH 1024
static DEFINE_PER_CPU(unsigned int, last_ino);

unsigned int get_next_ino(void)
{
	unsigned int *p = &get_cpu_var(last_ino);
	unsigned int res = *p;

#ifdef CONFIG_SMP
	if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
		static atomic_t shared_last_ino;
		int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);

		res = next - LAST_INO_BATCH;
	}
#endif

	*p = ++res;
	put_cpu_var(last_ino);
	return res;
}
EXPORT_SYMBOL(get_next_ino);

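/*
 * Illustrative sketch (added for clarity, not part of this file): pseudo
 * filesystems that have no on-disk inode numbers typically stamp each new
 * in-memory inode with get_next_ino() right after allocating it, roughly:
 *
 *	struct inode *inode = new_inode(sb);
 *	if (inode) {
 *		inode->i_ino = get_next_ino();
 *		inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 *	}
 *
 * The numbers are only approximately unique (the 32-bit counter can wrap),
 * which is acceptable for such filesystems.
 */
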
/**
 * new_inode - obtain an inode
 * @sb: superblock
 *
 * Allocates a new inode for given superblock. The default gfp_mask
 * for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
 * If HIGHMEM pages are unsuitable or it is known that pages allocated
 * for the page cache are not reclaimable or migratable,
 * mapping_set_gfp_mask() must be called with suitable flags on the
 * newly created inode's mapping.
 */
struct inode *new_inode(struct super_block *sb)
{
	struct inode *inode;

	spin_lock_prefetch(&inode_lock);

	inode = alloc_inode(sb);
	if (inode) {
		spin_lock(&inode_lock);
		__inode_sb_list_add(inode);
		inode->i_state = 0;
		spin_unlock(&inode_lock);
	}
	return inode;
}
EXPORT_SYMBOL(new_inode);

void unlock_new_inode(struct inode *inode)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (S_ISDIR(inode->i_mode)) {
		struct file_system_type *type = inode->i_sb->s_type;

		/* Set new key only if filesystem hasn't already changed it */
		if (!lockdep_match_class(&inode->i_mutex,
		    &type->i_mutex_key)) {
			/*
			 * ensure nobody is actually holding i_mutex
			 */
			mutex_destroy(&inode->i_mutex);
			mutex_init(&inode->i_mutex);
			lockdep_set_class(&inode->i_mutex,
					  &type->i_mutex_dir_key);
		}
	}
#endif
	/*
	 * This is special!  We do not need the spinlock when clearing I_NEW,
	 * because we're guaranteed that nobody else tries to do anything about
	 * the state of the inode when it is locked, as we just created it (so
	 * there can be no old holders that haven't tested I_NEW).
	 * However we must emit the memory barrier so that other CPUs reliably
	 * see the clearing of I_NEW after the other inode initialisation has
	 * completed.
	 */
	smp_mb();
	WARN_ON(!(inode->i_state & I_NEW));
	inode->i_state &= ~I_NEW;
	wake_up_inode(inode);
}
EXPORT_SYMBOL(unlock_new_inode);

/*
 * This is called without the inode lock held.  Be careful.
 *
 * We no longer cache the sb_flags in i_flags - see fs.h
 *	-- rmk@arm.uk.linux.org
 */
static struct inode *get_new_inode(struct super_block *sb,
				struct hlist_head *head,
				int (*test)(struct inode *, void *),
				int (*set)(struct inode *, void *),
				void *data)
{
	struct inode *inode;

	inode = alloc_inode(sb);
	if (inode) {
		struct inode *old;

		spin_lock(&inode_lock);
		/* We released the lock, so.. */
		old = find_inode(sb, head, test, data);
		if (!old) {
			if (set(inode, data))
				goto set_failed;

			hlist_add_head(&inode->i_hash, head);
			__inode_sb_list_add(inode);
			inode->i_state = I_NEW;
			spin_unlock(&inode_lock);

			/* Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */
			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		spin_unlock(&inode_lock);
		destroy_inode(inode);
		inode = old;
		wait_on_inode(inode);
	}
	return inode;

set_failed:
	spin_unlock(&inode_lock);
	destroy_inode(inode);
	return NULL;
}

/*
 * get_new_inode_fast is the fast path version of get_new_inode, see the
 * comment at iget_locked for details.
 */
static struct inode *get_new_inode_fast(struct super_block *sb,
				struct hlist_head *head, unsigned long ino)
{
	struct inode *inode;

	inode = alloc_inode(sb);
	if (inode) {
		struct inode *old;

		spin_lock(&inode_lock);
		/* We released the lock, so.. */
		old = find_inode_fast(sb, head, ino);
		if (!old) {
			inode->i_ino = ino;
			hlist_add_head(&inode->i_hash, head);
			__inode_sb_list_add(inode);
			inode->i_state = I_NEW;
			spin_unlock(&inode_lock);

			/* Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */
			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		spin_unlock(&inode_lock);
		destroy_inode(inode);
		inode = old;
		wait_on_inode(inode);
	}
	return inode;
}

/*
 * search the inode cache for a matching inode number.
 * If we find one, then the inode number we are trying to
 * allocate is not unique and so we should not use it.
 *
 * Returns 1 if the inode number is unique, 0 if it is not.
 */
static int test_inode_iunique(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *b = inode_hashtable + hash(sb, ino);
	struct hlist_node *node;
	struct inode *inode;

	hlist_for_each_entry(inode, node, b, i_hash) {
		if (inode->i_ino == ino && inode->i_sb == sb)
			return 0;
	}

	return 1;
}

/**
 * iunique - get a unique inode number
 * @sb: superblock
 * @max_reserved: highest reserved inode number
 *
 * Obtain an inode number that is unique on the system for a given
 * superblock. This is used by file systems that have no natural
 * permanent inode numbering system. An inode number is returned that
 * is higher than the reserved limit but unique.
 *
 * BUGS:
 * With a large number of inodes live on the file system this function
 * currently becomes quite slow.
 */
ino_t iunique(struct super_block *sb, ino_t max_reserved)
{
	/*
	 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
	 * error if st_ino won't fit in target struct field. Use 32bit counter
	 * here to attempt to avoid that.
	 */
	static DEFINE_SPINLOCK(iunique_lock);
	static unsigned int counter;
	ino_t res;

	spin_lock(&inode_lock);
	spin_lock(&iunique_lock);
	do {
		if (counter <= max_reserved)
			counter = max_reserved + 1;
		res = counter++;
	} while (!test_inode_iunique(sb, res));
	spin_unlock(&iunique_lock);
	spin_unlock(&inode_lock);

	return res;
}
EXPORT_SYMBOL(iunique);

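/*
 * Illustrative sketch (added for clarity, not part of this file): iunique()
 * is meant for filesystems that fabricate inode numbers at runtime for
 * objects with no stable on-disk number.  A typical call looks like:
 *
 *	inode->i_ino = iunique(sb, MYFS_ROOT_INO);
 *
 * where MYFS_ROOT_INO is a hypothetical constant marking the highest inode
 * number the filesystem reserves for itself.  Because iunique() probes the
 * inode hash for collisions, it slows down as the cache grows, as the BUGS
 * note above says.
 */
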
struct inode *igrab(struct inode *inode)
{
	spin_lock(&inode_lock);
	if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
		__iget(inode);
	else
		/*
		 * Handle the case where s_op->clear_inode has not been
		 * called yet, and somebody is calling igrab
		 * while the inode is getting freed.
		 */
		inode = NULL;
	spin_unlock(&inode_lock);
	return inode;
}
EXPORT_SYMBOL(igrab);

/**
 * ifind - internal function, you want ilookup5() or iget5().
 * @sb: super block of file system to search
 * @head: the head of the list to search
 * @test: callback used for comparisons between inodes
 * @data: opaque data pointer to pass to @test
 * @wait: if true wait for the inode to be unlocked, if false do not
 *
 * ifind() searches for the inode specified by @data in the inode
 * cache. This is a generalized version of ifind_fast() for file systems where
 * the inode number is not sufficient for unique identification of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Otherwise NULL is returned.
 *
 * Note, @test is called with the inode_lock held, so can't sleep.
 */
static struct inode *ifind(struct super_block *sb,
		struct hlist_head *head, int (*test)(struct inode *, void *),
		void *data, const int wait)
{
	struct inode *inode;

	spin_lock(&inode_lock);
	inode = find_inode(sb, head, test, data);
	if (inode) {
		spin_unlock(&inode_lock);
		if (likely(wait))
			wait_on_inode(inode);
		return inode;
	}
	spin_unlock(&inode_lock);
	return NULL;
}

/**
 * ifind_fast - internal function, you want ilookup() or iget().
 * @sb: super block of file system to search
 * @head: head of the list to search
 * @ino: inode number to search for
 *
 * ifind_fast() searches for the inode @ino in the inode cache. This is for
 * file systems where the inode number is sufficient for unique identification
 * of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Otherwise NULL is returned.
 */
static struct inode *ifind_fast(struct super_block *sb,
		struct hlist_head *head, unsigned long ino)
{
	struct inode *inode;

	spin_lock(&inode_lock);
	inode = find_inode_fast(sb, head, ino);
	if (inode) {
		spin_unlock(&inode_lock);
		wait_on_inode(inode);
		return inode;
	}
	spin_unlock(&inode_lock);
	return NULL;
}

/**
 * ilookup5_nowait - search for an inode in the inode cache
 * @sb: super block of file system to search
 * @hashval: hash value (usually inode number) to search for
 * @test: callback used for comparisons between inodes
 * @data: opaque data pointer to pass to @test
 *
 * ilookup5_nowait() uses ifind() to search for the inode specified by @hashval
 * and @data in the inode cache. This is a generalized version of ilookup() for
 * file systems where the inode number is not sufficient for unique
 * identification of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.  Note, the inode lock is not waited upon so you have to be
 * very careful what you do with the returned inode.  You probably should be
 * using ilookup5() instead.
 *
 * Otherwise NULL is returned.
 *
 * Note, @test is called with the inode_lock held, so can't sleep.
 */
struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);

	return ifind(sb, head, test, data, 0);
}
EXPORT_SYMBOL(ilookup5_nowait);

/**
 * ilookup5 - search for an inode in the inode cache
 * @sb: super block of file system to search
 * @hashval: hash value (usually inode number) to search for
 * @test: callback used for comparisons between inodes
 * @data: opaque data pointer to pass to @test
 *
 * ilookup5() uses ifind() to search for the inode specified by @hashval and
 * @data in the inode cache. This is a generalized version of ilookup() for
 * file systems where the inode number is not sufficient for unique
 * identification of an inode.
 *
 * If the inode is in the cache, the inode lock is waited upon and the inode is
 * returned with an incremented reference count.
 *
 * Otherwise NULL is returned.
 *
 * Note, @test is called with the inode_lock held, so can't sleep.
 */
struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);

	return ifind(sb, head, test, data, 1);
}
EXPORT_SYMBOL(ilookup5);

/**
 * ilookup - search for an inode in the inode cache
 * @sb: super block of file system to search
 * @ino: inode number to search for
 *
 * ilookup() uses ifind_fast() to search for the inode @ino in the inode cache.
 * This is for file systems where the inode number is sufficient for unique
 * identification of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Otherwise NULL is returned.
 */
struct inode *ilookup(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);

	return ifind_fast(sb, head, ino);
}
EXPORT_SYMBOL(ilookup);

/**
 * iget5_locked - obtain an inode from a mounted file system
 * @sb: super block of file system
 * @hashval: hash value (usually inode number) to get
 * @test: callback used for comparisons between inodes
 * @set: callback used to initialize a new struct inode
 * @data: opaque data pointer to pass to @test and @set
 *
 * iget5_locked() uses ifind() to search for the inode specified by @hashval
 * and @data in the inode cache and if present it is returned with an increased
 * reference count. This is a generalized version of iget_locked() for file
 * systems where the inode number is not sufficient for unique identification
 * of an inode.
 *
 * If the inode is not in cache, get_new_inode() is called to allocate a new
 * inode and this is returned locked, hashed, and with the I_NEW flag set. The
 * file system gets to fill it in before unlocking it via unlock_new_inode().
 *
 * Note both @test and @set are called with the inode_lock held, so can't sleep.
 */
struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *),
		int (*set)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
	struct inode *inode;

	inode = ifind(sb, head, test, data, 1);
	if (inode)
		return inode;
	/*
	 * get_new_inode() will do the right thing, re-trying the search
	 * in case it had to block at any point.
	 */
	return get_new_inode(sb, head, test, set, data);
}
EXPORT_SYMBOL(iget5_locked);

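/*
 * Illustrative sketch (added for clarity, not part of this file): the
 * expected calling pattern for the iget family is to check I_NEW on the
 * returned inode, fill in a freshly allocated one, and only then publish it
 * with unlock_new_inode(), roughly:
 *
 *	struct inode *inode = iget_locked(sb, ino);
 *
 *	if (!inode)
 *		return ERR_PTR(-ENOMEM);
 *	if (!(inode->i_state & I_NEW))
 *		return inode;			(already cached and initialised)
 *	myfs_read_inode_from_disk(inode);	(hypothetical helper)
 *	unlock_new_inode(inode);
 *	return inode;
 *
 * iget5_locked() follows the same pattern, with @set initialising the lookup
 * key on the new inode and @test matching it on cache hits.
 */
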
/**
 * iget_locked - obtain an inode from a mounted file system
 * @sb: super block of file system
 * @ino: inode number to get
 *
 * iget_locked() uses ifind_fast() to search for the inode specified by @ino in
 * the inode cache and if present it is returned with an increased reference
 * count. This is for file systems where the inode number is sufficient for
 * unique identification of an inode.
 *
 * If the inode is not in cache, get_new_inode_fast() is called to allocate a
 * new inode and this is returned locked, hashed, and with the I_NEW flag set.
 * The file system gets to fill it in before unlocking it via
 * unlock_new_inode().
 */
struct inode *iget_locked(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);
	struct inode *inode;

	inode = ifind_fast(sb, head, ino);
	if (inode)
		return inode;
	/*
	 * get_new_inode_fast() will do the right thing, re-trying the search
	 * in case it had to block at any point.
	 */
	return get_new_inode_fast(sb, head, ino);
}
EXPORT_SYMBOL(iget_locked);

int insert_inode_locked(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	ino_t ino = inode->i_ino;
	struct hlist_head *head = inode_hashtable + hash(sb, ino);

	inode->i_state |= I_NEW;
	while (1) {
		struct hlist_node *node;
		struct inode *old = NULL;
		spin_lock(&inode_lock);
		hlist_for_each_entry(old, node, head, i_hash) {
			if (old->i_ino != ino)
				continue;
			if (old->i_sb != sb)
				continue;
			if (old->i_state & (I_FREEING|I_WILL_FREE))
				continue;
			break;
		}
		if (likely(!node)) {
			hlist_add_head(&inode->i_hash, head);
			spin_unlock(&inode_lock);
			return 0;
		}
		__iget(old);
		spin_unlock(&inode_lock);
		wait_on_inode(old);
		if (unlikely(!inode_unhashed(old))) {
			iput(old);
			return -EBUSY;
		}
		iput(old);
	}
}
EXPORT_SYMBOL(insert_inode_locked);

int insert_inode_locked4(struct inode *inode, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct super_block *sb = inode->i_sb;
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);

	inode->i_state |= I_NEW;

	while (1) {
		struct hlist_node *node;
		struct inode *old = NULL;

		spin_lock(&inode_lock);
		hlist_for_each_entry(old, node, head, i_hash) {
			if (old->i_sb != sb)
				continue;
			if (!test(old, data))
				continue;
			if (old->i_state & (I_FREEING|I_WILL_FREE))
				continue;
			break;
		}
		if (likely(!node)) {
			hlist_add_head(&inode->i_hash, head);
			spin_unlock(&inode_lock);
			return 0;
		}
		__iget(old);
		spin_unlock(&inode_lock);
		wait_on_inode(old);
		if (unlikely(!inode_unhashed(old))) {
			iput(old);
			return -EBUSY;
		}
		iput(old);
	}
}
EXPORT_SYMBOL(insert_inode_locked4);

int generic_delete_inode(struct inode *inode)
{
	return 1;
}
EXPORT_SYMBOL(generic_delete_inode);

/*
 * Normal UNIX filesystem behaviour: delete the
 * inode when the usage count drops to zero, and
 * i_nlink is zero.
 */
int generic_drop_inode(struct inode *inode)
{
	return !inode->i_nlink || inode_unhashed(inode);
}
EXPORT_SYMBOL_GPL(generic_drop_inode);

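/*
 * Illustrative sketch (added for clarity, not part of this file): a
 * filesystem that never wants unreferenced inodes to linger in the inode
 * cache can point its ->drop_inode at generic_delete_inode() so that
 * iput_final() below always evicts:
 *
 *	static const struct super_operations example_sops = {
 *		.statfs		= simple_statfs,
 *		.drop_inode	= generic_delete_inode,
 *	};
 *
 * Leaving ->drop_inode NULL selects generic_drop_inode(), the normal
 * "keep it cached while it is still hashed and linked" behaviour.
 */
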
/*
 * Called when we're dropping the last reference
 * to an inode.
 *
 * Call the FS "drop_inode()" function, defaulting to
 * the legacy UNIX filesystem behaviour.  If it tells
 * us to evict inode, do so.  Otherwise, retain inode
 * in cache if fs is alive, sync and evict if fs is
 * shutting down.
 */
static void iput_final(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	const struct super_operations *op = inode->i_sb->s_op;
	int drop;

	if (op && op->drop_inode)
		drop = op->drop_inode(inode);
	else
		drop = generic_drop_inode(inode);

	if (!drop) {
		if (sb->s_flags & MS_ACTIVE) {
			inode->i_state |= I_REFERENCED;
			if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
				inode_lru_list_add(inode);
			}
			spin_unlock(&inode_lock);
			return;
		}
		WARN_ON(inode->i_state & I_NEW);
		inode->i_state |= I_WILL_FREE;
		spin_unlock(&inode_lock);
		write_inode_now(inode, 1);
		spin_lock(&inode_lock);
		WARN_ON(inode->i_state & I_NEW);
		inode->i_state &= ~I_WILL_FREE;
		__remove_inode_hash(inode);
	}

	WARN_ON(inode->i_state & I_NEW);
	inode->i_state |= I_FREEING;

	/*
	 * Move the inode off the IO lists and LRU once I_FREEING is
	 * set so that it won't get moved back on there if it is dirty.
	 */
	inode_lru_list_del(inode);
	list_del_init(&inode->i_wb_list);

	__inode_sb_list_del(inode);
	spin_unlock(&inode_lock);
	evict(inode);
	remove_inode_hash(inode);
	wake_up_inode(inode);
	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
	destroy_inode(inode);
}

/**
 * iput - put an inode
 * @inode: inode to put
 *
 * Puts an inode, dropping its usage count. If the inode use count hits
 * zero, the inode is then freed and may also be destroyed.
 *
 * Consequently, iput() can sleep.
 */
void iput(struct inode *inode)
{
	if (inode) {
		BUG_ON(inode->i_state & I_CLEAR);

		if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
			iput_final(inode);
	}
}
EXPORT_SYMBOL(iput);

/**
 * bmap - find a block number in a file
 * @inode: inode of file
 * @block: block to find
 *
 * Returns the block number on the device holding the inode that
 * is the disk block number for the block of the file requested.
 * That is, asked for block 4 of inode 1 the function will return the
 * disk block relative to the disk start that holds that block of the
 * file.
 */
sector_t bmap(struct inode *inode, sector_t block)
{
	sector_t res = 0;
	if (inode->i_mapping->a_ops->bmap)
		res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
	return res;
}
EXPORT_SYMBOL(bmap);

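/*
 * Note (assumption, not from the original source): bmap() is used as the
 * backend for the FIBMAP ioctl and for mapping swap files to disk blocks.
 * A zero return means either "hole" or "the filesystem does not implement
 * ->bmap", so callers cannot distinguish the two cases.
 */
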
/*
 * With relative atime, only update atime if the previous atime is
 * earlier than either the ctime or mtime or if at least a day has
 * passed since the last atime update.
 */
static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
			     struct timespec now)
{

	if (!(mnt->mnt_flags & MNT_RELATIME))
		return 1;
	/*
	 * Is mtime younger than atime? If yes, update atime:
	 */
	if (timespec_compare(&inode->i_mtime, &inode->i_atime) >= 0)
		return 1;
	/*
	 * Is ctime younger than atime? If yes, update atime:
	 */
	if (timespec_compare(&inode->i_ctime, &inode->i_atime) >= 0)
		return 1;

	/*
	 * Is the previous atime value older than a day? If yes,
	 * update atime:
	 */
	if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
		return 1;
	/*
	 * Good, we can skip the atime update:
	 */
	return 0;
}

/**
 * touch_atime - update the access time
 * @mnt: mount the inode is accessed on
 * @dentry: dentry accessed
 *
 * Update the accessed time on an inode and mark it for writeback.
 * This function automatically handles read only file systems and media,
 * as well as the "noatime" flag and inode specific "noatime" markers.
 */
void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;
	struct timespec now;

	if (inode->i_flags & S_NOATIME)
		return;
	if (IS_NOATIME(inode))
		return;
	if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
		return;

	if (mnt->mnt_flags & MNT_NOATIME)
		return;
	if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
		return;

	now = current_fs_time(inode->i_sb);

	if (!relatime_need_update(mnt, inode, now))
		return;

	if (timespec_equal(&inode->i_atime, &now))
		return;

	if (mnt_want_write(mnt))
		return;

	inode->i_atime = now;
	mark_inode_dirty_sync(inode);
	mnt_drop_write(mnt);
}
EXPORT_SYMBOL(touch_atime);

/**
 * file_update_time - update mtime and ctime time
 * @file: file accessed
 *
 * Update the mtime and ctime members of an inode and mark the inode
 * for writeback.  Note that this function is meant exclusively for
 * usage in the file write path of filesystems, and filesystems may
 * choose to explicitly ignore updates via this function with the
 * S_NOCMTIME inode flag, e.g. for network filesystems where these
 * timestamps are handled by the server.
 */

void file_update_time(struct file *file)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct timespec now;
	enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;

	/* First try to exhaust all avenues to not sync */
	if (IS_NOCMTIME(inode))
		return;

	now = current_fs_time(inode->i_sb);
	if (!timespec_equal(&inode->i_mtime, &now))
		sync_it = S_MTIME;

	if (!timespec_equal(&inode->i_ctime, &now))
		sync_it |= S_CTIME;

	if (IS_I_VERSION(inode))
		sync_it |= S_VERSION;

	if (!sync_it)
		return;

	/* Finally allowed to write? Takes lock. */
	if (mnt_want_write_file(file))
		return;

	/* Only change inode inside the lock region */
	if (sync_it & S_VERSION)
		inode_inc_iversion(inode);
	if (sync_it & S_CTIME)
		inode->i_ctime = now;
	if (sync_it & S_MTIME)
		inode->i_mtime = now;
	mark_inode_dirty_sync(inode);
	mnt_drop_write(file->f_path.mnt);
}
EXPORT_SYMBOL(file_update_time);

int inode_needs_sync(struct inode *inode)
{
	if (IS_SYNC(inode))
		return 1;
	if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
		return 1;
	return 0;
}
EXPORT_SYMBOL(inode_needs_sync);

int inode_wait(void *word)
{
	schedule();
	return 0;
}
EXPORT_SYMBOL(inode_wait);

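/*
 * Note (assumption, not from the original source): inode_wait() is the
 * simple "just schedule" action handed to wait_on_bit() by helpers such as
 * wait_on_inode(), which sleep until __I_NEW is cleared and the bit
 * waitqueue is woken by wake_up_inode().
 */
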
This function waits 1613 * until the deletion _might_ have completed. Callers are responsible 1614 * to recheck inode state. 1615 * 1616 * It doesn't matter if I_NEW is not set initially, a call to 1617 * wake_up_inode() after removing from the hash list will DTRT. 1618 * 1619 * This is called with inode_lock held. 1620 */ 1621 static void __wait_on_freeing_inode(struct inode *inode) 1622 { 1623 wait_queue_head_t *wq; 1624 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); 1625 wq = bit_waitqueue(&inode->i_state, __I_NEW); 1626 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 1627 spin_unlock(&inode_lock); 1628 schedule(); 1629 finish_wait(wq, &wait.wait); 1630 spin_lock(&inode_lock); 1631 } 1632 1633 static __initdata unsigned long ihash_entries; 1634 static int __init set_ihash_entries(char *str) 1635 { 1636 if (!str) 1637 return 0; 1638 ihash_entries = simple_strtoul(str, &str, 0); 1639 return 1; 1640 } 1641 __setup("ihash_entries=", set_ihash_entries); 1642 1643 /* 1644 * Initialize the waitqueues and inode hash table. 1645 */ 1646 void __init inode_init_early(void) 1647 { 1648 int loop; 1649 1650 /* If hashes are distributed across NUMA nodes, defer 1651 * hash allocation until vmalloc space is available. 1652 */ 1653 if (hashdist) 1654 return; 1655 1656 inode_hashtable = 1657 alloc_large_system_hash("Inode-cache", 1658 sizeof(struct hlist_head), 1659 ihash_entries, 1660 14, 1661 HASH_EARLY, 1662 &i_hash_shift, 1663 &i_hash_mask, 1664 0); 1665 1666 for (loop = 0; loop < (1 << i_hash_shift); loop++) 1667 INIT_HLIST_HEAD(&inode_hashtable[loop]); 1668 } 1669 1670 void __init inode_init(void) 1671 { 1672 int loop; 1673 1674 /* inode slab cache */ 1675 inode_cachep = kmem_cache_create("inode_cache", 1676 sizeof(struct inode), 1677 0, 1678 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| 1679 SLAB_MEM_SPREAD), 1680 init_once); 1681 register_shrinker(&icache_shrinker); 1682 1683 /* Hash may have been set up in inode_init_early */ 1684 if (!hashdist) 1685 return; 1686 1687 inode_hashtable = 1688 alloc_large_system_hash("Inode-cache", 1689 sizeof(struct hlist_head), 1690 ihash_entries, 1691 14, 1692 0, 1693 &i_hash_shift, 1694 &i_hash_mask, 1695 0); 1696 1697 for (loop = 0; loop < (1 << i_hash_shift); loop++) 1698 INIT_HLIST_HEAD(&inode_hashtable[loop]); 1699 } 1700 1701 void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) 1702 { 1703 inode->i_mode = mode; 1704 if (S_ISCHR(mode)) { 1705 inode->i_fop = &def_chr_fops; 1706 inode->i_rdev = rdev; 1707 } else if (S_ISBLK(mode)) { 1708 inode->i_fop = &def_blk_fops; 1709 inode->i_rdev = rdev; 1710 } else if (S_ISFIFO(mode)) 1711 inode->i_fop = &def_fifo_fops; 1712 else if (S_ISSOCK(mode)) 1713 inode->i_fop = &bad_sock_fops; 1714 else 1715 printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for" 1716 " inode %s:%lu\n", mode, inode->i_sb->s_id, 1717 inode->i_ino); 1718 } 1719 EXPORT_SYMBOL(init_special_inode); 1720 1721 /** 1722 * Init uid,gid,mode for new inode according to posix standards 1723 * @inode: New inode 1724 * @dir: Directory inode 1725 * @mode: mode of the new inode 1726 */ 1727 void inode_init_owner(struct inode *inode, const struct inode *dir, 1728 mode_t mode) 1729 { 1730 inode->i_uid = current_fsuid(); 1731 if (dir && dir->i_mode & S_ISGID) { 1732 inode->i_gid = dir->i_gid; 1733 if (S_ISDIR(mode)) 1734 mode |= S_ISGID; 1735 } else 1736 inode->i_gid = current_fsgid(); 1737 inode->i_mode = mode; 1738 } 1739 EXPORT_SYMBOL(inode_init_owner); 1740