/*
 * linux/fs/inode.c
 *
 * (C) 1997 Linus Torvalds
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/dcache.h>
#include <linux/init.h>
#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/writeback.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/wait.h>
#include <linux/hash.h>
#include <linux/swap.h>
#include <linux/security.h>
#include <linux/pagemap.h>
#include <linux/cdev.h>
#include <linux/bootmem.h>
#include <linux/inotify.h>
#include <linux/mount.h>
#include <linux/async.h>

/*
 * This is needed for the following functions:
 *  - inode_has_buffers
 *  - invalidate_inode_buffers
 *  - invalidate_bdev
 *
 * FIXME: remove all knowledge of the buffer layer from this file
 */
#include <linux/buffer_head.h>

/*
 * New inode.c implementation.
 *
 * This implementation has the basic premise of trying
 * to be extremely low-overhead and SMP-safe, yet be
 * simple enough to be "obviously correct".
 *
 * Famous last words.
 */

/* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */

/* #define INODE_PARANOIA 1 */
/* #define INODE_DEBUG 1 */

/*
 * Inode lookup is no longer as critical as it used to be:
 * most of the lookups are going to be through the dcache.
 */
#define I_HASHBITS	i_hash_shift
#define I_HASHMASK	i_hash_mask

static unsigned int i_hash_mask __read_mostly;
static unsigned int i_hash_shift __read_mostly;

/*
 * Each inode can be on two separate lists. One is
 * the hash list of the inode, used for lookups. The
 * other linked list is the "type" list:
 *  "in_use" - valid inode, i_count > 0, i_nlink > 0
 *  "dirty"  - as "in_use" but also dirty
 *  "unused" - valid inode, i_count = 0
 *
 * A "dirty" list is maintained for each super block,
 * allowing for low-overhead inode sync() operations.
 */

LIST_HEAD(inode_in_use);
LIST_HEAD(inode_unused);
static struct hlist_head *inode_hashtable __read_mostly;

/*
 * A simple spinlock to protect the list manipulations.
 *
 * NOTE! You also have to own the lock if you change
 * the i_state of an inode while it is in use..
 */
DEFINE_SPINLOCK(inode_lock);

/*
 * iprune_mutex provides exclusion between the kswapd or try_to_free_pages
 * icache shrinking path, and the umount path.  Without this exclusion,
 * by the time prune_icache calls iput for the inode whose pages it has
 * been invalidating, or by the time it calls clear_inode & destroy_inode
 * from its final dispose_list, the struct super_block they refer to
 * (for inode->i_sb->s_op) may already have been freed and reused.
 */
static DEFINE_MUTEX(iprune_mutex);

/*
 * Statistics gathering..
 */
struct inodes_stat_t inodes_stat;

static struct kmem_cache * inode_cachep __read_mostly;

static void wake_up_inode(struct inode *inode)
{
	/*
	 * Prevent speculative execution through spin_unlock(&inode_lock);
	 */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_LOCK);
}

/**
 * inode_init_always - perform inode structure initialisation
 * @sb: superblock inode belongs to
 * @inode: inode to initialise
 *
 * These are initializations that need to be done on every inode
 * allocation as the fields are not initialised by slab allocation.
 */
struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
{
	static const struct address_space_operations empty_aops;
	static struct inode_operations empty_iops;
	static const struct file_operations empty_fops;

	struct address_space * const mapping = &inode->i_data;

	inode->i_sb = sb;
	inode->i_blkbits = sb->s_blocksize_bits;
	inode->i_flags = 0;
	atomic_set(&inode->i_count, 1);
	inode->i_op = &empty_iops;
	inode->i_fop = &empty_fops;
	inode->i_nlink = 1;
	inode->i_uid = 0;
	inode->i_gid = 0;
	atomic_set(&inode->i_writecount, 0);
	inode->i_size = 0;
	inode->i_blocks = 0;
	inode->i_bytes = 0;
	inode->i_generation = 0;
#ifdef CONFIG_QUOTA
	memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
#endif
	inode->i_pipe = NULL;
	inode->i_bdev = NULL;
	inode->i_cdev = NULL;
	inode->i_rdev = 0;
	inode->dirtied_when = 0;
	if (security_inode_alloc(inode)) {
		if (inode->i_sb->s_op->destroy_inode)
			inode->i_sb->s_op->destroy_inode(inode);
		else
			kmem_cache_free(inode_cachep, (inode));
		return NULL;
	}

	spin_lock_init(&inode->i_lock);
	lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);

	mutex_init(&inode->i_mutex);
	lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);

	init_rwsem(&inode->i_alloc_sem);
	lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);

	mapping->a_ops = &empty_aops;
	mapping->host = inode;
	mapping->flags = 0;
	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
	mapping->assoc_mapping = NULL;
	mapping->backing_dev_info = &default_backing_dev_info;
	mapping->writeback_index = 0;

	/*
	 * If the block_device provides a backing_dev_info for client
	 * inodes then use that.  Otherwise the inode shares the bdev's
	 * backing_dev_info.
	 */
	if (sb->s_bdev) {
		struct backing_dev_info *bdi;

		bdi = sb->s_bdev->bd_inode_backing_dev_info;
		if (!bdi)
			bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
		mapping->backing_dev_info = bdi;
	}
	inode->i_private = NULL;
	inode->i_mapping = mapping;

	return inode;
}
EXPORT_SYMBOL(inode_init_always);

static struct inode *alloc_inode(struct super_block *sb)
{
	struct inode *inode;

	if (sb->s_op->alloc_inode)
		inode = sb->s_op->alloc_inode(sb);
	else
		inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);

	if (inode)
		return inode_init_always(sb, inode);
	return NULL;
}

void destroy_inode(struct inode *inode)
{
	BUG_ON(inode_has_buffers(inode));
	security_inode_free(inode);
	if (inode->i_sb->s_op->destroy_inode)
		inode->i_sb->s_op->destroy_inode(inode);
	else
		kmem_cache_free(inode_cachep, (inode));
}
EXPORT_SYMBOL(destroy_inode);
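
/*
 * Illustrative sketch (not part of this file): a filesystem that embeds the
 * VFS inode in its own in-core inode typically provides ->alloc_inode and
 * ->destroy_inode roughly as below, backed by its own slab cache whose
 * constructor calls inode_init_once() (defined further down) on the embedded
 * struct inode.  The examplefs_* names and examplefs_inode_cachep are
 * hypothetical.
 *
 *	struct examplefs_inode_info {
 *		unsigned long	private_state;
 *		struct inode	vfs_inode;
 *	};
 *
 *	static struct inode *examplefs_alloc_inode(struct super_block *sb)
 *	{
 *		struct examplefs_inode_info *ei;
 *
 *		ei = kmem_cache_alloc(examplefs_inode_cachep, GFP_KERNEL);
 *		if (!ei)
 *			return NULL;
 *		return &ei->vfs_inode;
 *	}
 *
 *	static void examplefs_destroy_inode(struct inode *inode)
 *	{
 *		kmem_cache_free(examplefs_inode_cachep,
 *				container_of(inode, struct examplefs_inode_info,
 *					     vfs_inode));
 *	}
 */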

/*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
 * of the inode, so let the slab cache be aware of that.
 */
void inode_init_once(struct inode *inode)
{
	memset(inode, 0, sizeof(*inode));
	INIT_HLIST_NODE(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_dentry);
	INIT_LIST_HEAD(&inode->i_devices);
	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
	spin_lock_init(&inode->i_data.tree_lock);
	spin_lock_init(&inode->i_data.i_mmap_lock);
	INIT_LIST_HEAD(&inode->i_data.private_list);
	spin_lock_init(&inode->i_data.private_lock);
	INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
	INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
	i_size_ordered_init(inode);
#ifdef CONFIG_INOTIFY
	INIT_LIST_HEAD(&inode->inotify_watches);
	mutex_init(&inode->inotify_mutex);
#endif
}

EXPORT_SYMBOL(inode_init_once);

static void init_once(void *foo)
{
	struct inode * inode = (struct inode *) foo;

	inode_init_once(inode);
}

/*
 * inode_lock must be held
 */
void __iget(struct inode * inode)
{
	if (atomic_read(&inode->i_count)) {
		atomic_inc(&inode->i_count);
		return;
	}
	atomic_inc(&inode->i_count);
	if (!(inode->i_state & (I_DIRTY|I_SYNC)))
		list_move(&inode->i_list, &inode_in_use);
	inodes_stat.nr_unused--;
}

/**
 * clear_inode - clear an inode
 * @inode: inode to clear
 *
 * This is called by the filesystem to tell us
 * that the inode is no longer useful. We just
 * terminate it with extreme prejudice.
 */
void clear_inode(struct inode *inode)
{
	might_sleep();
	invalidate_inode_buffers(inode);

	BUG_ON(inode->i_data.nrpages);
	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(inode->i_state & I_CLEAR);
	inode_sync_wait(inode);
	DQUOT_DROP(inode);
	if (inode->i_sb->s_op->clear_inode)
		inode->i_sb->s_op->clear_inode(inode);
	if (S_ISBLK(inode->i_mode) && inode->i_bdev)
		bd_forget(inode);
	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
		cd_forget(inode);
	inode->i_state = I_CLEAR;
}

EXPORT_SYMBOL(clear_inode);

/*
 * dispose_list - dispose of the contents of a local list
 * @head: the head of the list to free
 *
 * Dispose-list gets a local list with local inodes in it, so it doesn't
 * need to worry about list corruption and SMP locks.
 */
static void dispose_list(struct list_head *head)
{
	int nr_disposed = 0;

	while (!list_empty(head)) {
		struct inode *inode;

		inode = list_first_entry(head, struct inode, i_list);
		list_del(&inode->i_list);

		if (inode->i_data.nrpages)
			truncate_inode_pages(&inode->i_data, 0);
		clear_inode(inode);

		spin_lock(&inode_lock);
		hlist_del_init(&inode->i_hash);
		list_del_init(&inode->i_sb_list);
		spin_unlock(&inode_lock);

		wake_up_inode(inode);
		destroy_inode(inode);
		nr_disposed++;
	}
	spin_lock(&inode_lock);
	inodes_stat.nr_inodes -= nr_disposed;
	spin_unlock(&inode_lock);
}

/*
 * Invalidate all inodes for a device.
 */
static int invalidate_list(struct list_head *head, struct list_head *dispose)
{
	struct list_head *next;
	int busy = 0, count = 0;

	next = head->next;
	for (;;) {
		struct list_head * tmp = next;
		struct inode * inode;

		/*
		 * We can reschedule here without worrying about the list's
		 * consistency because the per-sb list of inodes must not
		 * change during umount anymore, and because iprune_mutex keeps
		 * shrink_icache_memory() away.
		 */
		cond_resched_lock(&inode_lock);

		next = next->next;
		if (tmp == head)
			break;
		inode = list_entry(tmp, struct inode, i_sb_list);
		invalidate_inode_buffers(inode);
		if (!atomic_read(&inode->i_count)) {
			list_move(&inode->i_list, dispose);
			WARN_ON(inode->i_state & I_NEW);
			inode->i_state |= I_FREEING;
			count++;
			continue;
		}
		busy = 1;
	}
	/* only unused inodes may be cached with i_count zero */
	inodes_stat.nr_unused -= count;
	return busy;
}

/**
 * invalidate_inodes - discard the inodes on a device
 * @sb: superblock
 *
 * Discard all of the inodes for a given superblock. If the discard
 * fails because there are busy inodes then a non zero value is returned.
 * If the discard is successful all the inodes have been discarded.
 */
int invalidate_inodes(struct super_block * sb)
{
	int busy;
	LIST_HEAD(throw_away);

	mutex_lock(&iprune_mutex);
	spin_lock(&inode_lock);
	inotify_unmount_inodes(&sb->s_inodes);
	busy = invalidate_list(&sb->s_inodes, &throw_away);
	spin_unlock(&inode_lock);

	dispose_list(&throw_away);
	mutex_unlock(&iprune_mutex);

	return busy;
}

EXPORT_SYMBOL(invalidate_inodes);

static int can_unuse(struct inode *inode)
{
	if (inode->i_state)
		return 0;
	if (inode_has_buffers(inode))
		return 0;
	if (atomic_read(&inode->i_count))
		return 0;
	if (inode->i_data.nrpages)
		return 0;
	return 1;
}

/*
 * Scan `goal' inodes on the unused list for freeable ones. They are moved to
 * a temporary list and then are freed outside inode_lock by dispose_list().
 *
 * Any inodes which are pinned purely because of attached pagecache have their
 * pagecache removed.  We expect the final iput() on that inode to add it to
 * the front of the inode_unused list.  So look for it there and if the
 * inode is still freeable, proceed.  The right inode is found 99.9% of the
 * time in testing on a 4-way.
 *
 * If the inode has metadata buffers attached to mapping->private_list then
 * try to remove them.
 */
static void prune_icache(int nr_to_scan)
{
	LIST_HEAD(freeable);
	int nr_pruned = 0;
	int nr_scanned;
	unsigned long reap = 0;

	mutex_lock(&iprune_mutex);
	spin_lock(&inode_lock);
	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
		struct inode *inode;

		if (list_empty(&inode_unused))
			break;

		inode = list_entry(inode_unused.prev, struct inode, i_list);

		if (inode->i_state || atomic_read(&inode->i_count)) {
			list_move(&inode->i_list, &inode_unused);
			continue;
		}
		if (inode_has_buffers(inode) || inode->i_data.nrpages) {
			__iget(inode);
			spin_unlock(&inode_lock);
			if (remove_inode_buffers(inode))
				reap += invalidate_mapping_pages(&inode->i_data,
								0, -1);
			iput(inode);
			spin_lock(&inode_lock);

			if (inode != list_entry(inode_unused.next,
						struct inode, i_list))
				continue;	/* wrong inode or list_empty */
			if (!can_unuse(inode))
				continue;
		}
		list_move(&inode->i_list, &freeable);
		WARN_ON(inode->i_state & I_NEW);
		inode->i_state |= I_FREEING;
		nr_pruned++;
	}
	inodes_stat.nr_unused -= nr_pruned;
	if (current_is_kswapd())
		__count_vm_events(KSWAPD_INODESTEAL, reap);
	else
		__count_vm_events(PGINODESTEAL, reap);
	spin_unlock(&inode_lock);

	dispose_list(&freeable);
	mutex_unlock(&iprune_mutex);
}

/*
 * shrink_icache_memory() will attempt to reclaim some unused inodes.  Here,
 * "unused" means that no dentries are referring to the inodes: the files are
 * not open and the dcache references to those inodes have already been
 * reclaimed.
 *
 * This function is passed the number of inodes to scan, and it returns the
 * total number of remaining possibly-reclaimable inodes.
 */
static int shrink_icache_memory(int nr, gfp_t gfp_mask)
{
	if (nr) {
		/*
		 * Nasty deadlock avoidance.  We may hold various FS locks,
		 * and we don't want to recurse into the FS that called us
		 * in clear_inode() and friends..
		 */
		if (!(gfp_mask & __GFP_FS))
			return -1;
		prune_icache(nr);
	}
	return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}

static struct shrinker icache_shrinker = {
	.shrink = shrink_icache_memory,
	.seeks = DEFAULT_SEEKS,
};

static void __wait_on_freeing_inode(struct inode *inode);
/*
 * Called with the inode lock held.
 * NOTE: we are not increasing the inode-refcount, you must call __iget()
 * by hand after calling find_inode now! This simplifies iunique and won't
 * add any additional branch in the common code.
 */
static struct inode * find_inode(struct super_block * sb, struct hlist_head *head, int (*test)(struct inode *, void *), void *data)
{
	struct hlist_node *node;
	struct inode * inode = NULL;

repeat:
	hlist_for_each_entry(inode, node, head, i_hash) {
		if (inode->i_sb != sb)
			continue;
		if (!test(inode, data))
			continue;
		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		break;
	}
	return node ? inode : NULL;
}

/*
 * find_inode_fast is the fast path version of find_inode, see the comment at
 * iget_locked for details.
 */
static struct inode * find_inode_fast(struct super_block * sb, struct hlist_head *head, unsigned long ino)
{
	struct hlist_node *node;
	struct inode * inode = NULL;

repeat:
	hlist_for_each_entry(inode, node, head, i_hash) {
		if (inode->i_ino != ino)
			continue;
		if (inode->i_sb != sb)
			continue;
		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
			__wait_on_freeing_inode(inode);
			goto repeat;
		}
		break;
	}
	return node ? inode : NULL;
}

static unsigned long hash(struct super_block *sb, unsigned long hashval)
{
	unsigned long tmp;

	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
			L1_CACHE_BYTES;
	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
	return tmp & I_HASHMASK;
}

static inline void
__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
			struct inode *inode)
{
	inodes_stat.nr_inodes++;
	list_add(&inode->i_list, &inode_in_use);
	list_add(&inode->i_sb_list, &sb->s_inodes);
	if (head)
		hlist_add_head(&inode->i_hash, head);
}

/**
 * inode_add_to_lists - add a new inode to relevant lists
 * @sb: superblock inode belongs to
 * @inode: inode to mark in use
 *
 * When an inode is allocated it needs to be accounted for, added to the in use
 * list, the owning superblock and the inode hash. This needs to be done under
 * the inode_lock, so export a function to do this rather than the inode lock
 * itself. We calculate the hash list to add to here so it is all internal
 * which requires the caller to have already set up the inode number in the
 * inode to add.
 */
void inode_add_to_lists(struct super_block *sb, struct inode *inode)
{
	struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino);

	spin_lock(&inode_lock);
	__inode_add_to_lists(sb, head, inode);
	spin_unlock(&inode_lock);
}
EXPORT_SYMBOL_GPL(inode_add_to_lists);

/**
 * new_inode - obtain an inode
 * @sb: superblock
 *
 * Allocates a new inode for given superblock. The default gfp_mask
 * for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
 * If HIGHMEM pages are unsuitable or it is known that pages allocated
 * for the page cache are not reclaimable or migratable,
 * mapping_set_gfp_mask() must be called with suitable flags on the
 * newly created inode's mapping.
 *
 */
struct inode *new_inode(struct super_block *sb)
{
	/*
	 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
	 * error if st_ino won't fit in target struct field. Use 32bit counter
	 * here to attempt to avoid that.
	 */
	static unsigned int last_ino;
	struct inode * inode;

	spin_lock_prefetch(&inode_lock);

	inode = alloc_inode(sb);
	if (inode) {
		spin_lock(&inode_lock);
		__inode_add_to_lists(sb, NULL, inode);
		inode->i_ino = ++last_ino;
		inode->i_state = 0;
		spin_unlock(&inode_lock);
	}
	return inode;
}

EXPORT_SYMBOL(new_inode);
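
/*
 * Illustrative sketch (not part of this file): per the comment above, a
 * caller that cannot tolerate highmem or movable pages in its page cache
 * would restrict the mapping's allocation mask right after new_inode().
 * The examplefs_new_inode() wrapper is hypothetical; GFP_USER is just one
 * plausible choice of a lowmem, non-movable mask.
 *
 *	static struct inode *examplefs_new_inode(struct super_block *sb)
 *	{
 *		struct inode *inode = new_inode(sb);
 *
 *		if (inode)
 *			mapping_set_gfp_mask(inode->i_mapping, GFP_USER);
 *		return inode;
 *	}
 */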

void unlock_new_inode(struct inode *inode)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (inode->i_mode & S_IFDIR) {
		struct file_system_type *type = inode->i_sb->s_type;

		/*
		 * ensure nobody is actually holding i_mutex
		 */
		mutex_destroy(&inode->i_mutex);
		mutex_init(&inode->i_mutex);
		lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key);
	}
#endif
	/*
	 * This is special!  We do not need the spinlock
	 * when clearing I_LOCK, because we're guaranteed
	 * that nobody else tries to do anything about the
	 * state of the inode when it is locked, as we
	 * just created it (so there can be no old holders
	 * that haven't tested I_LOCK).
	 */
	WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
	inode->i_state &= ~(I_LOCK|I_NEW);
	wake_up_inode(inode);
}

EXPORT_SYMBOL(unlock_new_inode);

/*
 * This is called without the inode lock held.. Be careful.
 *
 * We no longer cache the sb_flags in i_flags - see fs.h
 *	-- rmk@arm.uk.linux.org
 */
static struct inode * get_new_inode(struct super_block *sb, struct hlist_head *head, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data)
{
	struct inode * inode;

	inode = alloc_inode(sb);
	if (inode) {
		struct inode * old;

		spin_lock(&inode_lock);
		/* We released the lock, so.. */
		old = find_inode(sb, head, test, data);
		if (!old) {
			if (set(inode, data))
				goto set_failed;

			__inode_add_to_lists(sb, head, inode);
			inode->i_state = I_LOCK|I_NEW;
			spin_unlock(&inode_lock);

			/* Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */
			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		__iget(old);
		spin_unlock(&inode_lock);
		destroy_inode(inode);
		inode = old;
		wait_on_inode(inode);
	}
	return inode;

set_failed:
	spin_unlock(&inode_lock);
	destroy_inode(inode);
	return NULL;
}

/*
 * get_new_inode_fast is the fast path version of get_new_inode, see the
 * comment at iget_locked for details.
 */
static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_head *head, unsigned long ino)
{
	struct inode * inode;

	inode = alloc_inode(sb);
	if (inode) {
		struct inode * old;

		spin_lock(&inode_lock);
		/* We released the lock, so.. */
		old = find_inode_fast(sb, head, ino);
		if (!old) {
			inode->i_ino = ino;
			__inode_add_to_lists(sb, head, inode);
			inode->i_state = I_LOCK|I_NEW;
			spin_unlock(&inode_lock);

			/* Return the locked inode with I_NEW set, the
			 * caller is responsible for filling in the contents
			 */
			return inode;
		}

		/*
		 * Uhhuh, somebody else created the same inode under
		 * us. Use the old inode instead of the one we just
		 * allocated.
		 */
		__iget(old);
		spin_unlock(&inode_lock);
		destroy_inode(inode);
		inode = old;
		wait_on_inode(inode);
	}
	return inode;
}

/**
 * iunique - get a unique inode number
 * @sb: superblock
 * @max_reserved: highest reserved inode number
 *
 * Obtain an inode number that is unique on the system for a given
 * superblock. This is used by file systems that have no natural
 * permanent inode numbering system. An inode number is returned that
 * is higher than the reserved limit but unique.
 *
 * BUGS:
 * With a large number of inodes live on the file system this function
 * currently becomes quite slow.
 */
ino_t iunique(struct super_block *sb, ino_t max_reserved)
{
	/*
	 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
	 * error if st_ino won't fit in target struct field. Use 32bit counter
	 * here to attempt to avoid that.
	 */
	static unsigned int counter;
	struct inode *inode;
	struct hlist_head *head;
	ino_t res;

	spin_lock(&inode_lock);
	do {
		if (counter <= max_reserved)
			counter = max_reserved + 1;
		res = counter++;
		head = inode_hashtable + hash(sb, res);
		inode = find_inode_fast(sb, head, res);
	} while (inode != NULL);
	spin_unlock(&inode_lock);

	return res;
}
EXPORT_SYMBOL(iunique);

struct inode *igrab(struct inode *inode)
{
	spin_lock(&inode_lock);
	if (!(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)))
		__iget(inode);
	else
		/*
		 * Handle the case where s_op->clear_inode has not been
		 * called yet, and somebody is calling igrab
		 * while the inode is getting freed.
		 */
		inode = NULL;
	spin_unlock(&inode_lock);
	return inode;
}

EXPORT_SYMBOL(igrab);

/**
 * ifind - internal function, you want ilookup5() or iget5().
 * @sb: super block of file system to search
 * @head: the head of the list to search
 * @test: callback used for comparisons between inodes
 * @data: opaque data pointer to pass to @test
 * @wait: if true wait for the inode to be unlocked, if false do not
 *
 * ifind() searches for the inode specified by @data in the inode
 * cache. This is a generalized version of ifind_fast() for file systems where
 * the inode number is not sufficient for unique identification of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Otherwise NULL is returned.
 *
 * Note, @test is called with the inode_lock held, so can't sleep.
 */
static struct inode *ifind(struct super_block *sb,
		struct hlist_head *head, int (*test)(struct inode *, void *),
		void *data, const int wait)
{
	struct inode *inode;

	spin_lock(&inode_lock);
	inode = find_inode(sb, head, test, data);
	if (inode) {
		__iget(inode);
		spin_unlock(&inode_lock);
		if (likely(wait))
			wait_on_inode(inode);
		return inode;
	}
	spin_unlock(&inode_lock);
	return NULL;
}

/**
 * ifind_fast - internal function, you want ilookup() or iget().
 * @sb: super block of file system to search
 * @head: head of the list to search
 * @ino: inode number to search for
 *
 * ifind_fast() searches for the inode @ino in the inode cache. This is for
 * file systems where the inode number is sufficient for unique identification
 * of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Otherwise NULL is returned.
 */
static struct inode *ifind_fast(struct super_block *sb,
		struct hlist_head *head, unsigned long ino)
{
	struct inode *inode;

	spin_lock(&inode_lock);
	inode = find_inode_fast(sb, head, ino);
	if (inode) {
		__iget(inode);
		spin_unlock(&inode_lock);
		wait_on_inode(inode);
		return inode;
	}
	spin_unlock(&inode_lock);
	return NULL;
}

/**
 * ilookup5_nowait - search for an inode in the inode cache
 * @sb: super block of file system to search
 * @hashval: hash value (usually inode number) to search for
 * @test: callback used for comparisons between inodes
 * @data: opaque data pointer to pass to @test
 *
 * ilookup5_nowait() uses ifind() to search for the inode specified by
 * @hashval and @data in the inode cache.
 * This is a generalized version of ilookup() for file systems where the
 * inode number is not sufficient for unique identification of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.  Note, the inode lock is not waited upon so you have to be
 * very careful what you do with the returned inode.  You probably should be
 * using ilookup5() instead.
 *
 * Otherwise NULL is returned.
 *
 * Note, @test is called with the inode_lock held, so can't sleep.
 */
struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);

	return ifind(sb, head, test, data, 0);
}

EXPORT_SYMBOL(ilookup5_nowait);

/**
 * ilookup5 - search for an inode in the inode cache
 * @sb: super block of file system to search
 * @hashval: hash value (usually inode number) to search for
 * @test: callback used for comparisons between inodes
 * @data: opaque data pointer to pass to @test
 *
 * ilookup5() uses ifind() to search for the inode specified by @hashval and
 * @data in the inode cache. This is a generalized version of ilookup() for
 * file systems where the inode number is not sufficient for unique
 * identification of an inode.
 *
 * If the inode is in the cache, the inode lock is waited upon and the inode is
 * returned with an incremented reference count.
 *
 * Otherwise NULL is returned.
 *
 * Note, @test is called with the inode_lock held, so can't sleep.
 */
struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);

	return ifind(sb, head, test, data, 1);
}

EXPORT_SYMBOL(ilookup5);

/**
 * ilookup - search for an inode in the inode cache
 * @sb: super block of file system to search
 * @ino: inode number to search for
 *
 * ilookup() uses ifind_fast() to search for the inode @ino in the inode cache.
 * This is for file systems where the inode number is sufficient for unique
 * identification of an inode.
 *
 * If the inode is in the cache, the inode is returned with an incremented
 * reference count.
 *
 * Otherwise NULL is returned.
 */
struct inode *ilookup(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);

	return ifind_fast(sb, head, ino);
}

EXPORT_SYMBOL(ilookup);

/**
 * iget5_locked - obtain an inode from a mounted file system
 * @sb: super block of file system
 * @hashval: hash value (usually inode number) to get
 * @test: callback used for comparisons between inodes
 * @set: callback used to initialize a new struct inode
 * @data: opaque data pointer to pass to @test and @set
 *
 * iget5_locked() uses ifind() to search for the inode specified by @hashval
 * and @data in the inode cache and if present it is returned with an increased
 * reference count. This is a generalized version of iget_locked() for file
 * systems where the inode number is not sufficient for unique identification
 * of an inode.
 *
 * If the inode is not in cache, get_new_inode() is called to allocate a new
 * inode and this is returned locked, hashed, and with the I_NEW flag set. The
 * file system gets to fill it in before unlocking it via unlock_new_inode().
 *
 * Note both @test and @set are called with the inode_lock held, so can't sleep.
 */
struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *),
		int (*set)(struct inode *, void *), void *data)
{
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
	struct inode *inode;

	inode = ifind(sb, head, test, data, 1);
	if (inode)
		return inode;
	/*
	 * get_new_inode() will do the right thing, re-trying the search
	 * in case it had to block at any point.
	 */
	return get_new_inode(sb, head, test, set, data);
}

EXPORT_SYMBOL(iget5_locked);

/**
 * iget_locked - obtain an inode from a mounted file system
 * @sb: super block of file system
 * @ino: inode number to get
 *
 * iget_locked() uses ifind_fast() to search for the inode specified by @ino in
 * the inode cache and if present it is returned with an increased reference
 * count. This is for file systems where the inode number is sufficient for
 * unique identification of an inode.
 *
 * If the inode is not in cache, get_new_inode_fast() is called to allocate a
 * new inode and this is returned locked, hashed, and with the I_NEW flag set.
 * The file system gets to fill it in before unlocking it via
 * unlock_new_inode().
 */
struct inode *iget_locked(struct super_block *sb, unsigned long ino)
{
	struct hlist_head *head = inode_hashtable + hash(sb, ino);
	struct inode *inode;

	inode = ifind_fast(sb, head, ino);
	if (inode)
		return inode;
	/*
	 * get_new_inode_fast() will do the right thing, re-trying the search
	 * in case it had to block at any point.
	 */
	return get_new_inode_fast(sb, head, ino);
}

EXPORT_SYMBOL(iget_locked);
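
/*
 * Illustrative sketch (not part of this file): the usual lookup pattern for
 * a filesystem whose inode number fully identifies the inode.  iget_locked()
 * either returns a cached, already filled-in inode, or a new one with I_NEW
 * set that the caller must fill in and then release with unlock_new_inode().
 * examplefs_iget() and examplefs_read_inode_from_disk() are hypothetical;
 * iget_failed() (from fs/bad_inode.c) is assumed available for the error
 * path.
 *
 *	struct inode *examplefs_iget(struct super_block *sb, unsigned long ino)
 *	{
 *		struct inode *inode;
 *		int err;
 *
 *		inode = iget_locked(sb, ino);
 *		if (!inode)
 *			return ERR_PTR(-ENOMEM);
 *		if (!(inode->i_state & I_NEW))
 *			return inode;
 *
 *		err = examplefs_read_inode_from_disk(inode);
 *		if (err) {
 *			iget_failed(inode);
 *			return ERR_PTR(err);
 *		}
 *		unlock_new_inode(inode);
 *		return inode;
 *	}
 */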

int insert_inode_locked(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	ino_t ino = inode->i_ino;
	struct hlist_head *head = inode_hashtable + hash(sb, ino);
	struct inode *old;

	inode->i_state |= I_LOCK|I_NEW;
	while (1) {
		spin_lock(&inode_lock);
		old = find_inode_fast(sb, head, ino);
		if (likely(!old)) {
			hlist_add_head(&inode->i_hash, head);
			spin_unlock(&inode_lock);
			return 0;
		}
		__iget(old);
		spin_unlock(&inode_lock);
		wait_on_inode(old);
		if (unlikely(!hlist_unhashed(&old->i_hash))) {
			iput(old);
			return -EBUSY;
		}
		iput(old);
	}
}

EXPORT_SYMBOL(insert_inode_locked);

int insert_inode_locked4(struct inode *inode, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data)
{
	struct super_block *sb = inode->i_sb;
	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
	struct inode *old;

	inode->i_state |= I_LOCK|I_NEW;

	while (1) {
		spin_lock(&inode_lock);
		old = find_inode(sb, head, test, data);
		if (likely(!old)) {
			hlist_add_head(&inode->i_hash, head);
			spin_unlock(&inode_lock);
			return 0;
		}
		__iget(old);
		spin_unlock(&inode_lock);
		wait_on_inode(old);
		if (unlikely(!hlist_unhashed(&old->i_hash))) {
			iput(old);
			return -EBUSY;
		}
		iput(old);
	}
}

EXPORT_SYMBOL(insert_inode_locked4);

/**
 * __insert_inode_hash - hash an inode
 * @inode: unhashed inode
 * @hashval: unsigned long value used to locate this object in the
 *		inode_hashtable.
 *
 * Add an inode to the inode hash for this superblock.
 */
void __insert_inode_hash(struct inode *inode, unsigned long hashval)
{
	struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
	spin_lock(&inode_lock);
	hlist_add_head(&inode->i_hash, head);
	spin_unlock(&inode_lock);
}

EXPORT_SYMBOL(__insert_inode_hash);

/**
 * remove_inode_hash - remove an inode from the hash
 * @inode: inode to unhash
 *
 * Remove an inode from the superblock.
 */
void remove_inode_hash(struct inode *inode)
{
	spin_lock(&inode_lock);
	hlist_del_init(&inode->i_hash);
	spin_unlock(&inode_lock);
}

EXPORT_SYMBOL(remove_inode_hash);

/*
 * Tell the filesystem that this inode is no longer of any interest and should
 * be completely destroyed.
 *
 * We leave the inode in the inode hash table until *after* the filesystem's
 * ->delete_inode completes.  This ensures that an iget (such as nfsd might
 * instigate) will always find up-to-date information either in the hash or on
 * disk.
 *
 * I_FREEING is set so that no-one will take a new reference to the inode while
 * it is being deleted.
 */
void generic_delete_inode(struct inode *inode)
{
	const struct super_operations *op = inode->i_sb->s_op;

	list_del_init(&inode->i_list);
	list_del_init(&inode->i_sb_list);
	WARN_ON(inode->i_state & I_NEW);
	inode->i_state |= I_FREEING;
	inodes_stat.nr_inodes--;
	spin_unlock(&inode_lock);

	security_inode_delete(inode);

	if (op->delete_inode) {
		void (*delete)(struct inode *) = op->delete_inode;
		if (!is_bad_inode(inode))
			DQUOT_INIT(inode);
		/* Filesystems implementing their own
		 * s_op->delete_inode are required to call
		 * truncate_inode_pages() and clear_inode()
		 * internally */
		delete(inode);
	} else {
		truncate_inode_pages(&inode->i_data, 0);
		clear_inode(inode);
	}
	spin_lock(&inode_lock);
	hlist_del_init(&inode->i_hash);
	spin_unlock(&inode_lock);
	wake_up_inode(inode);
	BUG_ON(inode->i_state != I_CLEAR);
	destroy_inode(inode);
}

EXPORT_SYMBOL(generic_delete_inode);

static void generic_forget_inode(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;

	if (!hlist_unhashed(&inode->i_hash)) {
		if (!(inode->i_state & (I_DIRTY|I_SYNC)))
			list_move(&inode->i_list, &inode_unused);
		inodes_stat.nr_unused++;
		if (sb->s_flags & MS_ACTIVE) {
			spin_unlock(&inode_lock);
			return;
		}
		WARN_ON(inode->i_state & I_NEW);
		inode->i_state |= I_WILL_FREE;
		spin_unlock(&inode_lock);
		write_inode_now(inode, 1);
		spin_lock(&inode_lock);
		WARN_ON(inode->i_state & I_NEW);
		inode->i_state &= ~I_WILL_FREE;
		inodes_stat.nr_unused--;
		hlist_del_init(&inode->i_hash);
	}
	list_del_init(&inode->i_list);
	list_del_init(&inode->i_sb_list);
	WARN_ON(inode->i_state & I_NEW);
	inode->i_state |= I_FREEING;
	inodes_stat.nr_inodes--;
	spin_unlock(&inode_lock);
	if (inode->i_data.nrpages)
		truncate_inode_pages(&inode->i_data, 0);
	clear_inode(inode);
	wake_up_inode(inode);
	destroy_inode(inode);
}

/*
 * Normal UNIX filesystem behaviour: delete the
 * inode when the usage count drops to zero, and
 * i_nlink is zero.
 */
void generic_drop_inode(struct inode *inode)
{
	if (!inode->i_nlink)
		generic_delete_inode(inode);
	else
		generic_forget_inode(inode);
}

EXPORT_SYMBOL_GPL(generic_drop_inode);

/*
 * Called when we're dropping the last reference
 * to an inode.
 *
 * Call the FS "drop()" function, defaulting to
 * the legacy UNIX filesystem behaviour..
 *
 * NOTE! NOTE! NOTE! We're called with the inode lock
 * held, and the drop function is supposed to release
 * the lock!
 */
static inline void iput_final(struct inode *inode)
{
	const struct super_operations *op = inode->i_sb->s_op;
	void (*drop)(struct inode *) = generic_drop_inode;

	if (op && op->drop_inode)
		drop = op->drop_inode;
	drop(inode);
}

/**
 * iput - put an inode
 * @inode: inode to put
 *
 * Puts an inode, dropping its usage count. If the inode use count hits
 * zero, the inode is then freed and may also be destroyed.
 *
 * Consequently, iput() can sleep.
 */
void iput(struct inode *inode)
{
	if (inode) {
		BUG_ON(inode->i_state == I_CLEAR);

		if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
			iput_final(inode);
	}
}

EXPORT_SYMBOL(iput);

/**
 * bmap - find a block number in a file
 * @inode: inode of file
 * @block: block to find
 *
 * Returns the block number on the device holding the inode that
 * is the disk block number for the block of the file requested.
 * That is, asked for block 4 of inode 1 the function will return the
 * disk block relative to the disk start that holds that block of the
 * file.
 */
sector_t bmap(struct inode * inode, sector_t block)
{
	sector_t res = 0;
	if (inode->i_mapping->a_ops->bmap)
		res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
	return res;
}
EXPORT_SYMBOL(bmap);

/**
 * touch_atime - update the access time
 * @mnt: mount the inode is accessed on
 * @dentry: dentry accessed
 *
 * Update the accessed time on an inode and mark it for writeback.
 * This function automatically handles read only file systems and media,
 * as well as the "noatime" flag and inode specific "noatime" markers.
 */
void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;
	struct timespec now;

	if (mnt_want_write(mnt))
		return;
	if (inode->i_flags & S_NOATIME)
		goto out;
	if (IS_NOATIME(inode))
		goto out;
	if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
		goto out;

	if (mnt->mnt_flags & MNT_NOATIME)
		goto out;
	if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
		goto out;
	if (mnt->mnt_flags & MNT_RELATIME) {
		/*
		 * With relative atime, only update atime if the previous
		 * atime is earlier than either the ctime or mtime.
		 */
		if (timespec_compare(&inode->i_mtime, &inode->i_atime) < 0 &&
		    timespec_compare(&inode->i_ctime, &inode->i_atime) < 0)
			goto out;
	}

	now = current_fs_time(inode->i_sb);
	if (timespec_equal(&inode->i_atime, &now))
		goto out;

	inode->i_atime = now;
	mark_inode_dirty_sync(inode);
out:
	mnt_drop_write(mnt);
}
EXPORT_SYMBOL(touch_atime);

/**
 * file_update_time - update mtime and ctime time
 * @file: file accessed
 *
 * Update the mtime and ctime members of an inode and mark the inode
 * for writeback. Note that this function is meant exclusively for
 * usage in the file write path of filesystems, and filesystems may
 * choose to explicitly ignore updates via this function with the
 * S_NOCMTIME inode flag, e.g. for network filesystems where these
 * timestamps are handled by the server.
 */

void file_update_time(struct file *file)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct timespec now;
	int sync_it = 0;
	int err;

	if (IS_NOCMTIME(inode))
		return;

	err = mnt_want_write(file->f_path.mnt);
	if (err)
		return;

	now = current_fs_time(inode->i_sb);
	if (!timespec_equal(&inode->i_mtime, &now)) {
		inode->i_mtime = now;
		sync_it = 1;
	}

	if (!timespec_equal(&inode->i_ctime, &now)) {
		inode->i_ctime = now;
		sync_it = 1;
	}

	if (IS_I_VERSION(inode)) {
		inode_inc_iversion(inode);
		sync_it = 1;
	}

	if (sync_it)
		mark_inode_dirty_sync(inode);
	mnt_drop_write(file->f_path.mnt);
}

EXPORT_SYMBOL(file_update_time);

int inode_needs_sync(struct inode *inode)
{
	if (IS_SYNC(inode))
		return 1;
	if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
		return 1;
	return 0;
}

EXPORT_SYMBOL(inode_needs_sync);

int inode_wait(void *word)
{
	schedule();
	return 0;
}
EXPORT_SYMBOL(inode_wait);

/*
 * If we try to find an inode in the inode hash while it is being
 * deleted, we have to wait until the filesystem completes its
 * deletion before reporting that it isn't found.  This function waits
 * until the deletion _might_ have completed.  Callers are responsible
 * to recheck inode state.
 *
 * It doesn't matter if I_LOCK is not set initially, a call to
 * wake_up_inode() after removing from the hash list will DTRT.
 *
 * This is called with inode_lock held.
 */
static void __wait_on_freeing_inode(struct inode *inode)
{
	wait_queue_head_t *wq;
	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK);
	wq = bit_waitqueue(&inode->i_state, __I_LOCK);
	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
	spin_unlock(&inode_lock);
	schedule();
	finish_wait(wq, &wait.wait);
	spin_lock(&inode_lock);
}

/*
 * We rarely want to lock two inodes that do not have a parent/child
 * relationship (such as directory, child inode) simultaneously.  The
 * vast majority of file systems should be able to get along fine
 * without this.  Do not use these functions except as a last resort.
 */
void inode_double_lock(struct inode *inode1, struct inode *inode2)
{
	if (inode1 == NULL || inode2 == NULL || inode1 == inode2) {
		if (inode1)
			mutex_lock(&inode1->i_mutex);
		else if (inode2)
			mutex_lock(&inode2->i_mutex);
		return;
	}

	if (inode1 < inode2) {
		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
	} else {
		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
	}
}
EXPORT_SYMBOL(inode_double_lock);

void inode_double_unlock(struct inode *inode1, struct inode *inode2)
{
	if (inode1)
		mutex_unlock(&inode1->i_mutex);

	if (inode2 && inode2 != inode1)
		mutex_unlock(&inode2->i_mutex);
}
EXPORT_SYMBOL(inode_double_unlock);

static __initdata unsigned long ihash_entries;
static int __init set_ihash_entries(char *str)
{
	if (!str)
		return 0;
	ihash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("ihash_entries=", set_ihash_entries);

/*
 * Initialize the waitqueues and inode hash table.
 */
void __init inode_init_early(void)
{
	int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,
					HASH_EARLY,
					&i_hash_shift,
					&i_hash_mask,
					0);

	for (loop = 0; loop < (1 << i_hash_shift); loop++)
		INIT_HLIST_HEAD(&inode_hashtable[loop]);
}

void __init inode_init(void)
{
	int loop;

	/* inode slab cache */
	inode_cachep = kmem_cache_create("inode_cache",
					 sizeof(struct inode),
					 0,
					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
					 SLAB_MEM_SPREAD),
					 init_once);
	register_shrinker(&icache_shrinker);

	/* Hash may have been set up in inode_init_early */
	if (!hashdist)
		return;

	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,
					0,
					&i_hash_shift,
					&i_hash_mask,
					0);

	for (loop = 0; loop < (1 << i_hash_shift); loop++)
		INIT_HLIST_HEAD(&inode_hashtable[loop]);
}

void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	inode->i_mode = mode;
	if (S_ISCHR(mode)) {
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = rdev;
	} else if (S_ISBLK(mode)) {
		inode->i_fop = &def_blk_fops;
		inode->i_rdev = rdev;
	} else if (S_ISFIFO(mode))
		inode->i_fop = &def_fifo_fops;
	else if (S_ISSOCK(mode))
		inode->i_fop = &bad_sock_fops;
	else
		printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n",
		       mode);
}
EXPORT_SYMBOL(init_special_inode);
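
/*
 * Illustrative sketch (not part of this file): a filesystem mknod path
 * normally allocates an inode and hands device nodes, FIFOs and sockets to
 * init_special_inode().  The examplefs_mknod() name is hypothetical and
 * attribute setup/error handling is omitted.
 *
 *	static int examplefs_mknod(struct inode *dir, struct dentry *dentry,
 *				   int mode, dev_t rdev)
 *	{
 *		struct inode *inode = new_inode(dir->i_sb);
 *
 *		if (!inode)
 *			return -ENOMEM;
 *		init_special_inode(inode, mode, rdev);
 *		d_instantiate(dentry, inode);
 *		return 0;
 *	}
 */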