/*
 * fs/dcache.c
 *
 * Complete reimplementation
 * (C) 1997 Thomas Schoebel-Theuer,
 * with heavy changes by Linus Torvalds
 */

/*
 * Notes on the allocation strategy:
 *
 * The dcache is a master of the icache - whenever a dcache entry
 * exists, the inode will always exist. "iput()" is done either when
 * the dcache entry is deleted or garbage collected.
 */

#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
#include <linux/hash.h>
#include <linux/cache.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
#include "internal.h"


int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);

EXPORT_SYMBOL(dcache_lock);

static struct kmem_cache *dentry_cache __read_mostly;

#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))

/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */
#define D_HASHBITS	d_hash_shift
#define D_HASHMASK	d_hash_mask

static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
static struct hlist_head *dentry_hashtable __read_mostly;
static LIST_HEAD(dentry_unused);

/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
	.age_limit = 45,
};

static void __d_free(struct dentry *dentry)
{
	if (dname_external(dentry))
		kfree(dentry->d_name.name);
	kmem_cache_free(dentry_cache, dentry);
}

static void d_callback(struct rcu_head *head)
{
	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
	__d_free(dentry);
}

/*
 * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry
 * inside dcache_lock.
 */
static void d_free(struct dentry *dentry)
{
	if (dentry->d_op && dentry->d_op->d_release)
		dentry->d_op->d_release(dentry);
	/* if dentry was never inserted into hash, immediate free is OK */
	if (dentry->d_hash.pprev == NULL)
		__d_free(dentry);
	else
		call_rcu(&dentry->d_u.d_rcu, d_callback);
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined.
 * Called with dcache_lock and per dentry lock held, drops both.
 */
static void dentry_iput(struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;
	if (inode) {
		dentry->d_inode = NULL;
		list_del_init(&dentry->d_alias);
		spin_unlock(&dentry->d_lock);
		spin_unlock(&dcache_lock);
		if (!inode->i_nlink)
			fsnotify_inoderemove(inode);
		if (dentry->d_op && dentry->d_op->d_iput)
			dentry->d_op->d_iput(dentry, inode);
		else
			iput(inode);
	} else {
		spin_unlock(&dentry->d_lock);
		spin_unlock(&dcache_lock);
	}
}

/*
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
 */

/*
 * dput - release a dentry
 * @dentry: dentry to release
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 *
 * no dcache lock, please.
 */

void dput(struct dentry *dentry)
{
	if (!dentry)
		return;

repeat:
	if (atomic_read(&dentry->d_count) == 1)
		might_sleep();
	if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
		return;

	spin_lock(&dentry->d_lock);
	if (atomic_read(&dentry->d_count)) {
		spin_unlock(&dentry->d_lock);
		spin_unlock(&dcache_lock);
		return;
	}

	/*
	 * AV: ->d_delete() is _NOT_ allowed to block now.
	 */
	if (dentry->d_op && dentry->d_op->d_delete) {
		if (dentry->d_op->d_delete(dentry))
			goto unhash_it;
	}
	/* Unreachable? Get rid of it */
	if (d_unhashed(dentry))
		goto kill_it;
	if (list_empty(&dentry->d_lru)) {
		dentry->d_flags |= DCACHE_REFERENCED;
		list_add(&dentry->d_lru, &dentry_unused);
		dentry_stat.nr_unused++;
	}
	spin_unlock(&dentry->d_lock);
	spin_unlock(&dcache_lock);
	return;

unhash_it:
	__d_drop(dentry);

kill_it: {
		struct dentry *parent;

		/* If dentry was on d_lru list
		 * delete it from there
		 */
		if (!list_empty(&dentry->d_lru)) {
			list_del(&dentry->d_lru);
			dentry_stat.nr_unused--;
		}
		list_del(&dentry->d_u.d_child);
		dentry_stat.nr_dentry--;	/* For d_free, below */
		/* drops the locks, at that point nobody can reach this dentry */
		dentry_iput(dentry);
		parent = dentry->d_parent;
		d_free(dentry);
		if (dentry == parent)
			return;
		dentry = parent;
		goto repeat;
	}
}

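/*
 * Illustrative usage (editor's sketch, not part of the original file):
 * every dget()/dget_locked() must be balanced by exactly one dput().
 * A caller that wants to keep a cached child pins it via d_lookup(),
 * which takes the reference for us:
 */
#if 0
static struct dentry *pin_child(struct dentry *dir, struct qstr *name)
{
	/* d_lookup() returns an already-pinned dentry, or NULL */
	struct dentry *child = d_lookup(dir, name);

	if (child && !child->d_inode) {
		dput(child);		/* negative entry: drop our reference */
		child = NULL;
	}
	return child;			/* caller must dput() this eventually */
}
#endif
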
/**
 * d_invalidate - invalidate a dentry
 * @dentry: dentry to invalidate
 *
 * Try to invalidate the dentry if it turns out to be
 * possible. If there are other dentries that can be
 * reached through this one we can't delete it and we
 * return -EBUSY. On success we return 0.
 *
 * no dcache lock.
 */

int d_invalidate(struct dentry *dentry)
{
	/*
	 * If it's already been dropped, return OK.
	 */
	spin_lock(&dcache_lock);
	if (d_unhashed(dentry)) {
		spin_unlock(&dcache_lock);
		return 0;
	}
	/*
	 * Check whether to do a partial shrink_dcache
	 * to get rid of unused child entries.
	 */
	if (!list_empty(&dentry->d_subdirs)) {
		spin_unlock(&dcache_lock);
		shrink_dcache_parent(dentry);
		spin_lock(&dcache_lock);
	}

	/*
	 * Somebody else still using it?
	 *
	 * If it's a directory, we can't drop it
	 * for fear of somebody re-populating it
	 * with children (even though dropping it
	 * would make it unreachable from the root,
	 * we might still populate it if it was a
	 * working directory or similar).
	 */
	spin_lock(&dentry->d_lock);
	if (atomic_read(&dentry->d_count) > 1) {
		if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
			spin_unlock(&dentry->d_lock);
			spin_unlock(&dcache_lock);
			return -EBUSY;
		}
	}

	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&dcache_lock);
	return 0;
}

/* This should be called _only_ with dcache_lock held */

static inline struct dentry *__dget_locked(struct dentry *dentry)
{
	atomic_inc(&dentry->d_count);
	if (!list_empty(&dentry->d_lru)) {
		dentry_stat.nr_unused--;
		list_del_init(&dentry->d_lru);
	}
	return dentry;
}

struct dentry *dget_locked(struct dentry *dentry)
{
	return __dget_locked(dentry);
}

/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 * @want_discon: flag, used by d_splice_alias, to request
 *          that only a DISCONNECTED alias be returned.
 *
 * If inode has a hashed alias, or is a directory and has any alias,
 * acquire the reference to alias and return it. Otherwise return NULL.
 * Notice that if inode is a directory there can be only one alias and
 * it can be unhashed only if it has no children, or if it is the root
 * of a filesystem.
 *
 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 * any other hashed alias over that one unless @want_discon is set,
 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 */

static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
{
	struct list_head *head, *next, *tmp;
	struct dentry *alias, *discon_alias = NULL;

	head = &inode->i_dentry;
	next = inode->i_dentry.next;
	while (next != head) {
		tmp = next;
		next = tmp->next;
		prefetch(next);
		alias = list_entry(tmp, struct dentry, d_alias);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED))
				discon_alias = alias;
			else if (!want_discon) {
				__dget_locked(alias);
				return alias;
			}
		}
	}
	if (discon_alias)
		__dget_locked(discon_alias);
	return discon_alias;
}

struct dentry *d_find_alias(struct inode *inode)
{
	struct dentry *de = NULL;

	if (!list_empty(&inode->i_dentry)) {
		spin_lock(&dcache_lock);
		de = __d_find_alias(inode, 0);
		spin_unlock(&dcache_lock);
	}
	return de;
}

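/*
 * Illustrative usage (editor's sketch, not part of the original file):
 * given only an inode - say, while handling a notification event - a
 * hashed alias can be borrowed to get at a name, then dropped again:
 */
#if 0
static void report_inode(struct inode *inode)	/* hypothetical helper */
{
	struct dentry *alias = d_find_alias(inode);

	if (alias) {
		printk(KERN_DEBUG "inode %lu known as %s\n",
		       inode->i_ino, alias->d_name.name);
		dput(alias);	/* d_find_alias() pinned it for us */
	}
}
#endif
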
/*
 * Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
	struct dentry *dentry;
restart:
	spin_lock(&dcache_lock);
	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
		spin_lock(&dentry->d_lock);
		if (!atomic_read(&dentry->d_count)) {
			__dget_locked(dentry);
			__d_drop(dentry);
			spin_unlock(&dentry->d_lock);
			spin_unlock(&dcache_lock);
			dput(dentry);
			goto restart;
		}
		spin_unlock(&dentry->d_lock);
	}
	spin_unlock(&dcache_lock);
}

/*
 * Throw away a dentry - free the inode, dput the parent. This requires
 * that the dentry has already been removed from the LRU list.
 *
 * Called with dcache_lock, drops it and then regains.
 * Called with dentry->d_lock held, drops it.
 */
static void prune_one_dentry(struct dentry *dentry)
{
	struct dentry *parent;

	__d_drop(dentry);
	list_del(&dentry->d_u.d_child);
	dentry_stat.nr_dentry--;	/* For d_free, below */
	dentry_iput(dentry);
	parent = dentry->d_parent;
	d_free(dentry);
	if (parent != dentry)
		dput(parent);
	spin_lock(&dcache_lock);
}

/**
 * prune_dcache - shrink the dcache
 * @count: number of entries to try and free
 * @sb: if given, ignore dentries for other superblocks
 *      which are being unmounted.
 *
 * Shrink the dcache. This is done when we need
 * more memory, or simply when we need to unmount
 * something (at which point we need to unuse
 * all dentries).
 *
 * This function may fail to free any resources if
 * all the dentries are in use.
 */

static void prune_dcache(int count, struct super_block *sb)
{
	spin_lock(&dcache_lock);
	for (; count ; count--) {
		struct dentry *dentry;
		struct list_head *tmp;
		struct rw_semaphore *s_umount;

		cond_resched_lock(&dcache_lock);

		tmp = dentry_unused.prev;
		if (sb) {
			/* Try to find a dentry for this sb, but don't try
			 * too hard, if they aren't near the tail they will
			 * be moved down again soon
			 */
			int skip = count;
			while (skip && tmp != &dentry_unused &&
			    list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
				skip--;
				tmp = tmp->prev;
			}
		}
		if (tmp == &dentry_unused)
			break;
		list_del_init(tmp);
		prefetch(dentry_unused.prev);
		dentry_stat.nr_unused--;
		dentry = list_entry(tmp, struct dentry, d_lru);

		spin_lock(&dentry->d_lock);
		/*
		 * We found an inuse dentry which was not removed from
		 * dentry_unused because of laziness during lookup. Do not free
		 * it - just keep it off the dentry_unused list.
		 */
		if (atomic_read(&dentry->d_count)) {
			spin_unlock(&dentry->d_lock);
			continue;
		}
		/* If the dentry was recently referenced, don't free it. */
		if (dentry->d_flags & DCACHE_REFERENCED) {
			dentry->d_flags &= ~DCACHE_REFERENCED;
			list_add(&dentry->d_lru, &dentry_unused);
			dentry_stat.nr_unused++;
			spin_unlock(&dentry->d_lock);
			continue;
		}
		/*
		 * If the dentry is not DCACHE_REFERENCED, it is time
		 * to remove it from the dcache, provided the super block is
		 * NULL (which means we are trying to reclaim memory)
		 * or this dentry belongs to the same super block that
		 * we want to shrink.
		 */
		/*
		 * If this dentry is for "my" filesystem, then I can prune it
		 * without taking the s_umount lock (I already hold it).
		 */
		if (sb && dentry->d_sb == sb) {
			prune_one_dentry(dentry);
			continue;
		}
		/*
		 * ...otherwise we need to be sure this filesystem isn't being
		 * unmounted, otherwise we could race with
		 * generic_shutdown_super(), and end up holding a reference to
		 * an inode while the filesystem is unmounted.
		 * So we try to get s_umount, and make sure s_root isn't NULL.
		 * (Take a local copy of s_umount to avoid a use-after-free of
		 * `dentry').
		 */
		s_umount = &dentry->d_sb->s_umount;
		if (down_read_trylock(s_umount)) {
			if (dentry->d_sb->s_root != NULL) {
				prune_one_dentry(dentry);
				up_read(s_umount);
				continue;
			}
			up_read(s_umount);
		}
		spin_unlock(&dentry->d_lock);
		/*
		 * Insert dentry at the head of the list as inserting at the
		 * tail leads to a cycle.
		 */
		list_add(&dentry->d_lru, &dentry_unused);
		dentry_stat.nr_unused++;
	}
	spin_unlock(&dcache_lock);
}

/*
 * Shrink the dcache for the specified super block.
 * This allows us to unmount a device without disturbing
 * the dcache for the other devices.
 *
 * This implementation makes just two traversals of the
 * unused list. On the first pass we move the selected
 * dentries to the most recent end, and on the second
 * pass we free them. The second pass must restart after
 * each prune, but since the target dentries are all at
 * the end, it's really just a single traversal.
 */

/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This
 * is used to free the dcache before unmounting a file
 * system.
 */

void shrink_dcache_sb(struct super_block *sb)
{
	struct list_head *tmp, *next;
	struct dentry *dentry;

	/*
	 * Pass one ... move the dentries for the specified
	 * superblock to the most recent end of the unused list.
	 */
	spin_lock(&dcache_lock);
	list_for_each_safe(tmp, next, &dentry_unused) {
		dentry = list_entry(tmp, struct dentry, d_lru);
		if (dentry->d_sb != sb)
			continue;
		list_move(tmp, &dentry_unused);
	}

	/*
	 * Pass two ... free the dentries for this superblock.
	 */
repeat:
	list_for_each_safe(tmp, next, &dentry_unused) {
		dentry = list_entry(tmp, struct dentry, d_lru);
		if (dentry->d_sb != sb)
			continue;
		dentry_stat.nr_unused--;
		list_del_init(tmp);
		spin_lock(&dentry->d_lock);
		if (atomic_read(&dentry->d_count)) {
			spin_unlock(&dentry->d_lock);
			continue;
		}
		prune_one_dentry(dentry);
		cond_resched_lock(&dcache_lock);
		goto repeat;
	}
	spin_unlock(&dcache_lock);
}

/*
 * destroy a single subtree of dentries for unmount
 * - see the comments on shrink_dcache_for_umount() for a description of the
 *   locking
 */
static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
{
	struct dentry *parent;
	unsigned detached = 0;

	BUG_ON(!IS_ROOT(dentry));

	/* detach this root from the system */
	spin_lock(&dcache_lock);
	if (!list_empty(&dentry->d_lru)) {
		dentry_stat.nr_unused--;
		list_del_init(&dentry->d_lru);
	}
	__d_drop(dentry);
	spin_unlock(&dcache_lock);

	for (;;) {
		/* descend to the first leaf in the current subtree */
		while (!list_empty(&dentry->d_subdirs)) {
			struct dentry *loop;

			/* this is a branch with children - detach all of them
			 * from the system in one go */
			spin_lock(&dcache_lock);
			list_for_each_entry(loop, &dentry->d_subdirs,
					    d_u.d_child) {
				if (!list_empty(&loop->d_lru)) {
					dentry_stat.nr_unused--;
					list_del_init(&loop->d_lru);
				}

				__d_drop(loop);
				cond_resched_lock(&dcache_lock);
			}
			spin_unlock(&dcache_lock);

			/* move to the first child */
			dentry = list_entry(dentry->d_subdirs.next,
					    struct dentry, d_u.d_child);
		}

		/* consume the dentries from this leaf up through its parents
		 * until we find one with children or run out altogether */
		do {
			struct inode *inode;

			if (atomic_read(&dentry->d_count) != 0) {
				printk(KERN_ERR
				       "BUG: Dentry %p{i=%lx,n=%s}"
				       " still in use (%d)"
				       " [unmount of %s %s]\n",
				       dentry,
				       dentry->d_inode ?
				       dentry->d_inode->i_ino : 0UL,
				       dentry->d_name.name,
				       atomic_read(&dentry->d_count),
				       dentry->d_sb->s_type->name,
				       dentry->d_sb->s_id);
				BUG();
			}

			parent = dentry->d_parent;
			if (parent == dentry)
				parent = NULL;
			else
				atomic_dec(&parent->d_count);

			list_del(&dentry->d_u.d_child);
			detached++;

			inode = dentry->d_inode;
			if (inode) {
				dentry->d_inode = NULL;
				list_del_init(&dentry->d_alias);
				if (dentry->d_op && dentry->d_op->d_iput)
					dentry->d_op->d_iput(dentry, inode);
				else
					iput(inode);
			}

			d_free(dentry);

			/* finished when we fall off the top of the tree,
			 * otherwise we ascend to the parent and move to the
			 * next sibling if there is one */
			if (!parent)
				goto out;

			dentry = parent;

		} while (list_empty(&dentry->d_subdirs));

		dentry = list_entry(dentry->d_subdirs.next,
				    struct dentry, d_u.d_child);
	}
out:
	/* several dentries were freed, need to correct nr_dentry */
	spin_lock(&dcache_lock);
	dentry_stat.nr_dentry -= detached;
	spin_unlock(&dcache_lock);
}

/*
 * destroy the dentries attached to a superblock on unmounting
 * - we don't need to use dentry->d_lock, and only need dcache_lock when
 *   removing the dentry from the system lists and hashes because:
 *   - the superblock is detached from all mountings and open files, so the
 *     dentry trees will not be rearranged by the VFS
 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
 *     any dentries belonging to this superblock that it comes across
 *   - the filesystem itself is no longer permitted to rearrange the dentries
 *     in this superblock
 */
void shrink_dcache_for_umount(struct super_block *sb)
{
	struct dentry *dentry;

	if (down_read_trylock(&sb->s_umount))
		BUG();

	dentry = sb->s_root;
	sb->s_root = NULL;
	atomic_dec(&dentry->d_count);
	shrink_dcache_for_umount_subtree(dentry);

	while (!hlist_empty(&sb->s_anon)) {
		dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash);
		shrink_dcache_for_umount_subtree(dentry);
	}
}

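/*
 * Illustrative call site (editor's sketch, not part of the original file):
 * the umount path tears the whole tree down with s_umount already held
 * for writing - loosely modelled on generic_shutdown_super():
 */
#if 0
static void teardown_super(struct super_block *sb)	/* hypothetical */
{
	if (sb->s_root) {
		shrink_dcache_for_umount(sb);	/* frees every dentry */
		fsync_super(sb);		/* then flush the inodes */
		/* ... detach sb from the superblock lists ... */
	}
}
#endif
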
/*
 * Search for at least 1 mount point in the dentry's subdirs.
 * We descend to the next level whenever the d_subdirs
 * list is non-empty and continue searching.
 */

/**
 * have_submounts - check for mounts over a dentry
 * @parent: dentry to check.
 *
 * Return true if the parent or its subdirectories contain
 * a mount point
 */

int have_submounts(struct dentry *parent)
{
	struct dentry *this_parent = parent;
	struct list_head *next;

	spin_lock(&dcache_lock);
	if (d_mountpoint(parent))
		goto positive;
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;
		/* Have we found a mount point ? */
		if (d_mountpoint(dentry))
			goto positive;
		if (!list_empty(&dentry->d_subdirs)) {
			this_parent = dentry;
			goto repeat;
		}
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		next = this_parent->d_u.d_child.next;
		this_parent = this_parent->d_parent;
		goto resume;
	}
	spin_unlock(&dcache_lock);
	return 0; /* No mount points found in tree */
positive:
	spin_unlock(&dcache_lock);
	return 1;
}

/*
 * Search the dentry child list for the specified parent,
 * and move any unused dentries to the end of the unused
 * list for prune_dcache(). We descend to the next level
 * whenever the d_subdirs list is non-empty and continue
 * searching.
 *
 * It returns zero iff there are no unused children,
 * otherwise it returns the number of children moved to
 * the end of the unused list. This may not be the total
 * number of unused children, because select_parent can
 * drop the lock and return early due to latency
 * constraints.
 */
static int select_parent(struct dentry *parent)
{
	struct dentry *this_parent = parent;
	struct list_head *next;
	int found = 0;

	spin_lock(&dcache_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		if (!list_empty(&dentry->d_lru)) {
			dentry_stat.nr_unused--;
			list_del_init(&dentry->d_lru);
		}
		/*
		 * move only zero ref count dentries to the end
		 * of the unused list for prune_dcache
		 */
		if (!atomic_read(&dentry->d_count)) {
			list_add_tail(&dentry->d_lru, &dentry_unused);
			dentry_stat.nr_unused++;
			found++;
		}

		/*
		 * We can return to the caller if we have found some (this
		 * ensures forward progress). We'll be coming back to find
		 * the rest.
		 */
		if (found && need_resched())
			goto out;

		/*
		 * Descend a level if the d_subdirs list is non-empty.
		 */
		if (!list_empty(&dentry->d_subdirs)) {
			this_parent = dentry;
			goto repeat;
		}
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		next = this_parent->d_u.d_child.next;
		this_parent = this_parent->d_parent;
		goto resume;
	}
out:
	spin_unlock(&dcache_lock);
	return found;
}

/**
 * shrink_dcache_parent - prune dcache
 * @parent: parent of entries to prune
 *
 * Prune the dcache to remove unused children of the parent dentry.
 */

void shrink_dcache_parent(struct dentry *parent)
{
	int found;

	while ((found = select_parent(parent)) != 0)
		prune_dcache(found, parent->d_sb);
}

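/*
 * Illustrative usage (editor's sketch, not part of the original file):
 * a network filesystem that notices a directory went stale typically
 * checks have_submounts() before invalidating it, since a mounted-on
 * dentry must not be dropped:
 */
#if 0
static int myfs_check_stale(struct dentry *dentry)	/* hypothetical */
{
	if (have_submounts(dentry))
		return -EBUSY;		/* something is mounted below us */
	shrink_dcache_parent(dentry);	/* flush unused children first */
	return d_invalidate(dentry);
}
#endif
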
/*
 * Scan `nr' dentries and return the number which remain.
 *
 * We need to avoid reentering the filesystem if the caller is performing a
 * GFP_NOFS allocation attempt. One example deadlock is:
 *
 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
 *
 * In this case we return -1 to tell the caller that we bailed.
 */
static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
{
	if (nr) {
		if (!(gfp_mask & __GFP_FS))
			return -1;
		prune_dcache(nr, NULL);
	}
	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}

/**
 * d_alloc - allocate a dcache entry
 * @parent: parent of entry to allocate
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
 * available. On a success the dentry is returned. The name passed in is
 * copied and the copy passed in may be reused after this call.
 */

struct dentry *d_alloc(struct dentry *parent, const struct qstr *name)
{
	struct dentry *dentry;
	char *dname;

	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
	if (!dentry)
		return NULL;

	if (name->len > DNAME_INLINE_LEN-1) {
		dname = kmalloc(name->len + 1, GFP_KERNEL);
		if (!dname) {
			kmem_cache_free(dentry_cache, dentry);
			return NULL;
		}
	} else {
		dname = dentry->d_iname;
	}
	dentry->d_name.name = dname;

	dentry->d_name.len = name->len;
	dentry->d_name.hash = name->hash;
	memcpy(dname, name->name, name->len);
	dname[name->len] = 0;

	atomic_set(&dentry->d_count, 1);
	dentry->d_flags = DCACHE_UNHASHED;
	spin_lock_init(&dentry->d_lock);
	dentry->d_inode = NULL;
	dentry->d_parent = NULL;
	dentry->d_sb = NULL;
	dentry->d_op = NULL;
	dentry->d_fsdata = NULL;
	dentry->d_mounted = 0;
#ifdef CONFIG_PROFILING
	dentry->d_cookie = NULL;
#endif
	INIT_HLIST_NODE(&dentry->d_hash);
	INIT_LIST_HEAD(&dentry->d_lru);
	INIT_LIST_HEAD(&dentry->d_subdirs);
	INIT_LIST_HEAD(&dentry->d_alias);

	if (parent) {
		dentry->d_parent = dget(parent);
		dentry->d_sb = parent->d_sb;
	} else {
		INIT_LIST_HEAD(&dentry->d_u.d_child);
	}

	spin_lock(&dcache_lock);
	if (parent)
		list_add(&dentry->d_u.d_child, &parent->d_subdirs);
	dentry_stat.nr_dentry++;
	spin_unlock(&dcache_lock);

	return dentry;
}

struct dentry *d_alloc_name(struct dentry *parent, const char *name)
{
	struct qstr q;

	q.name = name;
	q.len = strlen(name);
	q.hash = full_name_hash(q.name, q.len);
	return d_alloc(parent, &q);
}

/**
 * d_instantiate - fill in inode information for a dentry
 * @entry: dentry to complete
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry.
 *
 * This turns negative dentries into productive full members
 * of society.
 *
 * NOTE! This assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */

void d_instantiate(struct dentry *entry, struct inode *inode)
{
	BUG_ON(!list_empty(&entry->d_alias));
	spin_lock(&dcache_lock);
	if (inode)
		list_add(&entry->d_alias, &inode->i_dentry);
	entry->d_inode = inode;
	fsnotify_d_instantiate(entry, inode);
	spin_unlock(&dcache_lock);
	security_d_instantiate(entry, inode);
}

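/*
 * Illustrative usage (editor's sketch, not part of the original file):
 * a typical filesystem ->create() makes a new inode and then binds it
 * to the still-negative dentry the VFS passed in:
 */
#if 0
static int myfs_create(struct inode *dir, struct dentry *dentry,
		       int mode, struct nameidata *nd)	/* hypothetical */
{
	struct inode *inode = new_inode(dir->i_sb);

	if (!inode)
		return -ENOSPC;
	inode->i_mode = mode;
	/* ... fill in i_op, i_fop, timestamps ... */
	d_instantiate(dentry, inode);	/* dcache consumes our inode ref */
	return 0;
}
#endif
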
/**
 * d_instantiate_unique - instantiate a non-aliased dentry
 * @entry: dentry to instantiate
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry. On success, it returns NULL.
 * If an unhashed alias of "entry" already exists, then we return the
 * aliased dentry instead and drop one reference to inode.
 *
 * Note that in order to avoid conflicts with rename() etc, the caller
 * had better be holding the parent directory's i_mutex.
 *
 * This also assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */
static struct dentry *__d_instantiate_unique(struct dentry *entry,
					     struct inode *inode)
{
	struct dentry *alias;
	int len = entry->d_name.len;
	const char *name = entry->d_name.name;
	unsigned int hash = entry->d_name.hash;

	if (!inode) {
		entry->d_inode = NULL;
		return NULL;
	}

	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
		struct qstr *qstr = &alias->d_name;

		if (qstr->hash != hash)
			continue;
		if (alias->d_parent != entry->d_parent)
			continue;
		if (qstr->len != len)
			continue;
		if (memcmp(qstr->name, name, len))
			continue;
		dget_locked(alias);
		return alias;
	}

	list_add(&entry->d_alias, &inode->i_dentry);
	entry->d_inode = inode;
	fsnotify_d_instantiate(entry, inode);
	return NULL;
}

struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
	struct dentry *result;

	BUG_ON(!list_empty(&entry->d_alias));

	spin_lock(&dcache_lock);
	result = __d_instantiate_unique(entry, inode);
	spin_unlock(&dcache_lock);

	if (!result) {
		security_d_instantiate(entry, inode);
		return NULL;
	}

	BUG_ON(!d_unhashed(result));
	iput(inode);
	return result;
}

EXPORT_SYMBOL(d_instantiate_unique);

/**
 * d_alloc_root - allocate root dentry
 * @root_inode: inode to allocate the root for
 *
 * Allocate a root ("/") dentry for the inode given. The inode is
 * instantiated and returned. %NULL is returned if there is insufficient
 * memory or the inode passed is %NULL.
 */

struct dentry *d_alloc_root(struct inode *root_inode)
{
	struct dentry *res = NULL;

	if (root_inode) {
		static const struct qstr name = { .name = "/", .len = 1 };

		res = d_alloc(NULL, &name);
		if (res) {
			res->d_sb = root_inode->i_sb;
			res->d_parent = res;
			d_instantiate(res, root_inode);
		}
	}
	return res;
}

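/*
 * Illustrative usage (editor's sketch, not part of the original file):
 * a ->fill_super() typically ends by rooting the dentry tree; the inode
 * helper named below is hypothetical:
 */
#if 0
static int myfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct inode *root_inode = myfs_make_root_inode(sb); /* hypothetical */

	if (!root_inode)
		return -ENOMEM;
	sb->s_root = d_alloc_root(root_inode);
	if (!sb->s_root) {
		iput(root_inode);	/* d_alloc_root() did not take it */
		return -ENOMEM;
	}
	return 0;
}
#endif
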
static inline struct hlist_head *d_hash(struct dentry *parent,
					unsigned long hash)
{
	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
	hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
	return dentry_hashtable + (hash & D_HASHMASK);
}

/**
 * d_alloc_anon - allocate an anonymous dentry
 * @inode: inode to allocate the dentry for
 *
 * This is similar to d_alloc_root. It is used by filesystems when
 * creating a dentry for a given inode, often in the process of
 * mapping a filehandle to a dentry. The returned dentry may be
 * anonymous, or may have a full name (if the inode was already
 * in the cache). The file system may need to make further
 * efforts to connect this dentry into the dcache properly.
 *
 * When called on a directory inode, we must ensure that
 * the inode only ever has one dentry. If a dentry is
 * found, that is returned instead of allocating a new one.
 *
 * On successful return, the reference to the inode has been transferred
 * to the dentry. If %NULL is returned (indicating kmalloc failure),
 * the reference on the inode has not been released.
 */

struct dentry *d_alloc_anon(struct inode *inode)
{
	static const struct qstr anonstring = { .name = "" };
	struct dentry *tmp;
	struct dentry *res;

	if ((res = d_find_alias(inode))) {
		iput(inode);
		return res;
	}

	tmp = d_alloc(NULL, &anonstring);
	if (!tmp)
		return NULL;

	tmp->d_parent = tmp; /* make sure dput doesn't croak */

	spin_lock(&dcache_lock);
	res = __d_find_alias(inode, 0);
	if (!res) {
		/* attach a disconnected dentry */
		res = tmp;
		tmp = NULL;
		spin_lock(&res->d_lock);
		res->d_sb = inode->i_sb;
		res->d_parent = res;
		res->d_inode = inode;
		res->d_flags |= DCACHE_DISCONNECTED;
		res->d_flags &= ~DCACHE_UNHASHED;
		list_add(&res->d_alias, &inode->i_dentry);
		hlist_add_head(&res->d_hash, &inode->i_sb->s_anon);
		spin_unlock(&res->d_lock);

		inode = NULL; /* don't drop reference */
	}
	spin_unlock(&dcache_lock);

	if (inode)
		iput(inode);
	if (tmp)
		dput(tmp);
	return res;
}


/**
 * d_splice_alias - splice a disconnected dentry into the tree if one exists
 * @inode:  the inode which may have a disconnected dentry
 * @dentry: a negative dentry which we want to point to the inode.
 *
 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
 * and return it, else simply d_add the inode to the dentry and return NULL.
 *
 * This is needed in the lookup routine of any filesystem that is exportable
 * (via knfsd) so that we can build dcache paths to directories effectively.
 *
 * If a dentry was found and moved, then it is returned. Otherwise NULL
 * is returned. This matches the expected return value of ->lookup.
 *
 */
struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
{
	struct dentry *new = NULL;

	if (inode && S_ISDIR(inode->i_mode)) {
		spin_lock(&dcache_lock);
		new = __d_find_alias(inode, 1);
		if (new) {
			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
			fsnotify_d_instantiate(new, inode);
			spin_unlock(&dcache_lock);
			security_d_instantiate(new, inode);
			d_rehash(dentry);
			d_move(new, dentry);
			iput(inode);
		} else {
			/* d_instantiate takes dcache_lock, so we do it by hand */
			list_add(&dentry->d_alias, &inode->i_dentry);
			dentry->d_inode = inode;
			fsnotify_d_instantiate(dentry, inode);
			spin_unlock(&dcache_lock);
			security_d_instantiate(dentry, inode);
			d_rehash(dentry);
		}
	} else
		d_add(dentry, inode);
	return new;
}

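/*
 * Illustrative usage (editor's sketch, not part of the original file):
 * an exportable filesystem's ->lookup() returns d_splice_alias() directly,
 * so a previously disconnected directory alias gets reused; the inode
 * helper below is hypothetical:
 */
#if 0
static struct dentry *myfs_lookup(struct inode *dir, struct dentry *dentry,
				  struct nameidata *nd)
{
	struct inode *inode = myfs_iget_by_name(dir, &dentry->d_name); /* hypothetical */

	if (IS_ERR(inode))
		return ERR_PTR(PTR_ERR(inode));
	return d_splice_alias(inode, dentry);	/* NULL inode -> negative dentry */
}
#endif
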
/**
 * d_lookup - search for a dentry
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 *
 * Searches the children of the parent dentry for the name in question. If
 * the dentry is found its reference count is incremented and the dentry
 * is returned. The caller must use dput to free the entry when it has
 * finished using it. %NULL is returned on failure.
 *
 * __d_lookup is dcache_lock free. The hash list is protected using RCU.
 * Memory barriers are used while updating and doing lockless traversal.
 * To avoid races with d_move while rename is happening, d_lock is used.
 *
 * Overflows in memcmp() during a concurrent d_move() are avoided by keeping
 * the length and the name pointer together in one qstr (d_name), which
 * d_move() only changes under d_lock.
 *
 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
 * lookup is going on.
 *
 * dentry_unused list is not updated even if lookup finds the required dentry
 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
 * acquisition.
 *
 * d_lookup() is protected against concurrent renames in some unrelated
 * directory using the seqlock_t rename_lock.
 */

struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
{
	struct dentry *dentry = NULL;
	unsigned long seq;

	do {
		seq = read_seqbegin(&rename_lock);
		dentry = __d_lookup(parent, name);
		if (dentry)
			break;
	} while (read_seqretry(&rename_lock, seq));
	return dentry;
}

struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct hlist_head *head = d_hash(parent, hash);
	struct dentry *found = NULL;
	struct hlist_node *node;
	struct dentry *dentry;

	rcu_read_lock();

	hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
		struct qstr *qstr;

		if (dentry->d_name.hash != hash)
			continue;
		if (dentry->d_parent != parent)
			continue;

		spin_lock(&dentry->d_lock);

		/*
		 * Recheck the dentry after taking the lock - d_move may have
		 * changed things. Don't bother checking the hash because we're
		 * about to compare the whole name anyway.
		 */
		if (dentry->d_parent != parent)
			goto next;

		/*
		 * It is safe to compare names since d_move() cannot
		 * change the qstr (protected by d_lock).
		 */
		qstr = &dentry->d_name;
		if (parent->d_op && parent->d_op->d_compare) {
			if (parent->d_op->d_compare(parent, qstr, name))
				goto next;
		} else {
			if (qstr->len != len)
				goto next;
			if (memcmp(qstr->name, str, len))
				goto next;
		}

		if (!d_unhashed(dentry)) {
			atomic_inc(&dentry->d_count);
			found = dentry;
		}
		spin_unlock(&dentry->d_lock);
		break;
next:
		spin_unlock(&dentry->d_lock);
	}
	rcu_read_unlock();

	return found;
}

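/*
 * Illustrative usage (editor's sketch, not part of the original file):
 * a caller that wants a cached child without touching the filesystem
 * builds a qstr and lets d_lookup() pin the result:
 */
#if 0
static struct dentry *lookup_cached(struct dentry *dir, const char *component)
{
	struct qstr q;

	q.name = component;
	q.len = strlen(component);
	q.hash = full_name_hash(q.name, q.len);
	return d_lookup(dir, &q);	/* pinned dentry or NULL; dput() later */
}
#endif
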
/**
 * d_hash_and_lookup - hash the qstr then search for a dentry
 * @dir: Directory to search in
 * @name: qstr of name we wish to find
 *
 * On hash failure or on lookup failure NULL is returned.
 */
struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
{
	struct dentry *dentry = NULL;

	/*
	 * Check for a fs-specific hash function. Note that we must
	 * calculate the standard hash first, as the d_op->d_hash()
	 * routine may choose to leave the hash value unchanged.
	 */
	name->hash = full_name_hash(name->name, name->len);
	if (dir->d_op && dir->d_op->d_hash) {
		if (dir->d_op->d_hash(dir, name) < 0)
			goto out;
	}
	dentry = d_lookup(dir, name);
out:
	return dentry;
}

/**
 * d_validate - verify dentry provided from insecure source
 * @dentry: The dentry alleged to be valid child of @dparent
 * @dparent: The parent dentry (known to be valid)
 *
 * An insecure source has sent us a dentry, here we verify it and dget() it.
 * This is used by ncpfs in its readdir implementation.
 * Zero is returned if the dentry is invalid.
 */

int d_validate(struct dentry *dentry, struct dentry *dparent)
{
	struct hlist_head *base;
	struct hlist_node *lhp;

	/* Check whether the ptr might be valid at all.. */
	if (!kmem_ptr_validate(dentry_cache, dentry))
		goto out;

	if (dentry->d_parent != dparent)
		goto out;

	spin_lock(&dcache_lock);
	base = d_hash(dparent, dentry->d_name.hash);
	hlist_for_each(lhp, base) {
		/* hlist_for_each_entry_rcu() not required for d_hash list
		 * as it is parsed under dcache_lock
		 */
		if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
			__dget_locked(dentry);
			spin_unlock(&dcache_lock);
			return 1;
		}
	}
	spin_unlock(&dcache_lock);
out:
	return 0;
}

/*
 * When a file is deleted, we have two options:
 * - turn this dentry into a negative dentry
 * - unhash this dentry and free it.
 *
 * Usually, we want to just turn this into
 * a negative dentry, but if anybody else is
 * currently using the dentry or the inode
 * we can't do that and we fall back on removing
 * it from the hash queues and waiting for
 * it to be deleted later when it has no users
 */

/**
 * d_delete - delete a dentry
 * @dentry: The dentry to delete
 *
 * Turn the dentry into a negative dentry if possible, otherwise
 * remove it from the hash queues so it can be deleted later
 */

void d_delete(struct dentry *dentry)
{
	int isdir = 0;
	/*
	 * Are we the only user?
	 */
	spin_lock(&dcache_lock);
	spin_lock(&dentry->d_lock);
	isdir = S_ISDIR(dentry->d_inode->i_mode);
	if (atomic_read(&dentry->d_count) == 1) {
		dentry_iput(dentry);
		fsnotify_nameremove(dentry, isdir);

		/* remove this and other inotify debug checks after 2.6.18 */
		dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
		return;
	}

	if (!d_unhashed(dentry))
		__d_drop(dentry);

	spin_unlock(&dentry->d_lock);
	spin_unlock(&dcache_lock);

	fsnotify_nameremove(dentry, isdir);
}

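/*
 * Illustrative call site (editor's sketch, not part of the original file):
 * the VFS unlink path calls d_delete() once the filesystem has removed
 * the name - loosely modelled on vfs_unlink():
 */
#if 0
static int delete_name(struct inode *dir, struct dentry *dentry)
{
	int error = dir->i_op->unlink(dir, dentry);

	if (!error)
		d_delete(dentry);	/* go negative, or unhash if busy */
	return error;
}
#endif
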
static void __d_rehash(struct dentry *entry, struct hlist_head *list)
{
	entry->d_flags &= ~DCACHE_UNHASHED;
	hlist_add_head_rcu(&entry->d_hash, list);
}

static void _d_rehash(struct dentry *entry)
{
	__d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
}

/**
 * d_rehash - add an entry back to the hash
 * @entry: dentry to add to the hash
 *
 * Adds a dentry to the hash according to its name.
 */

void d_rehash(struct dentry *entry)
{
	spin_lock(&dcache_lock);
	spin_lock(&entry->d_lock);
	_d_rehash(entry);
	spin_unlock(&entry->d_lock);
	spin_unlock(&dcache_lock);
}

#define do_switch(x,y) do { \
	__typeof__ (x) __tmp = x; \
	x = y; y = __tmp; } while (0)

/*
 * When switching names, the actual string doesn't strictly have to
 * be preserved in the target - because we're dropping the target
 * anyway. As such, we can just do a simple memcpy() to copy over
 * the new name before we switch.
 *
 * Note that we have to be a lot more careful about getting the hash
 * switched - we have to switch the hash value properly even if it
 * then no longer matches the actual (corrupted) string of the target.
 * The hash value has to match the hash queue that the dentry is on..
 */
static void switch_names(struct dentry *dentry, struct dentry *target)
{
	if (dname_external(target)) {
		if (dname_external(dentry)) {
			/*
			 * Both external: swap the pointers
			 */
			do_switch(target->d_name.name, dentry->d_name.name);
		} else {
			/*
			 * dentry:internal, target:external. Steal target's
			 * storage and make target internal.
			 */
			dentry->d_name.name = target->d_name.name;
			target->d_name.name = target->d_iname;
		}
	} else {
		if (dname_external(dentry)) {
			/*
			 * dentry:external, target:internal. Give dentry's
			 * storage to target and make dentry internal
			 */
			memcpy(dentry->d_iname, target->d_name.name,
					target->d_name.len + 1);
			target->d_name.name = dentry->d_name.name;
			dentry->d_name.name = dentry->d_iname;
		} else {
			/*
			 * Both are internal. Just copy target to dentry
			 */
			memcpy(dentry->d_iname, target->d_name.name,
					target->d_name.len + 1);
		}
	}
}

/*
 * We cannibalize "target" when moving dentry on top of it,
 * because it's going to be thrown away anyway. We could be more
 * polite about it, though.
 *
 * This forceful removal will result in ugly /proc output if
 * somebody holds a file open that got deleted due to a rename.
 * We could be nicer about the deleted file, and let it show
 * up under the name it got deleted rather than the name that
 * deleted it.
 */

/*
 * d_move_locked - move a dentry
 * @dentry: entry to move
 * @target: new dentry
 *
 * Update the dcache to reflect the move of a file name. Negative
 * dcache entries should not be moved in this way.
 */
static void d_move_locked(struct dentry *dentry, struct dentry *target)
{
	struct hlist_head *list;

	if (!dentry->d_inode)
		printk(KERN_WARNING "VFS: moving negative dcache entry\n");

	write_seqlock(&rename_lock);
	/*
	 * XXXX: do we really need to take target->d_lock?
	 */
	if (target < dentry) {
		spin_lock(&target->d_lock);
		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
	} else {
		spin_lock(&dentry->d_lock);
		spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
	}

	/* Move the dentry to the target hash queue, if on different bucket */
	if (dentry->d_flags & DCACHE_UNHASHED)
		goto already_unhashed;

	hlist_del_rcu(&dentry->d_hash);

already_unhashed:
	list = d_hash(target->d_parent, target->d_name.hash);
	__d_rehash(dentry, list);

	/* Unhash the target: dput() will then get rid of it */
	__d_drop(target);

	list_del(&dentry->d_u.d_child);
	list_del(&target->d_u.d_child);

	/* Switch the names.. */
	switch_names(dentry, target);
	do_switch(dentry->d_name.len, target->d_name.len);
	do_switch(dentry->d_name.hash, target->d_name.hash);

	/* ... and switch the parents */
	if (IS_ROOT(dentry)) {
		dentry->d_parent = target->d_parent;
		target->d_parent = target;
		INIT_LIST_HEAD(&target->d_u.d_child);
	} else {
		do_switch(dentry->d_parent, target->d_parent);

		/* And add them back to the (new) parent lists */
		list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
	}

	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
	spin_unlock(&target->d_lock);
	fsnotify_d_move(dentry);
	spin_unlock(&dentry->d_lock);
	write_sequnlock(&rename_lock);
}

/**
 * d_move - move a dentry
 * @dentry: entry to move
 * @target: new dentry
 *
 * Update the dcache to reflect the move of a file name. Negative
 * dcache entries should not be moved in this way.
 */

void d_move(struct dentry *dentry, struct dentry *target)
{
	spin_lock(&dcache_lock);
	d_move_locked(dentry, target);
	spin_unlock(&dcache_lock);
}

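/*
 * Illustrative call site (editor's sketch, not part of the original file):
 * after a filesystem ->rename() succeeds, the VFS makes the dcache match
 * by moving the source dentry over the target - loosely modelled on
 * vfs_rename():
 */
#if 0
static int rename_name(struct inode *old_dir, struct dentry *old_dentry,
		       struct inode *new_dir, struct dentry *new_dentry)
{
	int error = old_dir->i_op->rename(old_dir, old_dentry,
					  new_dir, new_dentry);

	if (!error)
		d_move(old_dentry, new_dentry);	/* new_dentry is cannibalized */
	return error;
}
#endif
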
/*
 * Helper that returns 1 if p1 is a parent of p2, else 0
 */
static int d_isparent(struct dentry *p1, struct dentry *p2)
{
	struct dentry *p;

	for (p = p2; p->d_parent != p; p = p->d_parent) {
		if (p->d_parent == p1)
			return 1;
	}
	return 0;
}

/*
 * This helper attempts to cope with remotely renamed directories
 *
 * It assumes that the caller is already holding
 * dentry->d_parent->d_inode->i_mutex and the dcache_lock
 *
 * Note: If ever the locking in lock_rename() changes, then please
 * remember to update this too...
 *
 * On return, dcache_lock will have been unlocked.
 */
static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
{
	struct mutex *m1 = NULL, *m2 = NULL;
	struct dentry *ret;

	/* If alias and dentry share a parent, then no extra locks required */
	if (alias->d_parent == dentry->d_parent)
		goto out_unalias;

	/* Check for loops */
	ret = ERR_PTR(-ELOOP);
	if (d_isparent(alias, dentry))
		goto out_err;

	/* See lock_rename() */
	ret = ERR_PTR(-EBUSY);
	if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
		goto out_err;
	m1 = &dentry->d_sb->s_vfs_rename_mutex;
	if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
		goto out_err;
	m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
	d_move_locked(alias, dentry);
	ret = alias;
out_err:
	spin_unlock(&dcache_lock);
	if (m2)
		mutex_unlock(m2);
	if (m1)
		mutex_unlock(m1);
	return ret;
}

/*
 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
 * named dentry in place of the dentry to be replaced.
 */
static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
{
	struct dentry *dparent, *aparent;

	switch_names(dentry, anon);
	do_switch(dentry->d_name.len, anon->d_name.len);
	do_switch(dentry->d_name.hash, anon->d_name.hash);

	dparent = dentry->d_parent;
	aparent = anon->d_parent;

	dentry->d_parent = (aparent == anon) ? dentry : aparent;
	list_del(&dentry->d_u.d_child);
	if (!IS_ROOT(dentry))
		list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
	else
		INIT_LIST_HEAD(&dentry->d_u.d_child);

	anon->d_parent = (dparent == dentry) ? anon : dparent;
	list_del(&anon->d_u.d_child);
	if (!IS_ROOT(anon))
		list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
	else
		INIT_LIST_HEAD(&anon->d_u.d_child);

	anon->d_flags &= ~DCACHE_DISCONNECTED;
}

/**
 * d_materialise_unique - introduce an inode into the tree
 * @dentry: candidate dentry
 * @inode: inode to bind to the dentry, to which aliases may be attached
 *
 * Introduces a dentry into the tree, substituting an extant disconnected
 * root directory alias in its place if there is one
 */
struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
{
	struct dentry *actual;

	BUG_ON(!d_unhashed(dentry));

	spin_lock(&dcache_lock);

	if (!inode) {
		actual = dentry;
		dentry->d_inode = NULL;
		goto found_lock;
	}

	if (S_ISDIR(inode->i_mode)) {
		struct dentry *alias;

		/* Does an aliased dentry already exist? */
		alias = __d_find_alias(inode, 0);
		if (alias) {
			actual = alias;
			/* Is this an anonymous mountpoint that we could splice
			 * into our tree? */
			if (IS_ROOT(alias)) {
				spin_lock(&alias->d_lock);
				__d_materialise_dentry(dentry, alias);
				__d_drop(alias);
				goto found;
			}
			/* Nope, but we must(!) avoid directory aliasing */
			actual = __d_unalias(dentry, alias);
			if (IS_ERR(actual))
				dput(alias);
			goto out_nolock;
		}
	}

	/* Add a unique reference */
	actual = __d_instantiate_unique(dentry, inode);
	if (!actual)
		actual = dentry;
	else if (unlikely(!d_unhashed(actual)))
		goto shouldnt_be_hashed;

found_lock:
	spin_lock(&actual->d_lock);
found:
	_d_rehash(actual);
	spin_unlock(&actual->d_lock);
	spin_unlock(&dcache_lock);
out_nolock:
	if (actual == dentry) {
		security_d_instantiate(dentry, inode);
		return NULL;
	}

	iput(inode);
	return actual;

shouldnt_be_hashed:
	spin_unlock(&dcache_lock);
	BUG();
	goto shouldnt_be_hashed;
}

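/*
 * Illustrative usage (editor's sketch, not part of the original file):
 * a network filesystem that learns about an inode from a readdir reply
 * (rather than from ->lookup()) can bind it with d_materialise_unique(),
 * letting a disconnected alias be spliced in if one exists. A real caller
 * would switch to the returned alias; this sketch simply releases it:
 */
#if 0
static void attach_readdir_entry(struct dentry *dentry, struct inode *inode)
{
	struct dentry *res = d_materialise_unique(dentry, inode);

	if (res != NULL && !IS_ERR(res))
		dput(res);	/* a pinned alias was returned instead */
}
#endif
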
/**
 * d_path - return the path of a dentry
 * @dentry: dentry to report
 * @vfsmnt: vfsmnt to which the dentry belongs
 * @root: root dentry
 * @rootmnt: vfsmnt to which the root dentry belongs
 * @buffer: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name. If the entry has been deleted
 * the string " (deleted)" is appended. Note that this is ambiguous.
 *
 * Returns the buffer or an error code if the path was too long.
 *
 * "buflen" should be positive. Caller holds the dcache_lock.
 */
static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
		      struct dentry *root, struct vfsmount *rootmnt,
		      char *buffer, int buflen)
{
	char *end = buffer + buflen;
	char *retval;
	int namelen;

	*--end = '\0';
	buflen--;
	if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
		buflen -= 10;
		end -= 10;
		if (buflen < 0)
			goto Elong;
		memcpy(end, " (deleted)", 10);
	}

	if (buflen < 1)
		goto Elong;
	/* Get '/' right */
	retval = end-1;
	*retval = '/';

	for (;;) {
		struct dentry *parent;

		if (dentry == root && vfsmnt == rootmnt)
			break;
		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
			/* Global root? */
			spin_lock(&vfsmount_lock);
			if (vfsmnt->mnt_parent == vfsmnt) {
				spin_unlock(&vfsmount_lock);
				goto global_root;
			}
			dentry = vfsmnt->mnt_mountpoint;
			vfsmnt = vfsmnt->mnt_parent;
			spin_unlock(&vfsmount_lock);
			continue;
		}
		parent = dentry->d_parent;
		prefetch(parent);
		namelen = dentry->d_name.len;
		buflen -= namelen + 1;
		if (buflen < 0)
			goto Elong;
		end -= namelen;
		memcpy(end, dentry->d_name.name, namelen);
		*--end = '/';
		retval = end;
		dentry = parent;
	}

	return retval;

global_root:
	namelen = dentry->d_name.len;
	buflen -= namelen;
	if (buflen < 0)
		goto Elong;
	retval -= namelen-1;	/* hit the slash */
	memcpy(retval, dentry->d_name.name, namelen);
	return retval;
Elong:
	return ERR_PTR(-ENAMETOOLONG);
}

/* write full pathname into buffer and return start of pathname */
char *d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
	     char *buf, int buflen)
{
	char *res;
	struct vfsmount *rootmnt;
	struct dentry *root;

	read_lock(&current->fs->lock);
	rootmnt = mntget(current->fs->rootmnt);
	root = dget(current->fs->root);
	read_unlock(&current->fs->lock);
	spin_lock(&dcache_lock);
	res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen);
	spin_unlock(&dcache_lock);
	dput(root);
	mntput(rootmnt);
	return res;
}

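/*
 * Illustrative usage (editor's sketch, not part of the original file):
 * callers hand d_path() a scratch page and print from the returned
 * pointer, which points somewhere inside the buffer, not at its start:
 */
#if 0
static void print_file_path(struct file *file)
{
	char *page = (char *)__get_free_page(GFP_KERNEL);
	char *path;

	if (!page)
		return;
	path = d_path(file->f_dentry, file->f_vfsmnt, page, PAGE_SIZE);
	if (!IS_ERR(path))
		printk(KERN_DEBUG "file: %s\n", path);
	free_page((unsigned long)page);
}
#endif
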
/*
 * NOTE! The user-level library version returns a
 * character pointer. The kernel system call just
 * returns the length of the buffer filled (which
 * includes the ending '\0' character), or a negative
 * error value. So libc would do something like
 *
 *	char *getcwd(char * buf, size_t size)
 *	{
 *		int retval;
 *
 *		retval = sys_getcwd(buf, size);
 *		if (retval >= 0)
 *			return buf;
 *		errno = -retval;
 *		return NULL;
 *	}
 */
asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
{
	int error;
	struct vfsmount *pwdmnt, *rootmnt;
	struct dentry *pwd, *root;
	char *page = (char *) __get_free_page(GFP_USER);

	if (!page)
		return -ENOMEM;

	read_lock(&current->fs->lock);
	pwdmnt = mntget(current->fs->pwdmnt);
	pwd = dget(current->fs->pwd);
	rootmnt = mntget(current->fs->rootmnt);
	root = dget(current->fs->root);
	read_unlock(&current->fs->lock);

	error = -ENOENT;
	/* Has the current directory been unlinked? */
	spin_lock(&dcache_lock);
	if (pwd->d_parent == pwd || !d_unhashed(pwd)) {
		unsigned long len;
		char *cwd;

		cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE);
		spin_unlock(&dcache_lock);

		error = PTR_ERR(cwd);
		if (IS_ERR(cwd))
			goto out;

		error = -ERANGE;
		len = PAGE_SIZE + page - cwd;
		if (len <= size) {
			error = len;
			if (copy_to_user(buf, cwd, len))
				error = -EFAULT;
		}
	} else
		spin_unlock(&dcache_lock);

out:
	dput(pwd);
	mntput(pwdmnt);
	dput(root);
	mntput(rootmnt);
	free_page((unsigned long) page);
	return error;
}

/*
 * Test whether new_dentry is a subdirectory of old_dentry.
 *
 * Trivially implemented using the dcache structure
 */

/**
 * is_subdir - is new dentry a subdirectory of old_dentry
 * @new_dentry: new dentry
 * @old_dentry: old dentry
 *
 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
 * Returns 0 otherwise.
 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
 */

int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
{
	int result;
	struct dentry *saved = new_dentry;
	unsigned long seq;

	/* need rcu_read_lock() to protect against the d_parent trashing
	 * due to d_move
	 */
	rcu_read_lock();
	do {
		/* for restarting inner loop in case of seq retry */
		new_dentry = saved;
		result = 0;
		seq = read_seqbegin(&rename_lock);
		for (;;) {
			if (new_dentry != old_dentry) {
				struct dentry *parent = new_dentry->d_parent;
				if (parent == new_dentry)
					break;
				new_dentry = parent;
				continue;
			}
			result = 1;
			break;
		}
	} while (read_seqretry(&rename_lock, seq));
	rcu_read_unlock();

	return result;
}

void d_genocide(struct dentry *root)
{
	struct dentry *this_parent = root;
	struct list_head *next;

	spin_lock(&dcache_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;
		if (d_unhashed(dentry) || !dentry->d_inode)
			continue;
		if (!list_empty(&dentry->d_subdirs)) {
			this_parent = dentry;
			goto repeat;
		}
		atomic_dec(&dentry->d_count);
	}
	if (this_parent != root) {
		next = this_parent->d_u.d_child.next;
		atomic_dec(&this_parent->d_count);
		this_parent = this_parent->d_parent;
		goto resume;
	}
	spin_unlock(&dcache_lock);
}

/**
 * find_inode_number - check for dentry with name
 * @dir: directory to check
 * @name: Name to find.
 *
 * Check whether a dentry already exists for the given name,
 * and return the inode number if it has an inode. Otherwise
 * 0 is returned.
 *
 * This routine is used to post-process directory listings for
 * filesystems using synthetic inode numbers, and is necessary
 * to keep getcwd() working.
 */

ino_t find_inode_number(struct dentry *dir, struct qstr *name)
{
	struct dentry *dentry;
	ino_t ino = 0;

	dentry = d_hash_and_lookup(dir, name);
	if (dentry) {
		if (dentry->d_inode)
			ino = dentry->d_inode->i_ino;
		dput(dentry);
	}
	return ino;
}

static __initdata unsigned long dhash_entries;
static int __init set_dhash_entries(char *str)
{
	if (!str)
		return 0;
	dhash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("dhash_entries=", set_dhash_entries);

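/*
 * Illustrative usage (editor's note, not part of the original file):
 * the dentry hash size can be pinned from the kernel command line, e.g.
 *
 *	dhash_entries=65536
 *
 * which feeds set_dhash_entries() above before dcache_init_early() runs;
 * otherwise the size is computed from available memory.
 */
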
static void __init dcache_init_early(void)
{
	int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_head),
					dhash_entries,
					13,
					HASH_EARLY,
					&d_hash_shift,
					&d_hash_mask,
					0);

	for (loop = 0; loop < (1 << d_hash_shift); loop++)
		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}

static void __init dcache_init(unsigned long mempages)
{
	int loop;

	/*
	 * A constructor could be added for stable state like the lists,
	 * but it is probably not worth it because of the cache nature
	 * of the dcache.
	 */
	dentry_cache = kmem_cache_create("dentry_cache",
					 sizeof(struct dentry),
					 0,
					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
					 SLAB_MEM_SPREAD),
					 NULL, NULL);

	set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);

	/* Hash may have been set up in dcache_init_early */
	if (!hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_head),
					dhash_entries,
					13,
					0,
					&d_hash_shift,
					&d_hash_mask,
					0);

	for (loop = 0; loop < (1 << d_hash_shift); loop++)
		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}

/* SLAB cache for __getname() consumers */
struct kmem_cache *names_cachep __read_mostly;

/* SLAB cache for file structures */
struct kmem_cache *filp_cachep __read_mostly;

EXPORT_SYMBOL(d_genocide);

void __init vfs_caches_init_early(void)
{
	dcache_init_early();
	inode_init_early();
}

void __init vfs_caches_init(unsigned long mempages)
{
	unsigned long reserve;

	/* Base hash sizes on available memory, with a reserve equal to
	   150% of current kernel size */

	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
	mempages -= reserve;

	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);

	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);

	dcache_init(mempages);
	inode_init(mempages);
	files_init(mempages);
	mnt_init(mempages);
	bdev_cache_init();
	chrdev_init();
}

EXPORT_SYMBOL(d_alloc);
EXPORT_SYMBOL(d_alloc_anon);
EXPORT_SYMBOL(d_alloc_root);
EXPORT_SYMBOL(d_delete);
EXPORT_SYMBOL(d_find_alias);
EXPORT_SYMBOL(d_instantiate);
EXPORT_SYMBOL(d_invalidate);
EXPORT_SYMBOL(d_lookup);
EXPORT_SYMBOL(d_move);
EXPORT_SYMBOL_GPL(d_materialise_unique);
EXPORT_SYMBOL(d_path);
EXPORT_SYMBOL(d_prune_aliases);
EXPORT_SYMBOL(d_rehash);
EXPORT_SYMBOL(d_splice_alias);
EXPORT_SYMBOL(d_validate);
EXPORT_SYMBOL(dget_locked);
EXPORT_SYMBOL(dput);
EXPORT_SYMBOL(find_inode_number);
EXPORT_SYMBOL(have_submounts);
EXPORT_SYMBOL(names_cachep);
EXPORT_SYMBOL(shrink_dcache_parent);
EXPORT_SYMBOL(shrink_dcache_sb);