1 /* 2 * fs/dcache.c 3 * 4 * Complete reimplementation 5 * (C) 1997 Thomas Schoebel-Theuer, 6 * with heavy changes by Linus Torvalds 7 */ 8 9 /* 10 * Notes on the allocation strategy: 11 * 12 * The dcache is a master of the icache - whenever a dcache entry 13 * exists, the inode will always exist. "iput()" is done either when 14 * the dcache entry is deleted or garbage collected. 15 */ 16 17 #include <linux/config.h> 18 #include <linux/syscalls.h> 19 #include <linux/string.h> 20 #include <linux/mm.h> 21 #include <linux/fs.h> 22 #include <linux/fsnotify.h> 23 #include <linux/slab.h> 24 #include <linux/init.h> 25 #include <linux/smp_lock.h> 26 #include <linux/hash.h> 27 #include <linux/cache.h> 28 #include <linux/module.h> 29 #include <linux/mount.h> 30 #include <linux/file.h> 31 #include <asm/uaccess.h> 32 #include <linux/security.h> 33 #include <linux/seqlock.h> 34 #include <linux/swap.h> 35 #include <linux/bootmem.h> 36 37 38 int sysctl_vfs_cache_pressure __read_mostly = 100; 39 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); 40 41 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); 42 static seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; 43 44 EXPORT_SYMBOL(dcache_lock); 45 46 static kmem_cache_t *dentry_cache __read_mostly; 47 48 #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) 49 50 /* 51 * This is the single most critical data structure when it comes 52 * to the dcache: the hashtable for lookups. Somebody should try 53 * to make this good - I've just made it work. 54 * 55 * This hash-function tries to avoid losing too many bits of hash 56 * information, yet avoid using a prime hash-size or similar. 57 */ 58 #define D_HASHBITS d_hash_shift 59 #define D_HASHMASK d_hash_mask 60 61 static unsigned int d_hash_mask __read_mostly; 62 static unsigned int d_hash_shift __read_mostly; 63 static struct hlist_head *dentry_hashtable __read_mostly; 64 static LIST_HEAD(dentry_unused); 65 66 /* Statistics gathering. */ 67 struct dentry_stat_t dentry_stat = { 68 .age_limit = 45, 69 }; 70 71 static void d_callback(struct rcu_head *head) 72 { 73 struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu); 74 75 if (dname_external(dentry)) 76 kfree(dentry->d_name.name); 77 kmem_cache_free(dentry_cache, dentry); 78 } 79 80 /* 81 * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry 82 * inside dcache_lock. 83 */ 84 static void d_free(struct dentry *dentry) 85 { 86 if (dentry->d_op && dentry->d_op->d_release) 87 dentry->d_op->d_release(dentry); 88 call_rcu(&dentry->d_u.d_rcu, d_callback); 89 } 90 91 /* 92 * Release the dentry's inode, using the filesystem 93 * d_iput() operation if defined. 94 * Called with dcache_lock and per dentry lock held, drops both. 95 */ 96 static void dentry_iput(struct dentry * dentry) 97 { 98 struct inode *inode = dentry->d_inode; 99 if (inode) { 100 dentry->d_inode = NULL; 101 list_del_init(&dentry->d_alias); 102 spin_unlock(&dentry->d_lock); 103 spin_unlock(&dcache_lock); 104 if (!inode->i_nlink) 105 fsnotify_inoderemove(inode); 106 if (dentry->d_op && dentry->d_op->d_iput) 107 dentry->d_op->d_iput(dentry, inode); 108 else 109 iput(inode); 110 } else { 111 spin_unlock(&dentry->d_lock); 112 spin_unlock(&dcache_lock); 113 } 114 } 115 116 /* 117 * This is dput 118 * 119 * This is complicated by the fact that we do not want to put 120 * dentries that are no longer on any hash chain on the unused 121 * list: we'd much rather just get rid of them immediately. 122 * 123 * However, that implies that we have to traverse the dentry 124 * tree upwards to the parents which might _also_ now be 125 * scheduled for deletion (it may have been only waiting for 126 * its last child to go away). 127 * 128 * This tail recursion is done by hand as we don't want to depend 129 * on the compiler to always get this right (gcc generally doesn't). 130 * Real recursion would eat up our stack space. 131 */ 132 133 /* 134 * dput - release a dentry 135 * @dentry: dentry to release 136 * 137 * Release a dentry. This will drop the usage count and if appropriate 138 * call the dentry unlink method as well as removing it from the queues and 139 * releasing its resources. If the parent dentries were scheduled for release 140 * they too may now get deleted. 141 * 142 * no dcache lock, please. 143 */ 144 145 void dput(struct dentry *dentry) 146 { 147 if (!dentry) 148 return; 149 150 repeat: 151 if (atomic_read(&dentry->d_count) == 1) 152 might_sleep(); 153 if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock)) 154 return; 155 156 spin_lock(&dentry->d_lock); 157 if (atomic_read(&dentry->d_count)) { 158 spin_unlock(&dentry->d_lock); 159 spin_unlock(&dcache_lock); 160 return; 161 } 162 163 /* 164 * AV: ->d_delete() is _NOT_ allowed to block now. 165 */ 166 if (dentry->d_op && dentry->d_op->d_delete) { 167 if (dentry->d_op->d_delete(dentry)) 168 goto unhash_it; 169 } 170 /* Unreachable? Get rid of it */ 171 if (d_unhashed(dentry)) 172 goto kill_it; 173 if (list_empty(&dentry->d_lru)) { 174 dentry->d_flags |= DCACHE_REFERENCED; 175 list_add(&dentry->d_lru, &dentry_unused); 176 dentry_stat.nr_unused++; 177 } 178 spin_unlock(&dentry->d_lock); 179 spin_unlock(&dcache_lock); 180 return; 181 182 unhash_it: 183 __d_drop(dentry); 184 185 kill_it: { 186 struct dentry *parent; 187 188 /* If dentry was on d_lru list 189 * delete it from there 190 */ 191 if (!list_empty(&dentry->d_lru)) { 192 list_del(&dentry->d_lru); 193 dentry_stat.nr_unused--; 194 } 195 list_del(&dentry->d_u.d_child); 196 dentry_stat.nr_dentry--; /* For d_free, below */ 197 /*drops the locks, at that point nobody can reach this dentry */ 198 dentry_iput(dentry); 199 parent = dentry->d_parent; 200 d_free(dentry); 201 if (dentry == parent) 202 return; 203 dentry = parent; 204 goto repeat; 205 } 206 } 207 208 /** 209 * d_invalidate - invalidate a dentry 210 * @dentry: dentry to invalidate 211 * 212 * Try to invalidate the dentry if it turns out to be 213 * possible. If there are other dentries that can be 214 * reached through this one we can't delete it and we 215 * return -EBUSY. On success we return 0. 216 * 217 * no dcache lock. 218 */ 219 220 int d_invalidate(struct dentry * dentry) 221 { 222 /* 223 * If it's already been dropped, return OK. 224 */ 225 spin_lock(&dcache_lock); 226 if (d_unhashed(dentry)) { 227 spin_unlock(&dcache_lock); 228 return 0; 229 } 230 /* 231 * Check whether to do a partial shrink_dcache 232 * to get rid of unused child entries. 233 */ 234 if (!list_empty(&dentry->d_subdirs)) { 235 spin_unlock(&dcache_lock); 236 shrink_dcache_parent(dentry); 237 spin_lock(&dcache_lock); 238 } 239 240 /* 241 * Somebody else still using it? 242 * 243 * If it's a directory, we can't drop it 244 * for fear of somebody re-populating it 245 * with children (even though dropping it 246 * would make it unreachable from the root, 247 * we might still populate it if it was a 248 * working directory or similar). 249 */ 250 spin_lock(&dentry->d_lock); 251 if (atomic_read(&dentry->d_count) > 1) { 252 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { 253 spin_unlock(&dentry->d_lock); 254 spin_unlock(&dcache_lock); 255 return -EBUSY; 256 } 257 } 258 259 __d_drop(dentry); 260 spin_unlock(&dentry->d_lock); 261 spin_unlock(&dcache_lock); 262 return 0; 263 } 264 265 /* This should be called _only_ with dcache_lock held */ 266 267 static inline struct dentry * __dget_locked(struct dentry *dentry) 268 { 269 atomic_inc(&dentry->d_count); 270 if (!list_empty(&dentry->d_lru)) { 271 dentry_stat.nr_unused--; 272 list_del_init(&dentry->d_lru); 273 } 274 return dentry; 275 } 276 277 struct dentry * dget_locked(struct dentry *dentry) 278 { 279 return __dget_locked(dentry); 280 } 281 282 /** 283 * d_find_alias - grab a hashed alias of inode 284 * @inode: inode in question 285 * @want_discon: flag, used by d_splice_alias, to request 286 * that only a DISCONNECTED alias be returned. 287 * 288 * If inode has a hashed alias, or is a directory and has any alias, 289 * acquire the reference to alias and return it. Otherwise return NULL. 290 * Notice that if inode is a directory there can be only one alias and 291 * it can be unhashed only if it has no children, or if it is the root 292 * of a filesystem. 293 * 294 * If the inode has a DCACHE_DISCONNECTED alias, then prefer 295 * any other hashed alias over that one unless @want_discon is set, 296 * in which case only return a DCACHE_DISCONNECTED alias. 297 */ 298 299 static struct dentry * __d_find_alias(struct inode *inode, int want_discon) 300 { 301 struct list_head *head, *next, *tmp; 302 struct dentry *alias, *discon_alias=NULL; 303 304 head = &inode->i_dentry; 305 next = inode->i_dentry.next; 306 while (next != head) { 307 tmp = next; 308 next = tmp->next; 309 prefetch(next); 310 alias = list_entry(tmp, struct dentry, d_alias); 311 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { 312 if (alias->d_flags & DCACHE_DISCONNECTED) 313 discon_alias = alias; 314 else if (!want_discon) { 315 __dget_locked(alias); 316 return alias; 317 } 318 } 319 } 320 if (discon_alias) 321 __dget_locked(discon_alias); 322 return discon_alias; 323 } 324 325 struct dentry * d_find_alias(struct inode *inode) 326 { 327 struct dentry *de = NULL; 328 329 if (!list_empty(&inode->i_dentry)) { 330 spin_lock(&dcache_lock); 331 de = __d_find_alias(inode, 0); 332 spin_unlock(&dcache_lock); 333 } 334 return de; 335 } 336 337 /* 338 * Try to kill dentries associated with this inode. 339 * WARNING: you must own a reference to inode. 340 */ 341 void d_prune_aliases(struct inode *inode) 342 { 343 struct dentry *dentry; 344 restart: 345 spin_lock(&dcache_lock); 346 list_for_each_entry(dentry, &inode->i_dentry, d_alias) { 347 spin_lock(&dentry->d_lock); 348 if (!atomic_read(&dentry->d_count)) { 349 __dget_locked(dentry); 350 __d_drop(dentry); 351 spin_unlock(&dentry->d_lock); 352 spin_unlock(&dcache_lock); 353 dput(dentry); 354 goto restart; 355 } 356 spin_unlock(&dentry->d_lock); 357 } 358 spin_unlock(&dcache_lock); 359 } 360 361 /* 362 * Throw away a dentry - free the inode, dput the parent. This requires that 363 * the LRU list has already been removed. 364 * 365 * Called with dcache_lock, drops it and then regains. 366 * Called with dentry->d_lock held, drops it. 367 */ 368 static void prune_one_dentry(struct dentry * dentry) 369 { 370 struct dentry * parent; 371 372 __d_drop(dentry); 373 list_del(&dentry->d_u.d_child); 374 dentry_stat.nr_dentry--; /* For d_free, below */ 375 dentry_iput(dentry); 376 parent = dentry->d_parent; 377 d_free(dentry); 378 if (parent != dentry) 379 dput(parent); 380 spin_lock(&dcache_lock); 381 } 382 383 /** 384 * prune_dcache - shrink the dcache 385 * @count: number of entries to try and free 386 * @sb: if given, ignore dentries for other superblocks 387 * which are being unmounted. 388 * 389 * Shrink the dcache. This is done when we need 390 * more memory, or simply when we need to unmount 391 * something (at which point we need to unuse 392 * all dentries). 393 * 394 * This function may fail to free any resources if 395 * all the dentries are in use. 396 */ 397 398 static void prune_dcache(int count, struct super_block *sb) 399 { 400 spin_lock(&dcache_lock); 401 for (; count ; count--) { 402 struct dentry *dentry; 403 struct list_head *tmp; 404 struct rw_semaphore *s_umount; 405 406 cond_resched_lock(&dcache_lock); 407 408 tmp = dentry_unused.prev; 409 if (sb) { 410 /* Try to find a dentry for this sb, but don't try 411 * too hard, if they aren't near the tail they will 412 * be moved down again soon 413 */ 414 int skip = count; 415 while (skip && tmp != &dentry_unused && 416 list_entry(tmp, struct dentry, d_lru)->d_sb != sb) { 417 skip--; 418 tmp = tmp->prev; 419 } 420 } 421 if (tmp == &dentry_unused) 422 break; 423 list_del_init(tmp); 424 prefetch(dentry_unused.prev); 425 dentry_stat.nr_unused--; 426 dentry = list_entry(tmp, struct dentry, d_lru); 427 428 spin_lock(&dentry->d_lock); 429 /* 430 * We found an inuse dentry which was not removed from 431 * dentry_unused because of laziness during lookup. Do not free 432 * it - just keep it off the dentry_unused list. 433 */ 434 if (atomic_read(&dentry->d_count)) { 435 spin_unlock(&dentry->d_lock); 436 continue; 437 } 438 /* If the dentry was recently referenced, don't free it. */ 439 if (dentry->d_flags & DCACHE_REFERENCED) { 440 dentry->d_flags &= ~DCACHE_REFERENCED; 441 list_add(&dentry->d_lru, &dentry_unused); 442 dentry_stat.nr_unused++; 443 spin_unlock(&dentry->d_lock); 444 continue; 445 } 446 /* 447 * If the dentry is not DCACHED_REFERENCED, it is time 448 * to remove it from the dcache, provided the super block is 449 * NULL (which means we are trying to reclaim memory) 450 * or this dentry belongs to the same super block that 451 * we want to shrink. 452 */ 453 /* 454 * If this dentry is for "my" filesystem, then I can prune it 455 * without taking the s_umount lock (I already hold it). 456 */ 457 if (sb && dentry->d_sb == sb) { 458 prune_one_dentry(dentry); 459 continue; 460 } 461 /* 462 * ...otherwise we need to be sure this filesystem isn't being 463 * unmounted, otherwise we could race with 464 * generic_shutdown_super(), and end up holding a reference to 465 * an inode while the filesystem is unmounted. 466 * So we try to get s_umount, and make sure s_root isn't NULL. 467 * (Take a local copy of s_umount to avoid a use-after-free of 468 * `dentry'). 469 */ 470 s_umount = &dentry->d_sb->s_umount; 471 if (down_read_trylock(s_umount)) { 472 if (dentry->d_sb->s_root != NULL) { 473 prune_one_dentry(dentry); 474 up_read(s_umount); 475 continue; 476 } 477 up_read(s_umount); 478 } 479 spin_unlock(&dentry->d_lock); 480 /* Cannot remove the first dentry, and it isn't appropriate 481 * to move it to the head of the list, so give up, and try 482 * later 483 */ 484 break; 485 } 486 spin_unlock(&dcache_lock); 487 } 488 489 /* 490 * Shrink the dcache for the specified super block. 491 * This allows us to unmount a device without disturbing 492 * the dcache for the other devices. 493 * 494 * This implementation makes just two traversals of the 495 * unused list. On the first pass we move the selected 496 * dentries to the most recent end, and on the second 497 * pass we free them. The second pass must restart after 498 * each dput(), but since the target dentries are all at 499 * the end, it's really just a single traversal. 500 */ 501 502 /** 503 * shrink_dcache_sb - shrink dcache for a superblock 504 * @sb: superblock 505 * 506 * Shrink the dcache for the specified super block. This 507 * is used to free the dcache before unmounting a file 508 * system 509 */ 510 511 void shrink_dcache_sb(struct super_block * sb) 512 { 513 struct list_head *tmp, *next; 514 struct dentry *dentry; 515 516 /* 517 * Pass one ... move the dentries for the specified 518 * superblock to the most recent end of the unused list. 519 */ 520 spin_lock(&dcache_lock); 521 list_for_each_safe(tmp, next, &dentry_unused) { 522 dentry = list_entry(tmp, struct dentry, d_lru); 523 if (dentry->d_sb != sb) 524 continue; 525 list_move(tmp, &dentry_unused); 526 } 527 528 /* 529 * Pass two ... free the dentries for this superblock. 530 */ 531 repeat: 532 list_for_each_safe(tmp, next, &dentry_unused) { 533 dentry = list_entry(tmp, struct dentry, d_lru); 534 if (dentry->d_sb != sb) 535 continue; 536 dentry_stat.nr_unused--; 537 list_del_init(tmp); 538 spin_lock(&dentry->d_lock); 539 if (atomic_read(&dentry->d_count)) { 540 spin_unlock(&dentry->d_lock); 541 continue; 542 } 543 prune_one_dentry(dentry); 544 cond_resched_lock(&dcache_lock); 545 goto repeat; 546 } 547 spin_unlock(&dcache_lock); 548 } 549 550 /* 551 * Search for at least 1 mount point in the dentry's subdirs. 552 * We descend to the next level whenever the d_subdirs 553 * list is non-empty and continue searching. 554 */ 555 556 /** 557 * have_submounts - check for mounts over a dentry 558 * @parent: dentry to check. 559 * 560 * Return true if the parent or its subdirectories contain 561 * a mount point 562 */ 563 564 int have_submounts(struct dentry *parent) 565 { 566 struct dentry *this_parent = parent; 567 struct list_head *next; 568 569 spin_lock(&dcache_lock); 570 if (d_mountpoint(parent)) 571 goto positive; 572 repeat: 573 next = this_parent->d_subdirs.next; 574 resume: 575 while (next != &this_parent->d_subdirs) { 576 struct list_head *tmp = next; 577 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 578 next = tmp->next; 579 /* Have we found a mount point ? */ 580 if (d_mountpoint(dentry)) 581 goto positive; 582 if (!list_empty(&dentry->d_subdirs)) { 583 this_parent = dentry; 584 goto repeat; 585 } 586 } 587 /* 588 * All done at this level ... ascend and resume the search. 589 */ 590 if (this_parent != parent) { 591 next = this_parent->d_u.d_child.next; 592 this_parent = this_parent->d_parent; 593 goto resume; 594 } 595 spin_unlock(&dcache_lock); 596 return 0; /* No mount points found in tree */ 597 positive: 598 spin_unlock(&dcache_lock); 599 return 1; 600 } 601 602 /* 603 * Search the dentry child list for the specified parent, 604 * and move any unused dentries to the end of the unused 605 * list for prune_dcache(). We descend to the next level 606 * whenever the d_subdirs list is non-empty and continue 607 * searching. 608 * 609 * It returns zero iff there are no unused children, 610 * otherwise it returns the number of children moved to 611 * the end of the unused list. This may not be the total 612 * number of unused children, because select_parent can 613 * drop the lock and return early due to latency 614 * constraints. 615 */ 616 static int select_parent(struct dentry * parent) 617 { 618 struct dentry *this_parent = parent; 619 struct list_head *next; 620 int found = 0; 621 622 spin_lock(&dcache_lock); 623 repeat: 624 next = this_parent->d_subdirs.next; 625 resume: 626 while (next != &this_parent->d_subdirs) { 627 struct list_head *tmp = next; 628 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 629 next = tmp->next; 630 631 if (!list_empty(&dentry->d_lru)) { 632 dentry_stat.nr_unused--; 633 list_del_init(&dentry->d_lru); 634 } 635 /* 636 * move only zero ref count dentries to the end 637 * of the unused list for prune_dcache 638 */ 639 if (!atomic_read(&dentry->d_count)) { 640 list_add_tail(&dentry->d_lru, &dentry_unused); 641 dentry_stat.nr_unused++; 642 found++; 643 } 644 645 /* 646 * We can return to the caller if we have found some (this 647 * ensures forward progress). We'll be coming back to find 648 * the rest. 649 */ 650 if (found && need_resched()) 651 goto out; 652 653 /* 654 * Descend a level if the d_subdirs list is non-empty. 655 */ 656 if (!list_empty(&dentry->d_subdirs)) { 657 this_parent = dentry; 658 goto repeat; 659 } 660 } 661 /* 662 * All done at this level ... ascend and resume the search. 663 */ 664 if (this_parent != parent) { 665 next = this_parent->d_u.d_child.next; 666 this_parent = this_parent->d_parent; 667 goto resume; 668 } 669 out: 670 spin_unlock(&dcache_lock); 671 return found; 672 } 673 674 /** 675 * shrink_dcache_parent - prune dcache 676 * @parent: parent of entries to prune 677 * 678 * Prune the dcache to remove unused children of the parent dentry. 679 */ 680 681 void shrink_dcache_parent(struct dentry * parent) 682 { 683 int found; 684 685 while ((found = select_parent(parent)) != 0) 686 prune_dcache(found, parent->d_sb); 687 } 688 689 /* 690 * Scan `nr' dentries and return the number which remain. 691 * 692 * We need to avoid reentering the filesystem if the caller is performing a 693 * GFP_NOFS allocation attempt. One example deadlock is: 694 * 695 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache-> 696 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode-> 697 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK. 698 * 699 * In this case we return -1 to tell the caller that we baled. 700 */ 701 static int shrink_dcache_memory(int nr, gfp_t gfp_mask) 702 { 703 if (nr) { 704 if (!(gfp_mask & __GFP_FS)) 705 return -1; 706 prune_dcache(nr, NULL); 707 } 708 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; 709 } 710 711 /** 712 * d_alloc - allocate a dcache entry 713 * @parent: parent of entry to allocate 714 * @name: qstr of the name 715 * 716 * Allocates a dentry. It returns %NULL if there is insufficient memory 717 * available. On a success the dentry is returned. The name passed in is 718 * copied and the copy passed in may be reused after this call. 719 */ 720 721 struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) 722 { 723 struct dentry *dentry; 724 char *dname; 725 726 dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); 727 if (!dentry) 728 return NULL; 729 730 if (name->len > DNAME_INLINE_LEN-1) { 731 dname = kmalloc(name->len + 1, GFP_KERNEL); 732 if (!dname) { 733 kmem_cache_free(dentry_cache, dentry); 734 return NULL; 735 } 736 } else { 737 dname = dentry->d_iname; 738 } 739 dentry->d_name.name = dname; 740 741 dentry->d_name.len = name->len; 742 dentry->d_name.hash = name->hash; 743 memcpy(dname, name->name, name->len); 744 dname[name->len] = 0; 745 746 atomic_set(&dentry->d_count, 1); 747 dentry->d_flags = DCACHE_UNHASHED; 748 spin_lock_init(&dentry->d_lock); 749 dentry->d_inode = NULL; 750 dentry->d_parent = NULL; 751 dentry->d_sb = NULL; 752 dentry->d_op = NULL; 753 dentry->d_fsdata = NULL; 754 dentry->d_mounted = 0; 755 #ifdef CONFIG_PROFILING 756 dentry->d_cookie = NULL; 757 #endif 758 INIT_HLIST_NODE(&dentry->d_hash); 759 INIT_LIST_HEAD(&dentry->d_lru); 760 INIT_LIST_HEAD(&dentry->d_subdirs); 761 INIT_LIST_HEAD(&dentry->d_alias); 762 763 if (parent) { 764 dentry->d_parent = dget(parent); 765 dentry->d_sb = parent->d_sb; 766 } else { 767 INIT_LIST_HEAD(&dentry->d_u.d_child); 768 } 769 770 spin_lock(&dcache_lock); 771 if (parent) 772 list_add(&dentry->d_u.d_child, &parent->d_subdirs); 773 dentry_stat.nr_dentry++; 774 spin_unlock(&dcache_lock); 775 776 return dentry; 777 } 778 779 struct dentry *d_alloc_name(struct dentry *parent, const char *name) 780 { 781 struct qstr q; 782 783 q.name = name; 784 q.len = strlen(name); 785 q.hash = full_name_hash(q.name, q.len); 786 return d_alloc(parent, &q); 787 } 788 789 /** 790 * d_instantiate - fill in inode information for a dentry 791 * @entry: dentry to complete 792 * @inode: inode to attach to this dentry 793 * 794 * Fill in inode information in the entry. 795 * 796 * This turns negative dentries into productive full members 797 * of society. 798 * 799 * NOTE! This assumes that the inode count has been incremented 800 * (or otherwise set) by the caller to indicate that it is now 801 * in use by the dcache. 802 */ 803 804 void d_instantiate(struct dentry *entry, struct inode * inode) 805 { 806 BUG_ON(!list_empty(&entry->d_alias)); 807 spin_lock(&dcache_lock); 808 if (inode) 809 list_add(&entry->d_alias, &inode->i_dentry); 810 entry->d_inode = inode; 811 fsnotify_d_instantiate(entry, inode); 812 spin_unlock(&dcache_lock); 813 security_d_instantiate(entry, inode); 814 } 815 816 /** 817 * d_instantiate_unique - instantiate a non-aliased dentry 818 * @entry: dentry to instantiate 819 * @inode: inode to attach to this dentry 820 * 821 * Fill in inode information in the entry. On success, it returns NULL. 822 * If an unhashed alias of "entry" already exists, then we return the 823 * aliased dentry instead and drop one reference to inode. 824 * 825 * Note that in order to avoid conflicts with rename() etc, the caller 826 * had better be holding the parent directory semaphore. 827 * 828 * This also assumes that the inode count has been incremented 829 * (or otherwise set) by the caller to indicate that it is now 830 * in use by the dcache. 831 */ 832 struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) 833 { 834 struct dentry *alias; 835 int len = entry->d_name.len; 836 const char *name = entry->d_name.name; 837 unsigned int hash = entry->d_name.hash; 838 839 BUG_ON(!list_empty(&entry->d_alias)); 840 spin_lock(&dcache_lock); 841 if (!inode) 842 goto do_negative; 843 list_for_each_entry(alias, &inode->i_dentry, d_alias) { 844 struct qstr *qstr = &alias->d_name; 845 846 if (qstr->hash != hash) 847 continue; 848 if (alias->d_parent != entry->d_parent) 849 continue; 850 if (qstr->len != len) 851 continue; 852 if (memcmp(qstr->name, name, len)) 853 continue; 854 dget_locked(alias); 855 spin_unlock(&dcache_lock); 856 BUG_ON(!d_unhashed(alias)); 857 iput(inode); 858 return alias; 859 } 860 list_add(&entry->d_alias, &inode->i_dentry); 861 do_negative: 862 entry->d_inode = inode; 863 fsnotify_d_instantiate(entry, inode); 864 spin_unlock(&dcache_lock); 865 security_d_instantiate(entry, inode); 866 return NULL; 867 } 868 EXPORT_SYMBOL(d_instantiate_unique); 869 870 /** 871 * d_alloc_root - allocate root dentry 872 * @root_inode: inode to allocate the root for 873 * 874 * Allocate a root ("/") dentry for the inode given. The inode is 875 * instantiated and returned. %NULL is returned if there is insufficient 876 * memory or the inode passed is %NULL. 877 */ 878 879 struct dentry * d_alloc_root(struct inode * root_inode) 880 { 881 struct dentry *res = NULL; 882 883 if (root_inode) { 884 static const struct qstr name = { .name = "/", .len = 1 }; 885 886 res = d_alloc(NULL, &name); 887 if (res) { 888 res->d_sb = root_inode->i_sb; 889 res->d_parent = res; 890 d_instantiate(res, root_inode); 891 } 892 } 893 return res; 894 } 895 896 static inline struct hlist_head *d_hash(struct dentry *parent, 897 unsigned long hash) 898 { 899 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; 900 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); 901 return dentry_hashtable + (hash & D_HASHMASK); 902 } 903 904 /** 905 * d_alloc_anon - allocate an anonymous dentry 906 * @inode: inode to allocate the dentry for 907 * 908 * This is similar to d_alloc_root. It is used by filesystems when 909 * creating a dentry for a given inode, often in the process of 910 * mapping a filehandle to a dentry. The returned dentry may be 911 * anonymous, or may have a full name (if the inode was already 912 * in the cache). The file system may need to make further 913 * efforts to connect this dentry into the dcache properly. 914 * 915 * When called on a directory inode, we must ensure that 916 * the inode only ever has one dentry. If a dentry is 917 * found, that is returned instead of allocating a new one. 918 * 919 * On successful return, the reference to the inode has been transferred 920 * to the dentry. If %NULL is returned (indicating kmalloc failure), 921 * the reference on the inode has not been released. 922 */ 923 924 struct dentry * d_alloc_anon(struct inode *inode) 925 { 926 static const struct qstr anonstring = { .name = "" }; 927 struct dentry *tmp; 928 struct dentry *res; 929 930 if ((res = d_find_alias(inode))) { 931 iput(inode); 932 return res; 933 } 934 935 tmp = d_alloc(NULL, &anonstring); 936 if (!tmp) 937 return NULL; 938 939 tmp->d_parent = tmp; /* make sure dput doesn't croak */ 940 941 spin_lock(&dcache_lock); 942 res = __d_find_alias(inode, 0); 943 if (!res) { 944 /* attach a disconnected dentry */ 945 res = tmp; 946 tmp = NULL; 947 spin_lock(&res->d_lock); 948 res->d_sb = inode->i_sb; 949 res->d_parent = res; 950 res->d_inode = inode; 951 res->d_flags |= DCACHE_DISCONNECTED; 952 res->d_flags &= ~DCACHE_UNHASHED; 953 list_add(&res->d_alias, &inode->i_dentry); 954 hlist_add_head(&res->d_hash, &inode->i_sb->s_anon); 955 spin_unlock(&res->d_lock); 956 957 inode = NULL; /* don't drop reference */ 958 } 959 spin_unlock(&dcache_lock); 960 961 if (inode) 962 iput(inode); 963 if (tmp) 964 dput(tmp); 965 return res; 966 } 967 968 969 /** 970 * d_splice_alias - splice a disconnected dentry into the tree if one exists 971 * @inode: the inode which may have a disconnected dentry 972 * @dentry: a negative dentry which we want to point to the inode. 973 * 974 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and 975 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry 976 * and return it, else simply d_add the inode to the dentry and return NULL. 977 * 978 * This is needed in the lookup routine of any filesystem that is exportable 979 * (via knfsd) so that we can build dcache paths to directories effectively. 980 * 981 * If a dentry was found and moved, then it is returned. Otherwise NULL 982 * is returned. This matches the expected return value of ->lookup. 983 * 984 */ 985 struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) 986 { 987 struct dentry *new = NULL; 988 989 if (inode) { 990 spin_lock(&dcache_lock); 991 new = __d_find_alias(inode, 1); 992 if (new) { 993 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); 994 fsnotify_d_instantiate(new, inode); 995 spin_unlock(&dcache_lock); 996 security_d_instantiate(new, inode); 997 d_rehash(dentry); 998 d_move(new, dentry); 999 iput(inode); 1000 } else { 1001 /* d_instantiate takes dcache_lock, so we do it by hand */ 1002 list_add(&dentry->d_alias, &inode->i_dentry); 1003 dentry->d_inode = inode; 1004 fsnotify_d_instantiate(dentry, inode); 1005 spin_unlock(&dcache_lock); 1006 security_d_instantiate(dentry, inode); 1007 d_rehash(dentry); 1008 } 1009 } else 1010 d_add(dentry, inode); 1011 return new; 1012 } 1013 1014 1015 /** 1016 * d_lookup - search for a dentry 1017 * @parent: parent dentry 1018 * @name: qstr of name we wish to find 1019 * 1020 * Searches the children of the parent dentry for the name in question. If 1021 * the dentry is found its reference count is incremented and the dentry 1022 * is returned. The caller must use d_put to free the entry when it has 1023 * finished using it. %NULL is returned on failure. 1024 * 1025 * __d_lookup is dcache_lock free. The hash list is protected using RCU. 1026 * Memory barriers are used while updating and doing lockless traversal. 1027 * To avoid races with d_move while rename is happening, d_lock is used. 1028 * 1029 * Overflows in memcmp(), while d_move, are avoided by keeping the length 1030 * and name pointer in one structure pointed by d_qstr. 1031 * 1032 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while 1033 * lookup is going on. 1034 * 1035 * dentry_unused list is not updated even if lookup finds the required dentry 1036 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb, 1037 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock 1038 * acquisition. 1039 * 1040 * d_lookup() is protected against the concurrent renames in some unrelated 1041 * directory using the seqlockt_t rename_lock. 1042 */ 1043 1044 struct dentry * d_lookup(struct dentry * parent, struct qstr * name) 1045 { 1046 struct dentry * dentry = NULL; 1047 unsigned long seq; 1048 1049 do { 1050 seq = read_seqbegin(&rename_lock); 1051 dentry = __d_lookup(parent, name); 1052 if (dentry) 1053 break; 1054 } while (read_seqretry(&rename_lock, seq)); 1055 return dentry; 1056 } 1057 1058 struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) 1059 { 1060 unsigned int len = name->len; 1061 unsigned int hash = name->hash; 1062 const unsigned char *str = name->name; 1063 struct hlist_head *head = d_hash(parent,hash); 1064 struct dentry *found = NULL; 1065 struct hlist_node *node; 1066 struct dentry *dentry; 1067 1068 rcu_read_lock(); 1069 1070 hlist_for_each_entry_rcu(dentry, node, head, d_hash) { 1071 struct qstr *qstr; 1072 1073 if (dentry->d_name.hash != hash) 1074 continue; 1075 if (dentry->d_parent != parent) 1076 continue; 1077 1078 spin_lock(&dentry->d_lock); 1079 1080 /* 1081 * Recheck the dentry after taking the lock - d_move may have 1082 * changed things. Don't bother checking the hash because we're 1083 * about to compare the whole name anyway. 1084 */ 1085 if (dentry->d_parent != parent) 1086 goto next; 1087 1088 /* 1089 * It is safe to compare names since d_move() cannot 1090 * change the qstr (protected by d_lock). 1091 */ 1092 qstr = &dentry->d_name; 1093 if (parent->d_op && parent->d_op->d_compare) { 1094 if (parent->d_op->d_compare(parent, qstr, name)) 1095 goto next; 1096 } else { 1097 if (qstr->len != len) 1098 goto next; 1099 if (memcmp(qstr->name, str, len)) 1100 goto next; 1101 } 1102 1103 if (!d_unhashed(dentry)) { 1104 atomic_inc(&dentry->d_count); 1105 found = dentry; 1106 } 1107 spin_unlock(&dentry->d_lock); 1108 break; 1109 next: 1110 spin_unlock(&dentry->d_lock); 1111 } 1112 rcu_read_unlock(); 1113 1114 return found; 1115 } 1116 1117 /** 1118 * d_hash_and_lookup - hash the qstr then search for a dentry 1119 * @dir: Directory to search in 1120 * @name: qstr of name we wish to find 1121 * 1122 * On hash failure or on lookup failure NULL is returned. 1123 */ 1124 struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) 1125 { 1126 struct dentry *dentry = NULL; 1127 1128 /* 1129 * Check for a fs-specific hash function. Note that we must 1130 * calculate the standard hash first, as the d_op->d_hash() 1131 * routine may choose to leave the hash value unchanged. 1132 */ 1133 name->hash = full_name_hash(name->name, name->len); 1134 if (dir->d_op && dir->d_op->d_hash) { 1135 if (dir->d_op->d_hash(dir, name) < 0) 1136 goto out; 1137 } 1138 dentry = d_lookup(dir, name); 1139 out: 1140 return dentry; 1141 } 1142 1143 /** 1144 * d_validate - verify dentry provided from insecure source 1145 * @dentry: The dentry alleged to be valid child of @dparent 1146 * @dparent: The parent dentry (known to be valid) 1147 * @hash: Hash of the dentry 1148 * @len: Length of the name 1149 * 1150 * An insecure source has sent us a dentry, here we verify it and dget() it. 1151 * This is used by ncpfs in its readdir implementation. 1152 * Zero is returned in the dentry is invalid. 1153 */ 1154 1155 int d_validate(struct dentry *dentry, struct dentry *dparent) 1156 { 1157 struct hlist_head *base; 1158 struct hlist_node *lhp; 1159 1160 /* Check whether the ptr might be valid at all.. */ 1161 if (!kmem_ptr_validate(dentry_cache, dentry)) 1162 goto out; 1163 1164 if (dentry->d_parent != dparent) 1165 goto out; 1166 1167 spin_lock(&dcache_lock); 1168 base = d_hash(dparent, dentry->d_name.hash); 1169 hlist_for_each(lhp,base) { 1170 /* hlist_for_each_entry_rcu() not required for d_hash list 1171 * as it is parsed under dcache_lock 1172 */ 1173 if (dentry == hlist_entry(lhp, struct dentry, d_hash)) { 1174 __dget_locked(dentry); 1175 spin_unlock(&dcache_lock); 1176 return 1; 1177 } 1178 } 1179 spin_unlock(&dcache_lock); 1180 out: 1181 return 0; 1182 } 1183 1184 /* 1185 * When a file is deleted, we have two options: 1186 * - turn this dentry into a negative dentry 1187 * - unhash this dentry and free it. 1188 * 1189 * Usually, we want to just turn this into 1190 * a negative dentry, but if anybody else is 1191 * currently using the dentry or the inode 1192 * we can't do that and we fall back on removing 1193 * it from the hash queues and waiting for 1194 * it to be deleted later when it has no users 1195 */ 1196 1197 /** 1198 * d_delete - delete a dentry 1199 * @dentry: The dentry to delete 1200 * 1201 * Turn the dentry into a negative dentry if possible, otherwise 1202 * remove it from the hash queues so it can be deleted later 1203 */ 1204 1205 void d_delete(struct dentry * dentry) 1206 { 1207 int isdir = 0; 1208 /* 1209 * Are we the only user? 1210 */ 1211 spin_lock(&dcache_lock); 1212 spin_lock(&dentry->d_lock); 1213 isdir = S_ISDIR(dentry->d_inode->i_mode); 1214 if (atomic_read(&dentry->d_count) == 1) { 1215 dentry_iput(dentry); 1216 fsnotify_nameremove(dentry, isdir); 1217 1218 /* remove this and other inotify debug checks after 2.6.18 */ 1219 dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED; 1220 return; 1221 } 1222 1223 if (!d_unhashed(dentry)) 1224 __d_drop(dentry); 1225 1226 spin_unlock(&dentry->d_lock); 1227 spin_unlock(&dcache_lock); 1228 1229 fsnotify_nameremove(dentry, isdir); 1230 } 1231 1232 static void __d_rehash(struct dentry * entry, struct hlist_head *list) 1233 { 1234 1235 entry->d_flags &= ~DCACHE_UNHASHED; 1236 hlist_add_head_rcu(&entry->d_hash, list); 1237 } 1238 1239 /** 1240 * d_rehash - add an entry back to the hash 1241 * @entry: dentry to add to the hash 1242 * 1243 * Adds a dentry to the hash according to its name. 1244 */ 1245 1246 void d_rehash(struct dentry * entry) 1247 { 1248 struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash); 1249 1250 spin_lock(&dcache_lock); 1251 spin_lock(&entry->d_lock); 1252 __d_rehash(entry, list); 1253 spin_unlock(&entry->d_lock); 1254 spin_unlock(&dcache_lock); 1255 } 1256 1257 #define do_switch(x,y) do { \ 1258 __typeof__ (x) __tmp = x; \ 1259 x = y; y = __tmp; } while (0) 1260 1261 /* 1262 * When switching names, the actual string doesn't strictly have to 1263 * be preserved in the target - because we're dropping the target 1264 * anyway. As such, we can just do a simple memcpy() to copy over 1265 * the new name before we switch. 1266 * 1267 * Note that we have to be a lot more careful about getting the hash 1268 * switched - we have to switch the hash value properly even if it 1269 * then no longer matches the actual (corrupted) string of the target. 1270 * The hash value has to match the hash queue that the dentry is on.. 1271 */ 1272 static void switch_names(struct dentry *dentry, struct dentry *target) 1273 { 1274 if (dname_external(target)) { 1275 if (dname_external(dentry)) { 1276 /* 1277 * Both external: swap the pointers 1278 */ 1279 do_switch(target->d_name.name, dentry->d_name.name); 1280 } else { 1281 /* 1282 * dentry:internal, target:external. Steal target's 1283 * storage and make target internal. 1284 */ 1285 dentry->d_name.name = target->d_name.name; 1286 target->d_name.name = target->d_iname; 1287 } 1288 } else { 1289 if (dname_external(dentry)) { 1290 /* 1291 * dentry:external, target:internal. Give dentry's 1292 * storage to target and make dentry internal 1293 */ 1294 memcpy(dentry->d_iname, target->d_name.name, 1295 target->d_name.len + 1); 1296 target->d_name.name = dentry->d_name.name; 1297 dentry->d_name.name = dentry->d_iname; 1298 } else { 1299 /* 1300 * Both are internal. Just copy target to dentry 1301 */ 1302 memcpy(dentry->d_iname, target->d_name.name, 1303 target->d_name.len + 1); 1304 } 1305 } 1306 } 1307 1308 /* 1309 * We cannibalize "target" when moving dentry on top of it, 1310 * because it's going to be thrown away anyway. We could be more 1311 * polite about it, though. 1312 * 1313 * This forceful removal will result in ugly /proc output if 1314 * somebody holds a file open that got deleted due to a rename. 1315 * We could be nicer about the deleted file, and let it show 1316 * up under the name it got deleted rather than the name that 1317 * deleted it. 1318 */ 1319 1320 /** 1321 * d_move - move a dentry 1322 * @dentry: entry to move 1323 * @target: new dentry 1324 * 1325 * Update the dcache to reflect the move of a file name. Negative 1326 * dcache entries should not be moved in this way. 1327 */ 1328 1329 void d_move(struct dentry * dentry, struct dentry * target) 1330 { 1331 struct hlist_head *list; 1332 1333 if (!dentry->d_inode) 1334 printk(KERN_WARNING "VFS: moving negative dcache entry\n"); 1335 1336 spin_lock(&dcache_lock); 1337 write_seqlock(&rename_lock); 1338 /* 1339 * XXXX: do we really need to take target->d_lock? 1340 */ 1341 if (target < dentry) { 1342 spin_lock(&target->d_lock); 1343 spin_lock(&dentry->d_lock); 1344 } else { 1345 spin_lock(&dentry->d_lock); 1346 spin_lock(&target->d_lock); 1347 } 1348 1349 /* Move the dentry to the target hash queue, if on different bucket */ 1350 if (dentry->d_flags & DCACHE_UNHASHED) 1351 goto already_unhashed; 1352 1353 hlist_del_rcu(&dentry->d_hash); 1354 1355 already_unhashed: 1356 list = d_hash(target->d_parent, target->d_name.hash); 1357 __d_rehash(dentry, list); 1358 1359 /* Unhash the target: dput() will then get rid of it */ 1360 __d_drop(target); 1361 1362 list_del(&dentry->d_u.d_child); 1363 list_del(&target->d_u.d_child); 1364 1365 /* Switch the names.. */ 1366 switch_names(dentry, target); 1367 do_switch(dentry->d_name.len, target->d_name.len); 1368 do_switch(dentry->d_name.hash, target->d_name.hash); 1369 1370 /* ... and switch the parents */ 1371 if (IS_ROOT(dentry)) { 1372 dentry->d_parent = target->d_parent; 1373 target->d_parent = target; 1374 INIT_LIST_HEAD(&target->d_u.d_child); 1375 } else { 1376 do_switch(dentry->d_parent, target->d_parent); 1377 1378 /* And add them back to the (new) parent lists */ 1379 list_add(&target->d_u.d_child, &target->d_parent->d_subdirs); 1380 } 1381 1382 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); 1383 spin_unlock(&target->d_lock); 1384 fsnotify_d_move(dentry); 1385 spin_unlock(&dentry->d_lock); 1386 write_sequnlock(&rename_lock); 1387 spin_unlock(&dcache_lock); 1388 } 1389 1390 /** 1391 * d_path - return the path of a dentry 1392 * @dentry: dentry to report 1393 * @vfsmnt: vfsmnt to which the dentry belongs 1394 * @root: root dentry 1395 * @rootmnt: vfsmnt to which the root dentry belongs 1396 * @buffer: buffer to return value in 1397 * @buflen: buffer length 1398 * 1399 * Convert a dentry into an ASCII path name. If the entry has been deleted 1400 * the string " (deleted)" is appended. Note that this is ambiguous. 1401 * 1402 * Returns the buffer or an error code if the path was too long. 1403 * 1404 * "buflen" should be positive. Caller holds the dcache_lock. 1405 */ 1406 static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt, 1407 struct dentry *root, struct vfsmount *rootmnt, 1408 char *buffer, int buflen) 1409 { 1410 char * end = buffer+buflen; 1411 char * retval; 1412 int namelen; 1413 1414 *--end = '\0'; 1415 buflen--; 1416 if (!IS_ROOT(dentry) && d_unhashed(dentry)) { 1417 buflen -= 10; 1418 end -= 10; 1419 if (buflen < 0) 1420 goto Elong; 1421 memcpy(end, " (deleted)", 10); 1422 } 1423 1424 if (buflen < 1) 1425 goto Elong; 1426 /* Get '/' right */ 1427 retval = end-1; 1428 *retval = '/'; 1429 1430 for (;;) { 1431 struct dentry * parent; 1432 1433 if (dentry == root && vfsmnt == rootmnt) 1434 break; 1435 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { 1436 /* Global root? */ 1437 spin_lock(&vfsmount_lock); 1438 if (vfsmnt->mnt_parent == vfsmnt) { 1439 spin_unlock(&vfsmount_lock); 1440 goto global_root; 1441 } 1442 dentry = vfsmnt->mnt_mountpoint; 1443 vfsmnt = vfsmnt->mnt_parent; 1444 spin_unlock(&vfsmount_lock); 1445 continue; 1446 } 1447 parent = dentry->d_parent; 1448 prefetch(parent); 1449 namelen = dentry->d_name.len; 1450 buflen -= namelen + 1; 1451 if (buflen < 0) 1452 goto Elong; 1453 end -= namelen; 1454 memcpy(end, dentry->d_name.name, namelen); 1455 *--end = '/'; 1456 retval = end; 1457 dentry = parent; 1458 } 1459 1460 return retval; 1461 1462 global_root: 1463 namelen = dentry->d_name.len; 1464 buflen -= namelen; 1465 if (buflen < 0) 1466 goto Elong; 1467 retval -= namelen-1; /* hit the slash */ 1468 memcpy(retval, dentry->d_name.name, namelen); 1469 return retval; 1470 Elong: 1471 return ERR_PTR(-ENAMETOOLONG); 1472 } 1473 1474 /* write full pathname into buffer and return start of pathname */ 1475 char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt, 1476 char *buf, int buflen) 1477 { 1478 char *res; 1479 struct vfsmount *rootmnt; 1480 struct dentry *root; 1481 1482 read_lock(¤t->fs->lock); 1483 rootmnt = mntget(current->fs->rootmnt); 1484 root = dget(current->fs->root); 1485 read_unlock(¤t->fs->lock); 1486 spin_lock(&dcache_lock); 1487 res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen); 1488 spin_unlock(&dcache_lock); 1489 dput(root); 1490 mntput(rootmnt); 1491 return res; 1492 } 1493 1494 /* 1495 * NOTE! The user-level library version returns a 1496 * character pointer. The kernel system call just 1497 * returns the length of the buffer filled (which 1498 * includes the ending '\0' character), or a negative 1499 * error value. So libc would do something like 1500 * 1501 * char *getcwd(char * buf, size_t size) 1502 * { 1503 * int retval; 1504 * 1505 * retval = sys_getcwd(buf, size); 1506 * if (retval >= 0) 1507 * return buf; 1508 * errno = -retval; 1509 * return NULL; 1510 * } 1511 */ 1512 asmlinkage long sys_getcwd(char __user *buf, unsigned long size) 1513 { 1514 int error; 1515 struct vfsmount *pwdmnt, *rootmnt; 1516 struct dentry *pwd, *root; 1517 char *page = (char *) __get_free_page(GFP_USER); 1518 1519 if (!page) 1520 return -ENOMEM; 1521 1522 read_lock(¤t->fs->lock); 1523 pwdmnt = mntget(current->fs->pwdmnt); 1524 pwd = dget(current->fs->pwd); 1525 rootmnt = mntget(current->fs->rootmnt); 1526 root = dget(current->fs->root); 1527 read_unlock(¤t->fs->lock); 1528 1529 error = -ENOENT; 1530 /* Has the current directory has been unlinked? */ 1531 spin_lock(&dcache_lock); 1532 if (pwd->d_parent == pwd || !d_unhashed(pwd)) { 1533 unsigned long len; 1534 char * cwd; 1535 1536 cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE); 1537 spin_unlock(&dcache_lock); 1538 1539 error = PTR_ERR(cwd); 1540 if (IS_ERR(cwd)) 1541 goto out; 1542 1543 error = -ERANGE; 1544 len = PAGE_SIZE + page - cwd; 1545 if (len <= size) { 1546 error = len; 1547 if (copy_to_user(buf, cwd, len)) 1548 error = -EFAULT; 1549 } 1550 } else 1551 spin_unlock(&dcache_lock); 1552 1553 out: 1554 dput(pwd); 1555 mntput(pwdmnt); 1556 dput(root); 1557 mntput(rootmnt); 1558 free_page((unsigned long) page); 1559 return error; 1560 } 1561 1562 /* 1563 * Test whether new_dentry is a subdirectory of old_dentry. 1564 * 1565 * Trivially implemented using the dcache structure 1566 */ 1567 1568 /** 1569 * is_subdir - is new dentry a subdirectory of old_dentry 1570 * @new_dentry: new dentry 1571 * @old_dentry: old dentry 1572 * 1573 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth). 1574 * Returns 0 otherwise. 1575 * Caller must ensure that "new_dentry" is pinned before calling is_subdir() 1576 */ 1577 1578 int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry) 1579 { 1580 int result; 1581 struct dentry * saved = new_dentry; 1582 unsigned long seq; 1583 1584 /* need rcu_readlock to protect against the d_parent trashing due to 1585 * d_move 1586 */ 1587 rcu_read_lock(); 1588 do { 1589 /* for restarting inner loop in case of seq retry */ 1590 new_dentry = saved; 1591 result = 0; 1592 seq = read_seqbegin(&rename_lock); 1593 for (;;) { 1594 if (new_dentry != old_dentry) { 1595 struct dentry * parent = new_dentry->d_parent; 1596 if (parent == new_dentry) 1597 break; 1598 new_dentry = parent; 1599 continue; 1600 } 1601 result = 1; 1602 break; 1603 } 1604 } while (read_seqretry(&rename_lock, seq)); 1605 rcu_read_unlock(); 1606 1607 return result; 1608 } 1609 1610 void d_genocide(struct dentry *root) 1611 { 1612 struct dentry *this_parent = root; 1613 struct list_head *next; 1614 1615 spin_lock(&dcache_lock); 1616 repeat: 1617 next = this_parent->d_subdirs.next; 1618 resume: 1619 while (next != &this_parent->d_subdirs) { 1620 struct list_head *tmp = next; 1621 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 1622 next = tmp->next; 1623 if (d_unhashed(dentry)||!dentry->d_inode) 1624 continue; 1625 if (!list_empty(&dentry->d_subdirs)) { 1626 this_parent = dentry; 1627 goto repeat; 1628 } 1629 atomic_dec(&dentry->d_count); 1630 } 1631 if (this_parent != root) { 1632 next = this_parent->d_u.d_child.next; 1633 atomic_dec(&this_parent->d_count); 1634 this_parent = this_parent->d_parent; 1635 goto resume; 1636 } 1637 spin_unlock(&dcache_lock); 1638 } 1639 1640 /** 1641 * find_inode_number - check for dentry with name 1642 * @dir: directory to check 1643 * @name: Name to find. 1644 * 1645 * Check whether a dentry already exists for the given name, 1646 * and return the inode number if it has an inode. Otherwise 1647 * 0 is returned. 1648 * 1649 * This routine is used to post-process directory listings for 1650 * filesystems using synthetic inode numbers, and is necessary 1651 * to keep getcwd() working. 1652 */ 1653 1654 ino_t find_inode_number(struct dentry *dir, struct qstr *name) 1655 { 1656 struct dentry * dentry; 1657 ino_t ino = 0; 1658 1659 dentry = d_hash_and_lookup(dir, name); 1660 if (dentry) { 1661 if (dentry->d_inode) 1662 ino = dentry->d_inode->i_ino; 1663 dput(dentry); 1664 } 1665 return ino; 1666 } 1667 1668 static __initdata unsigned long dhash_entries; 1669 static int __init set_dhash_entries(char *str) 1670 { 1671 if (!str) 1672 return 0; 1673 dhash_entries = simple_strtoul(str, &str, 0); 1674 return 1; 1675 } 1676 __setup("dhash_entries=", set_dhash_entries); 1677 1678 static void __init dcache_init_early(void) 1679 { 1680 int loop; 1681 1682 /* If hashes are distributed across NUMA nodes, defer 1683 * hash allocation until vmalloc space is available. 1684 */ 1685 if (hashdist) 1686 return; 1687 1688 dentry_hashtable = 1689 alloc_large_system_hash("Dentry cache", 1690 sizeof(struct hlist_head), 1691 dhash_entries, 1692 13, 1693 HASH_EARLY, 1694 &d_hash_shift, 1695 &d_hash_mask, 1696 0); 1697 1698 for (loop = 0; loop < (1 << d_hash_shift); loop++) 1699 INIT_HLIST_HEAD(&dentry_hashtable[loop]); 1700 } 1701 1702 static void __init dcache_init(unsigned long mempages) 1703 { 1704 int loop; 1705 1706 /* 1707 * A constructor could be added for stable state like the lists, 1708 * but it is probably not worth it because of the cache nature 1709 * of the dcache. 1710 */ 1711 dentry_cache = kmem_cache_create("dentry_cache", 1712 sizeof(struct dentry), 1713 0, 1714 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| 1715 SLAB_MEM_SPREAD), 1716 NULL, NULL); 1717 1718 set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory); 1719 1720 /* Hash may have been set up in dcache_init_early */ 1721 if (!hashdist) 1722 return; 1723 1724 dentry_hashtable = 1725 alloc_large_system_hash("Dentry cache", 1726 sizeof(struct hlist_head), 1727 dhash_entries, 1728 13, 1729 0, 1730 &d_hash_shift, 1731 &d_hash_mask, 1732 0); 1733 1734 for (loop = 0; loop < (1 << d_hash_shift); loop++) 1735 INIT_HLIST_HEAD(&dentry_hashtable[loop]); 1736 } 1737 1738 /* SLAB cache for __getname() consumers */ 1739 kmem_cache_t *names_cachep __read_mostly; 1740 1741 /* SLAB cache for file structures */ 1742 kmem_cache_t *filp_cachep __read_mostly; 1743 1744 EXPORT_SYMBOL(d_genocide); 1745 1746 extern void bdev_cache_init(void); 1747 extern void chrdev_init(void); 1748 1749 void __init vfs_caches_init_early(void) 1750 { 1751 dcache_init_early(); 1752 inode_init_early(); 1753 } 1754 1755 void __init vfs_caches_init(unsigned long mempages) 1756 { 1757 unsigned long reserve; 1758 1759 /* Base hash sizes on available memory, with a reserve equal to 1760 150% of current kernel size */ 1761 1762 reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1); 1763 mempages -= reserve; 1764 1765 names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, 1766 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 1767 1768 filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, 1769 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 1770 1771 dcache_init(mempages); 1772 inode_init(mempages); 1773 files_init(mempages); 1774 mnt_init(mempages); 1775 bdev_cache_init(); 1776 chrdev_init(); 1777 } 1778 1779 EXPORT_SYMBOL(d_alloc); 1780 EXPORT_SYMBOL(d_alloc_anon); 1781 EXPORT_SYMBOL(d_alloc_root); 1782 EXPORT_SYMBOL(d_delete); 1783 EXPORT_SYMBOL(d_find_alias); 1784 EXPORT_SYMBOL(d_instantiate); 1785 EXPORT_SYMBOL(d_invalidate); 1786 EXPORT_SYMBOL(d_lookup); 1787 EXPORT_SYMBOL(d_move); 1788 EXPORT_SYMBOL(d_path); 1789 EXPORT_SYMBOL(d_prune_aliases); 1790 EXPORT_SYMBOL(d_rehash); 1791 EXPORT_SYMBOL(d_splice_alias); 1792 EXPORT_SYMBOL(d_validate); 1793 EXPORT_SYMBOL(dget_locked); 1794 EXPORT_SYMBOL(dput); 1795 EXPORT_SYMBOL(find_inode_number); 1796 EXPORT_SYMBOL(have_submounts); 1797 EXPORT_SYMBOL(names_cachep); 1798 EXPORT_SYMBOL(shrink_dcache_parent); 1799 EXPORT_SYMBOL(shrink_dcache_sb); 1800