/*
 * fs/dcache.c
 *
 * Complete reimplementation
 * (C) 1997 Thomas Schoebel-Theuer,
 * with heavy changes by Linus Torvalds
 */

/*
 * Notes on the allocation strategy:
 *
 * The dcache is a master of the icache - whenever a dcache entry
 * exists, the inode will always exist. "iput()" is done either when
 * the dcache entry is deleted or garbage collected.
 */

#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/cache.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
#include <linux/fs_struct.h>
#include <linux/hardirq.h>
#include <linux/bit_spinlock.h>
#include <linux/rculist_bl.h>
#include <linux/prefetch.h>
#include <linux/ratelimit.h>
#include "internal.h"

/*
 * Usage:
 * dcache->d_inode->i_lock protects:
 *   - i_dentry, d_alias, d_inode of aliases
 * dcache_hash_bucket lock protects:
 *   - the dcache hash table
 * s_anon bl list spinlock protects:
 *   - the s_anon list (see __d_drop)
 * dcache_lru_lock protects:
 *   - the dcache lru lists and counters
 * d_lock protects:
 *   - d_flags
 *   - d_name
 *   - d_lru
 *   - d_count
 *   - d_unhashed()
 *   - d_parent and d_subdirs
 *   - children's d_child and d_parent
 *   - d_alias, d_inode
 *
 * Ordering:
 * dentry->d_inode->i_lock
 *   dentry->d_lock
 *     dcache_lru_lock
 *     dcache_hash_bucket lock
 *     s_anon lock
 *
 * If there is an ancestor relationship:
 * dentry->d_parent->...->d_parent->d_lock
 *   ...
 *     dentry->d_parent->d_lock
 *       dentry->d_lock
 *
 * If no ancestor relationship:
 * if (dentry1 < dentry2)
 *   dentry1->d_lock
 *     dentry2->d_lock
 */
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);

EXPORT_SYMBOL(rename_lock);

static struct kmem_cache *dentry_cache __read_mostly;

/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */
#define D_HASHBITS	d_hash_shift
#define D_HASHMASK	d_hash_mask

static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;

static struct hlist_bl_head *dentry_hashtable __read_mostly;

static inline struct hlist_bl_head *d_hash(struct dentry *parent,
					unsigned long hash)
{
	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
	hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
	return dentry_hashtable + (hash & D_HASHMASK);
}
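/*
 * For illustration: both lookup paths below derive the hash chain from
 * the parent dentry pointer as well as the name hash, so the same name
 * under different directories lands on different chains:
 *
 *	struct hlist_bl_head *b = d_hash(parent, name->hash);
 *
 * where name->hash has been precomputed with full_name_hash() (see
 * d_hash_and_lookup() near the end of this file).
 */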
/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
	.age_limit = 45,
};

static DEFINE_PER_CPU(unsigned int, nr_dentry);

#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
static int get_nr_dentry(void)
{
	int i;
	int sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_dentry, i);
	return sum < 0 ? 0 : sum;
}

int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
		   size_t *lenp, loff_t *ppos)
{
	dentry_stat.nr_dentry = get_nr_dentry();
	return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif

static void __d_free(struct rcu_head *head)
{
	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);

	WARN_ON(!list_empty(&dentry->d_alias));
	if (dname_external(dentry))
		kfree(dentry->d_name.name);
	kmem_cache_free(dentry_cache, dentry);
}

/*
 * no locks, please.
 */
static void d_free(struct dentry *dentry)
{
	BUG_ON(dentry->d_count);
	this_cpu_dec(nr_dentry);
	if (dentry->d_op && dentry->d_op->d_release)
		dentry->d_op->d_release(dentry);

	/* if dentry was never visible to RCU, immediate free is OK */
	if (!(dentry->d_flags & DCACHE_RCUACCESS))
		__d_free(&dentry->d_u.d_rcu);
	else
		call_rcu(&dentry->d_u.d_rcu, __d_free);
}

/**
 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 * @dentry: the target dentry
 * After this call, in-progress rcu-walk path lookup will fail. This
 * should be called after unhashing, and after changing d_inode (if
 * the dentry has not already been unhashed).
 */
static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
{
	assert_spin_locked(&dentry->d_lock);
	/* Go through a barrier */
	write_seqcount_barrier(&dentry->d_seq);
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. Dentry has no refcount
 * and is unhashed.
 */
static void dentry_iput(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	if (inode) {
		dentry->d_inode = NULL;
		list_del_init(&dentry->d_alias);
		spin_unlock(&dentry->d_lock);
		spin_unlock(&inode->i_lock);
		if (!inode->i_nlink)
			fsnotify_inoderemove(inode);
		if (dentry->d_op && dentry->d_op->d_iput)
			dentry->d_op->d_iput(dentry, inode);
		else
			iput(inode);
	} else {
		spin_unlock(&dentry->d_lock);
	}
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. dentry remains in-use.
 */
static void dentry_unlink_inode(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	dentry->d_inode = NULL;
	list_del_init(&dentry->d_alias);
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&inode->i_lock);
	if (!inode->i_nlink)
		fsnotify_inoderemove(inode);
	if (dentry->d_op && dentry->d_op->d_iput)
		dentry->d_op->d_iput(dentry, inode);
	else
		iput(inode);
}
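/*
 * For illustration: ->d_iput() lets a filesystem take over the final
 * iput() when a dentry lets go of its inode; if it is not supplied,
 * plain iput() is used, as above. A minimal sketch, with hypothetical
 * foofs_* names:
 *
 *	static void foofs_d_iput(struct dentry *dentry, struct inode *inode)
 *	{
 *		foofs_forget_private(dentry);
 *		iput(inode);
 *	}
 *
 * Note that a ->d_iput implementation is still responsible for the
 * final iput() call itself.
 */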
/*
 * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held.
 */
static void dentry_lru_add(struct dentry *dentry)
{
	if (list_empty(&dentry->d_lru)) {
		spin_lock(&dcache_lru_lock);
		list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
		dentry->d_sb->s_nr_dentry_unused++;
		dentry_stat.nr_unused++;
		spin_unlock(&dcache_lru_lock);
	}
}

static void __dentry_lru_del(struct dentry *dentry)
{
	list_del_init(&dentry->d_lru);
	dentry->d_sb->s_nr_dentry_unused--;
	dentry_stat.nr_unused--;
}

/*
 * Remove a dentry with references from the LRU.
 */
static void dentry_lru_del(struct dentry *dentry)
{
	if (!list_empty(&dentry->d_lru)) {
		spin_lock(&dcache_lru_lock);
		__dentry_lru_del(dentry);
		spin_unlock(&dcache_lru_lock);
	}
}

/*
 * Remove a dentry that is unreferenced and about to be pruned
 * (unhashed and destroyed) from the LRU, and inform the file system.
 * This wrapper should be called _prior_ to unhashing a victim dentry.
 */
static void dentry_lru_prune(struct dentry *dentry)
{
	if (!list_empty(&dentry->d_lru)) {
		if (dentry->d_flags & DCACHE_OP_PRUNE)
			dentry->d_op->d_prune(dentry);

		spin_lock(&dcache_lru_lock);
		__dentry_lru_del(dentry);
		spin_unlock(&dcache_lru_lock);
	}
}

static void dentry_lru_move_tail(struct dentry *dentry)
{
	spin_lock(&dcache_lru_lock);
	if (list_empty(&dentry->d_lru)) {
		list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
		dentry->d_sb->s_nr_dentry_unused++;
		dentry_stat.nr_unused++;
	} else {
		list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
	}
	spin_unlock(&dcache_lru_lock);
}

/**
 * d_kill - kill dentry and return parent
 * @dentry: dentry to kill
 * @parent: parent dentry
 *
 * The dentry must already be unhashed and removed from the LRU.
 *
 * If this is the root of the dentry tree, return NULL.
 *
 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
 * d_kill.
 */
static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
	__releases(dentry->d_lock)
	__releases(parent->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	list_del(&dentry->d_u.d_child);
	/*
	 * Inform try_to_ascend() that we are no longer attached to the
	 * dentry tree
	 */
	dentry->d_flags |= DCACHE_DISCONNECTED;
	if (parent)
		spin_unlock(&parent->d_lock);
	dentry_iput(dentry);
	/*
	 * dentry_iput drops the locks, at which point nobody (except
	 * transient RCU lookups) can reach this dentry.
	 */
	d_free(dentry);
	return parent;
}

/*
 * Unhash a dentry without inserting an RCU walk barrier or checking that
 * dentry->d_lock is locked. The caller must take care of that, if
 * appropriate.
 */
static void __d_shrink(struct dentry *dentry)
{
	if (!d_unhashed(dentry)) {
		struct hlist_bl_head *b;
		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
			b = &dentry->d_sb->s_anon;
		else
			b = d_hash(dentry->d_parent, dentry->d_name.hash);

		hlist_bl_lock(b);
		__hlist_bl_del(&dentry->d_hash);
		dentry->d_hash.pprev = NULL;
		hlist_bl_unlock(b);
	}
}

/**
 * d_drop - drop a dentry
 * @dentry: dentry to drop
 *
 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 * be found through a VFS lookup any more. Note that this is different from
 * deleting the dentry - d_delete will try to mark the dentry negative if
 * possible, giving a successful _negative_ lookup, while d_drop will
 * just make the cache lookup fail.
 *
 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 * reason (NFS timeouts or autofs deletes).
 *
 * __d_drop requires dentry->d_lock.
 */
void __d_drop(struct dentry *dentry)
{
	if (!d_unhashed(dentry)) {
		__d_shrink(dentry);
		dentry_rcuwalk_barrier(dentry);
	}
}
EXPORT_SYMBOL(__d_drop);

void d_drop(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(d_drop);

/*
 * d_clear_need_lookup - drop a dentry from cache and clear the need lookup flag
 * @dentry: dentry to drop
 *
 * This is called when we do a lookup on a placeholder dentry that needed to be
 * looked up. The dentry should have been hashed in order for it to be found by
 * the lookup code, but now needs to be unhashed while we do the actual lookup
 * and clear the DCACHE_NEED_LOOKUP flag.
 */
void d_clear_need_lookup(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__d_drop(dentry);
	dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(d_clear_need_lookup);
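/*
 * For illustration: a network filesystem's ->d_revalidate() might use
 * d_drop() to make a stale name unfindable without waiting for its
 * refcount to reach zero. A sketch, with hypothetical foofs_* names:
 *
 *	static int foofs_d_revalidate(struct dentry *dentry,
 *				      struct nameidata *nd)
 *	{
 *		if (foofs_entry_expired(dentry)) {
 *			d_drop(dentry);
 *			return 0;
 *		}
 *		return 1;
 *	}
 *
 * Subsequent lookups then miss in the dcache and go back to the
 * filesystem, which matches the NFS-timeout use mentioned above.
 */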
/*
 * Finish off a dentry we've decided to kill.
 * dentry->d_lock must be held, returns with it unlocked.
 * If ref is non-zero, then decrement the refcount too.
 * Returns dentry requiring refcount drop, or NULL if we're done.
 */
static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
	__releases(dentry->d_lock)
{
	struct inode *inode;
	struct dentry *parent;

	inode = dentry->d_inode;
	if (inode && !spin_trylock(&inode->i_lock)) {
relock:
		spin_unlock(&dentry->d_lock);
		cpu_relax();
		return dentry; /* try again with same dentry */
	}
	if (IS_ROOT(dentry))
		parent = NULL;
	else
		parent = dentry->d_parent;
	if (parent && !spin_trylock(&parent->d_lock)) {
		if (inode)
			spin_unlock(&inode->i_lock);
		goto relock;
	}

	if (ref)
		dentry->d_count--;
	/*
	 * if dentry was on the d_lru list delete it from there.
	 * inform the fs via d_prune that this dentry is about to be
	 * unhashed and destroyed.
	 */
	dentry_lru_prune(dentry);
	/* if it was on the hash then remove it */
	__d_drop(dentry);
	return d_kill(dentry, parent);
}

/*
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
 */

/*
 * dput - release a dentry
 * @dentry: dentry to release
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 */
void dput(struct dentry *dentry)
{
	if (!dentry)
		return;

repeat:
	if (dentry->d_count == 1)
		might_sleep();
	spin_lock(&dentry->d_lock);
	BUG_ON(!dentry->d_count);
	if (dentry->d_count > 1) {
		dentry->d_count--;
		spin_unlock(&dentry->d_lock);
		return;
	}

	if (dentry->d_flags & DCACHE_OP_DELETE) {
		if (dentry->d_op->d_delete(dentry))
			goto kill_it;
	}

	/* Unreachable? Get rid of it */
	if (d_unhashed(dentry))
		goto kill_it;

	/*
	 * If this dentry needs lookup, don't set the referenced flag so that
	 * it is more likely to be cleaned up by the dcache shrinker in case
	 * of memory pressure.
	 */
	if (!d_need_lookup(dentry))
		dentry->d_flags |= DCACHE_REFERENCED;
	dentry_lru_add(dentry);

	dentry->d_count--;
	spin_unlock(&dentry->d_lock);
	return;

kill_it:
	dentry = dentry_kill(dentry, 1);
	if (dentry)
		goto repeat;
}
EXPORT_SYMBOL(dput);

/**
 * d_invalidate - invalidate a dentry
 * @dentry: dentry to invalidate
 *
 * Try to invalidate the dentry if it turns out to be
 * possible. If there are other dentries that can be
 * reached through this one we can't delete it and we
 * return -EBUSY. On success we return 0.
 *
 * no dcache lock.
 */
int d_invalidate(struct dentry * dentry)
{
	/*
	 * If it's already been dropped, return OK.
	 */
	spin_lock(&dentry->d_lock);
	if (d_unhashed(dentry)) {
		spin_unlock(&dentry->d_lock);
		return 0;
	}
	/*
	 * Check whether to do a partial shrink_dcache
	 * to get rid of unused child entries.
	 */
	if (!list_empty(&dentry->d_subdirs)) {
		spin_unlock(&dentry->d_lock);
		shrink_dcache_parent(dentry);
		spin_lock(&dentry->d_lock);
	}

	/*
	 * Somebody else still using it?
	 *
	 * If it's a directory, we can't drop it
	 * for fear of somebody re-populating it
	 * with children (even though dropping it
	 * would make it unreachable from the root,
	 * we might still populate it if it was a
	 * working directory or similar).
	 * We also need to leave mountpoints alone,
	 * directory or not.
	 */
	if (dentry->d_count > 1 && dentry->d_inode) {
		if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) {
			spin_unlock(&dentry->d_lock);
			return -EBUSY;
		}
	}

	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
	return 0;
}
EXPORT_SYMBOL(d_invalidate);

/* This must be called with d_lock held */
static inline void __dget_dlock(struct dentry *dentry)
{
	dentry->d_count++;
}

static inline void __dget(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__dget_dlock(dentry);
	spin_unlock(&dentry->d_lock);
}

struct dentry *dget_parent(struct dentry *dentry)
{
	struct dentry *ret;

repeat:
	/*
	 * Don't need rcu_dereference because we re-check it was correct under
	 * the lock.
	 */
	rcu_read_lock();
	ret = dentry->d_parent;
	spin_lock(&ret->d_lock);
	if (unlikely(ret != dentry->d_parent)) {
		spin_unlock(&ret->d_lock);
		rcu_read_unlock();
		goto repeat;
	}
	rcu_read_unlock();
	BUG_ON(!ret->d_count);
	ret->d_count++;
	spin_unlock(&ret->d_lock);
	return ret;
}
EXPORT_SYMBOL(dget_parent);

/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 * @want_discon: flag, used by d_splice_alias, to request
 *          that only a DISCONNECTED alias be returned.
 *
 * If inode has a hashed alias, or is a directory and has any alias,
 * acquire the reference to alias and return it. Otherwise return NULL.
 * Notice that if inode is a directory there can be only one alias and
 * it can be unhashed only if it has no children, or if it is the root
 * of a filesystem.
 *
 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 * any other hashed alias over that one unless @want_discon is set,
 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 */
static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
{
	struct dentry *alias, *discon_alias;

again:
	discon_alias = NULL;
	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				discon_alias = alias;
			} else if (!want_discon) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
	}
	if (discon_alias) {
		alias = discon_alias;
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
		goto again;
	}
	return NULL;
}

struct dentry *d_find_alias(struct inode *inode)
{
	struct dentry *de = NULL;

	if (!list_empty(&inode->i_dentry)) {
		spin_lock(&inode->i_lock);
		de = __d_find_alias(inode, 0);
		spin_unlock(&inode->i_lock);
	}
	return de;
}
EXPORT_SYMBOL(d_find_alias);

/*
 * Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
	struct dentry *dentry;
restart:
	spin_lock(&inode->i_lock);
	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
		spin_lock(&dentry->d_lock);
		if (!dentry->d_count) {
			__dget_dlock(dentry);
			__d_drop(dentry);
			spin_unlock(&dentry->d_lock);
			spin_unlock(&inode->i_lock);
			dput(dentry);
			goto restart;
		}
		spin_unlock(&dentry->d_lock);
	}
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_prune_aliases);
/*
 * Try to throw away a dentry - free the inode, dput the parent.
 * Requires dentry->d_lock is held, and dentry->d_count == 0.
 * Releases dentry->d_lock.
 *
 * This may fail if locks cannot be acquired; no problem, just try again.
 */
static void try_prune_one_dentry(struct dentry *dentry)
	__releases(dentry->d_lock)
{
	struct dentry *parent;

	parent = dentry_kill(dentry, 0);
	/*
	 * If dentry_kill returns NULL, we have nothing more to do.
	 * If it returns the same dentry, trylocks failed. In either
	 * case, just loop again.
	 *
	 * Otherwise, we need to prune ancestors too. This is necessary
	 * to prevent quadratic behavior of shrink_dcache_parent(), but
	 * is also expected to be beneficial in reducing dentry cache
	 * fragmentation.
	 */
	if (!parent)
		return;
	if (parent == dentry)
		return;

	/* Prune ancestors. */
	dentry = parent;
	while (dentry) {
		spin_lock(&dentry->d_lock);
		if (dentry->d_count > 1) {
			dentry->d_count--;
			spin_unlock(&dentry->d_lock);
			return;
		}
		dentry = dentry_kill(dentry, 1);
	}
}

static void shrink_dentry_list(struct list_head *list)
{
	struct dentry *dentry;

	rcu_read_lock();
	for (;;) {
		dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
		if (&dentry->d_lru == list)
			break; /* empty */
		spin_lock(&dentry->d_lock);
		if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
			spin_unlock(&dentry->d_lock);
			continue;
		}

		/*
		 * We found an inuse dentry which was not removed from
		 * the LRU because of laziness during lookup. Do not free
		 * it - just keep it off the LRU list.
		 */
		if (dentry->d_count) {
			dentry_lru_del(dentry);
			spin_unlock(&dentry->d_lock);
			continue;
		}

		rcu_read_unlock();

		try_prune_one_dentry(dentry);

		rcu_read_lock();
	}
	rcu_read_unlock();
}

/**
 * __shrink_dcache_sb - shrink the dentry LRU on a given superblock
 * @sb:    superblock to shrink dentry LRU.
 * @count: number of entries to prune
 * @flags: flags to control the dentry processing
 *
 * If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
 */
static void __shrink_dcache_sb(struct super_block *sb, int count, int flags)
{
	struct dentry *dentry;
	LIST_HEAD(referenced);
	LIST_HEAD(tmp);

relock:
	spin_lock(&dcache_lru_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		dentry = list_entry(sb->s_dentry_lru.prev,
				struct dentry, d_lru);
		BUG_ON(dentry->d_sb != sb);

		if (!spin_trylock(&dentry->d_lock)) {
			spin_unlock(&dcache_lru_lock);
			cpu_relax();
			goto relock;
		}

		/*
		 * If we are honouring the DCACHE_REFERENCED flag and the
		 * dentry has this flag set, don't free it. Clear the flag
		 * and put it back on the LRU.
		 */
		if (flags & DCACHE_REFERENCED &&
		    dentry->d_flags & DCACHE_REFERENCED) {
			dentry->d_flags &= ~DCACHE_REFERENCED;
			list_move(&dentry->d_lru, &referenced);
			spin_unlock(&dentry->d_lock);
		} else {
			list_move_tail(&dentry->d_lru, &tmp);
			spin_unlock(&dentry->d_lock);
			if (!--count)
				break;
		}
		cond_resched_lock(&dcache_lru_lock);
	}
	if (!list_empty(&referenced))
		list_splice(&referenced, &sb->s_dentry_lru);
	spin_unlock(&dcache_lru_lock);

	shrink_dentry_list(&tmp);
}

/**
 * prune_dcache_sb - shrink the dcache
 * @sb: superblock
 * @nr_to_scan: number of entries to try to free
 *
 * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
 * done when we need more memory and is called from the superblock shrinker
 * function.
 *
 * This function may fail to free any resources if all the dentries are in
 * use.
 */
void prune_dcache_sb(struct super_block *sb, int nr_to_scan)
{
	__shrink_dcache_sb(sb, nr_to_scan, DCACHE_REFERENCED);
}

/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This is used to free
 * the dcache before unmounting a file system.
 */
void shrink_dcache_sb(struct super_block *sb)
{
	LIST_HEAD(tmp);

	spin_lock(&dcache_lru_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		list_splice_init(&sb->s_dentry_lru, &tmp);
		spin_unlock(&dcache_lru_lock);
		shrink_dentry_list(&tmp);
		spin_lock(&dcache_lru_lock);
	}
	spin_unlock(&dcache_lru_lock);
}
EXPORT_SYMBOL(shrink_dcache_sb);
/*
 * destroy a single subtree of dentries for unmount
 * - see the comments on shrink_dcache_for_umount() for a description of the
 *   locking
 */
static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
{
	struct dentry *parent;

	BUG_ON(!IS_ROOT(dentry));

	for (;;) {
		/* descend to the first leaf in the current subtree */
		while (!list_empty(&dentry->d_subdirs))
			dentry = list_entry(dentry->d_subdirs.next,
					    struct dentry, d_u.d_child);

		/* consume the dentries from this leaf up through its parents
		 * until we find one with children or run out altogether */
		do {
			struct inode *inode;

			/*
			 * remove the dentry from the lru, and inform
			 * the fs that this dentry is about to be
			 * unhashed and destroyed.
			 */
			dentry_lru_prune(dentry);
			__d_shrink(dentry);

			if (dentry->d_count != 0) {
				printk(KERN_ERR
				       "BUG: Dentry %p{i=%lx,n=%s}"
				       " still in use (%d)"
				       " [unmount of %s %s]\n",
				       dentry,
				       dentry->d_inode ?
				       dentry->d_inode->i_ino : 0UL,
				       dentry->d_name.name,
				       dentry->d_count,
				       dentry->d_sb->s_type->name,
				       dentry->d_sb->s_id);
				BUG();
			}

			if (IS_ROOT(dentry)) {
				parent = NULL;
				list_del(&dentry->d_u.d_child);
			} else {
				parent = dentry->d_parent;
				parent->d_count--;
				list_del(&dentry->d_u.d_child);
			}

			inode = dentry->d_inode;
			if (inode) {
				dentry->d_inode = NULL;
				list_del_init(&dentry->d_alias);
				if (dentry->d_op && dentry->d_op->d_iput)
					dentry->d_op->d_iput(dentry, inode);
				else
					iput(inode);
			}

			d_free(dentry);

			/* finished when we fall off the top of the tree,
			 * otherwise we ascend to the parent and move to the
			 * next sibling if there is one */
			if (!parent)
				return;
			dentry = parent;
		} while (list_empty(&dentry->d_subdirs));

		dentry = list_entry(dentry->d_subdirs.next,
				    struct dentry, d_u.d_child);
	}
}

/*
 * destroy the dentries attached to a superblock on unmounting
 * - we don't need to use dentry->d_lock because:
 *   - the superblock is detached from all mountings and open files, so the
 *     dentry trees will not be rearranged by the VFS
 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
 *     any dentries belonging to this superblock that it comes across
 *   - the filesystem itself is no longer permitted to rearrange the dentries
 *     in this superblock
 */
void shrink_dcache_for_umount(struct super_block *sb)
{
	struct dentry *dentry;

	if (down_read_trylock(&sb->s_umount))
		BUG();

	dentry = sb->s_root;
	sb->s_root = NULL;
	dentry->d_count--;
	shrink_dcache_for_umount_subtree(dentry);

	while (!hlist_bl_empty(&sb->s_anon)) {
		dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
		shrink_dcache_for_umount_subtree(dentry);
	}
}

/*
 * This tries to ascend one level of parenthood, but
 * we can race with renaming, so we need to re-check
 * the parenthood after dropping the lock and check
 * that the sequence number still matches.
 */
static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
{
	struct dentry *new = old->d_parent;

	rcu_read_lock();
	spin_unlock(&old->d_lock);
	spin_lock(&new->d_lock);

	/*
	 * might go back up the wrong parent if we have had a rename
	 * or deletion
	 */
	if (new != old->d_parent ||
	    (old->d_flags & DCACHE_DISCONNECTED) ||
	    (!locked && read_seqretry(&rename_lock, seq))) {
		spin_unlock(&new->d_lock);
		new = NULL;
	}
	rcu_read_unlock();
	return new;
}
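/*
 * For illustration: the tree walkers below all follow the same
 * rename_lock retry idiom that try_to_ascend() participates in:
 *
 *	seq = read_seqbegin(&rename_lock);
 *	...walk the tree optimistically with respect to renames...
 *	if (read_seqretry(&rename_lock, seq))
 *		goto rename_retry;	(second pass takes write_seqlock())
 *
 * The first pass is optimistic; if a concurrent rename moved dentries
 * under us, the walk is redone while holding rename_lock for writing,
 * which excludes renames entirely.
 */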
/*
 * Search for at least 1 mount point in the dentry's subdirs.
 * We descend to the next level whenever the d_subdirs
 * list is non-empty and continue searching.
 */

/**
 * have_submounts - check for mounts over a dentry
 * @parent: dentry to check.
 *
 * Return true if the parent or its subdirectories contain
 * a mount point
 */
int have_submounts(struct dentry *parent)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int locked = 0;

	seq = read_seqbegin(&rename_lock);
again:
	this_parent = parent;

	if (d_mountpoint(parent))
		goto positive;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		/* Have we found a mount point ? */
		if (d_mountpoint(dentry)) {
			spin_unlock(&dentry->d_lock);
			spin_unlock(&this_parent->d_lock);
			goto positive;
		}
		if (!list_empty(&dentry->d_subdirs)) {
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}
		spin_unlock(&dentry->d_lock);
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		struct dentry *child = this_parent;
		this_parent = try_to_ascend(this_parent, locked, seq);
		if (!this_parent)
			goto rename_retry;
		next = child->d_u.d_child.next;
		goto resume;
	}
	spin_unlock(&this_parent->d_lock);
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return 0; /* No mount points found in tree */
positive:
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return 1;

rename_retry:
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}
EXPORT_SYMBOL(have_submounts);

/*
 * Search the dentry child list for the specified parent,
 * and move any unused dentries to the end of the unused
 * list for prune_dcache(). We descend to the next level
 * whenever the d_subdirs list is non-empty and continue
 * searching.
 *
 * It returns zero iff there are no unused children,
 * otherwise it returns the number of children moved to
 * the end of the unused list. This may not be the total
 * number of unused children, because select_parent can
 * drop the lock and return early due to latency
 * constraints.
 */
static int select_parent(struct dentry * parent)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int found = 0;
	int locked = 0;

	seq = read_seqbegin(&rename_lock);
again:
	this_parent = parent;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);

		/*
		 * move only zero ref count dentries to the end
		 * of the unused list for prune_dcache
		 */
		if (!dentry->d_count) {
			dentry_lru_move_tail(dentry);
			found++;
		} else {
			dentry_lru_del(dentry);
		}

		/*
		 * We can return to the caller if we have found some (this
		 * ensures forward progress). We'll be coming back to find
		 * the rest.
		 */
		if (found && need_resched()) {
			spin_unlock(&dentry->d_lock);
			goto out;
		}

		/*
		 * Descend a level if the d_subdirs list is non-empty.
		 */
		if (!list_empty(&dentry->d_subdirs)) {
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}

		spin_unlock(&dentry->d_lock);
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		struct dentry *child = this_parent;
		this_parent = try_to_ascend(this_parent, locked, seq);
		if (!this_parent)
			goto rename_retry;
		next = child->d_u.d_child.next;
		goto resume;
	}
out:
	spin_unlock(&this_parent->d_lock);
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return found;

rename_retry:
	if (found)
		return found;
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}

/**
 * shrink_dcache_parent - prune dcache
 * @parent: parent of entries to prune
 *
 * Prune the dcache to remove unused children of the parent dentry.
 */
void shrink_dcache_parent(struct dentry * parent)
{
	struct super_block *sb = parent->d_sb;
	int found;

	while ((found = select_parent(parent)) != 0)
		__shrink_dcache_sb(sb, found, 0);
}
EXPORT_SYMBOL(shrink_dcache_parent);

/**
 * __d_alloc - allocate a dcache entry
 * @sb: filesystem it will belong to
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
 * available. On a success the dentry is returned. The name passed in is
 * copied and the copy passed in may be reused after this call.
 */
struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
{
	struct dentry *dentry;
	char *dname;

	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
	if (!dentry)
		return NULL;

	if (name->len > DNAME_INLINE_LEN-1) {
		dname = kmalloc(name->len + 1, GFP_KERNEL);
		if (!dname) {
			kmem_cache_free(dentry_cache, dentry);
			return NULL;
		}
	} else {
		dname = dentry->d_iname;
	}
	dentry->d_name.name = dname;

	dentry->d_name.len = name->len;
	dentry->d_name.hash = name->hash;
	memcpy(dname, name->name, name->len);
	dname[name->len] = 0;

	dentry->d_count = 1;
	dentry->d_flags = 0;
	spin_lock_init(&dentry->d_lock);
	seqcount_init(&dentry->d_seq);
	dentry->d_inode = NULL;
	dentry->d_parent = dentry;
	dentry->d_sb = sb;
	dentry->d_op = NULL;
	dentry->d_fsdata = NULL;
	INIT_HLIST_BL_NODE(&dentry->d_hash);
	INIT_LIST_HEAD(&dentry->d_lru);
	INIT_LIST_HEAD(&dentry->d_subdirs);
	INIT_LIST_HEAD(&dentry->d_alias);
	INIT_LIST_HEAD(&dentry->d_u.d_child);
	d_set_d_op(dentry, dentry->d_sb->s_d_op);

	this_cpu_inc(nr_dentry);

	return dentry;
}

/**
 * d_alloc - allocate a dcache entry
 * @parent: parent of entry to allocate
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
 * available. On a success the dentry is returned. The name passed in is
 * copied and the copy passed in may be reused after this call.
 */
struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
{
	struct dentry *dentry = __d_alloc(parent->d_sb, name);
	if (!dentry)
		return NULL;

	spin_lock(&parent->d_lock);
	/*
	 * don't need child lock because it is not subject
	 * to concurrency here
	 */
	__dget_dlock(parent);
	dentry->d_parent = parent;
	list_add(&dentry->d_u.d_child, &parent->d_subdirs);
	spin_unlock(&parent->d_lock);

	return dentry;
}
EXPORT_SYMBOL(d_alloc);

struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
{
	struct dentry *dentry = __d_alloc(sb, name);
	if (dentry)
		dentry->d_flags |= DCACHE_DISCONNECTED;
	return dentry;
}
EXPORT_SYMBOL(d_alloc_pseudo);

struct dentry *d_alloc_name(struct dentry *parent, const char *name)
{
	struct qstr q;

	q.name = name;
	q.len = strlen(name);
	q.hash = full_name_hash(q.name, q.len);
	return d_alloc(parent, &q);
}
EXPORT_SYMBOL(d_alloc_name);

void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
{
	WARN_ON_ONCE(dentry->d_op);
	WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH	|
				DCACHE_OP_COMPARE	|
				DCACHE_OP_REVALIDATE	|
				DCACHE_OP_DELETE ));
	dentry->d_op = op;
	if (!op)
		return;
	if (op->d_hash)
		dentry->d_flags |= DCACHE_OP_HASH;
	if (op->d_compare)
		dentry->d_flags |= DCACHE_OP_COMPARE;
	if (op->d_revalidate)
		dentry->d_flags |= DCACHE_OP_REVALIDATE;
	if (op->d_delete)
		dentry->d_flags |= DCACHE_OP_DELETE;
	if (op->d_prune)
		dentry->d_flags |= DCACHE_OP_PRUNE;

}
EXPORT_SYMBOL(d_set_d_op);
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
	spin_lock(&dentry->d_lock);
	if (inode) {
		if (unlikely(IS_AUTOMOUNT(inode)))
			dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
		list_add(&dentry->d_alias, &inode->i_dentry);
	}
	dentry->d_inode = inode;
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	fsnotify_d_instantiate(dentry, inode);
}

/**
 * d_instantiate - fill in inode information for a dentry
 * @entry: dentry to complete
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry.
 *
 * This turns negative dentries into productive full members
 * of society.
 *
 * NOTE! This assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */
void d_instantiate(struct dentry *entry, struct inode * inode)
{
	BUG_ON(!list_empty(&entry->d_alias));
	if (inode)
		spin_lock(&inode->i_lock);
	__d_instantiate(entry, inode);
	if (inode)
		spin_unlock(&inode->i_lock);
	security_d_instantiate(entry, inode);
}
EXPORT_SYMBOL(d_instantiate);

/**
 * d_instantiate_unique - instantiate a non-aliased dentry
 * @entry: dentry to instantiate
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry. On success, it returns NULL.
 * If an unhashed alias of "entry" already exists, then we return the
 * aliased dentry instead and drop one reference to inode.
 *
 * Note that in order to avoid conflicts with rename() etc, the caller
 * had better be holding the parent directory semaphore.
 *
 * This also assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */
static struct dentry *__d_instantiate_unique(struct dentry *entry,
					     struct inode *inode)
{
	struct dentry *alias;
	int len = entry->d_name.len;
	const char *name = entry->d_name.name;
	unsigned int hash = entry->d_name.hash;

	if (!inode) {
		__d_instantiate(entry, NULL);
		return NULL;
	}

	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
		struct qstr *qstr = &alias->d_name;

		/*
		 * Don't need alias->d_lock here, because aliases with
		 * d_parent == entry->d_parent are not subject to name or
		 * parent changes, because the parent inode i_mutex is held.
		 */
		if (qstr->hash != hash)
			continue;
		if (alias->d_parent != entry->d_parent)
			continue;
		if (dentry_cmp(qstr->name, qstr->len, name, len))
			continue;
		__dget(alias);
		return alias;
	}

	__d_instantiate(entry, inode);
	return NULL;
}

struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
	struct dentry *result;

	BUG_ON(!list_empty(&entry->d_alias));

	if (inode)
		spin_lock(&inode->i_lock);
	result = __d_instantiate_unique(entry, inode);
	if (inode)
		spin_unlock(&inode->i_lock);

	if (!result) {
		security_d_instantiate(entry, inode);
		return NULL;
	}

	BUG_ON(!d_unhashed(result));
	iput(inode);
	return result;
}
EXPORT_SYMBOL(d_instantiate_unique);
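/*
 * For illustration: a filesystem's ->create() typically allocates the
 * inode and then binds it to the (negative) dentry it was handed. A
 * minimal sketch, with a hypothetical foofs_new_inode() helper:
 *
 *	static int foofs_create(struct inode *dir, struct dentry *dentry,
 *				int mode, struct nameidata *nd)
 *	{
 *		struct inode *inode = foofs_new_inode(dir->i_sb, mode);
 *		if (!inode)
 *			return -ENOSPC;
 *		d_instantiate(dentry, inode);
 *		return 0;
 *	}
 *
 * The reference obtained when the inode was created is the one that
 * d_instantiate() hands over to the dcache.
 */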
/**
 * d_alloc_root - allocate root dentry
 * @root_inode: inode to allocate the root for
 *
 * Allocate a root ("/") dentry for the inode given. The inode is
 * instantiated and returned. %NULL is returned if there is insufficient
 * memory or the inode passed is %NULL.
 */
struct dentry * d_alloc_root(struct inode * root_inode)
{
	struct dentry *res = NULL;

	if (root_inode) {
		static const struct qstr name = { .name = "/", .len = 1 };

		res = __d_alloc(root_inode->i_sb, &name);
		if (res)
			d_instantiate(res, root_inode);
	}
	return res;
}
EXPORT_SYMBOL(d_alloc_root);

static struct dentry * __d_find_any_alias(struct inode *inode)
{
	struct dentry *alias;

	if (list_empty(&inode->i_dentry))
		return NULL;
	alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
	__dget(alias);
	return alias;
}

static struct dentry * d_find_any_alias(struct inode *inode)
{
	struct dentry *de;

	spin_lock(&inode->i_lock);
	de = __d_find_any_alias(inode);
	spin_unlock(&inode->i_lock);
	return de;
}

/**
 * d_obtain_alias - find or allocate a dentry for a given inode
 * @inode: inode to allocate the dentry for
 *
 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
 * similar open by handle operations. The returned dentry may be anonymous,
 * or may have a full name (if the inode was already in the cache).
 *
 * When called on a directory inode, we must ensure that the inode only ever
 * has one dentry. If a dentry is found, that is returned instead of
 * allocating a new one.
 *
 * On successful return, the reference to the inode has been transferred
 * to the dentry. In case of an error the reference on the inode is released.
 * To make it easier to use in export operations a %NULL or IS_ERR inode may
 * be passed in and the error will be propagated to the return value,
 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
 */
struct dentry *d_obtain_alias(struct inode *inode)
{
	static const struct qstr anonstring = { .name = "" };
	struct dentry *tmp;
	struct dentry *res;

	if (!inode)
		return ERR_PTR(-ESTALE);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	res = d_find_any_alias(inode);
	if (res)
		goto out_iput;

	tmp = __d_alloc(inode->i_sb, &anonstring);
	if (!tmp) {
		res = ERR_PTR(-ENOMEM);
		goto out_iput;
	}

	spin_lock(&inode->i_lock);
	res = __d_find_any_alias(inode);
	if (res) {
		spin_unlock(&inode->i_lock);
		dput(tmp);
		goto out_iput;
	}

	/* attach a disconnected dentry */
	spin_lock(&tmp->d_lock);
	tmp->d_inode = inode;
	tmp->d_flags |= DCACHE_DISCONNECTED;
	list_add(&tmp->d_alias, &inode->i_dentry);
	hlist_bl_lock(&tmp->d_sb->s_anon);
	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
	hlist_bl_unlock(&tmp->d_sb->s_anon);
	spin_unlock(&tmp->d_lock);
	spin_unlock(&inode->i_lock);
	security_d_instantiate(tmp, inode);

	return tmp;

out_iput:
	if (res && !IS_ERR(res))
		security_d_instantiate(res, inode);
	iput(inode);
	return res;
}
EXPORT_SYMBOL(d_obtain_alias);
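/*
 * For illustration: an exportable filesystem's ->fh_to_dentry() decodes
 * the file handle into an inode and lets d_obtain_alias() produce the
 * dentry, including all the error cases. A sketch, with a hypothetical
 * foofs_iget_from_fh() decoder:
 *
 *	static struct dentry *foofs_fh_to_dentry(struct super_block *sb,
 *				struct fid *fid, int fh_len, int fh_type)
 *	{
 *		struct inode *inode = foofs_iget_from_fh(sb, fid);
 *		return d_obtain_alias(inode);
 *	}
 *
 * Because %NULL and IS_ERR inodes are handled internally, the decoder's
 * result can be passed through without error checking here.
 */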
IS_ROOT and 1560 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry 1561 * and return it, else simply d_add the inode to the dentry and return NULL. 1562 * 1563 * This is needed in the lookup routine of any filesystem that is exportable 1564 * (via knfsd) so that we can build dcache paths to directories effectively. 1565 * 1566 * If a dentry was found and moved, then it is returned. Otherwise NULL 1567 * is returned. This matches the expected return value of ->lookup. 1568 * 1569 */ 1570 struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) 1571 { 1572 struct dentry *new = NULL; 1573 1574 if (IS_ERR(inode)) 1575 return ERR_CAST(inode); 1576 1577 if (inode && S_ISDIR(inode->i_mode)) { 1578 spin_lock(&inode->i_lock); 1579 new = __d_find_alias(inode, 1); 1580 if (new) { 1581 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); 1582 spin_unlock(&inode->i_lock); 1583 security_d_instantiate(new, inode); 1584 d_move(new, dentry); 1585 iput(inode); 1586 } else { 1587 /* already taking inode->i_lock, so d_add() by hand */ 1588 __d_instantiate(dentry, inode); 1589 spin_unlock(&inode->i_lock); 1590 security_d_instantiate(dentry, inode); 1591 d_rehash(dentry); 1592 } 1593 } else 1594 d_add(dentry, inode); 1595 return new; 1596 } 1597 EXPORT_SYMBOL(d_splice_alias); 1598 1599 /** 1600 * d_add_ci - lookup or allocate new dentry with case-exact name 1601 * @inode: the inode case-insensitive lookup has found 1602 * @dentry: the negative dentry that was passed to the parent's lookup func 1603 * @name: the case-exact name to be associated with the returned dentry 1604 * 1605 * This is to avoid filling the dcache with case-insensitive names to the 1606 * same inode, only the actual correct case is stored in the dcache for 1607 * case-insensitive filesystems. 1608 * 1609 * For a case-insensitive lookup match and if the the case-exact dentry 1610 * already exists in in the dcache, use it and return it. 1611 * 1612 * If no entry exists with the exact case name, allocate new dentry with 1613 * the exact case, and return the spliced entry. 1614 */ 1615 struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, 1616 struct qstr *name) 1617 { 1618 int error; 1619 struct dentry *found; 1620 struct dentry *new; 1621 1622 /* 1623 * First check if a dentry matching the name already exists, 1624 * if not go ahead and create it now. 1625 */ 1626 found = d_hash_and_lookup(dentry->d_parent, name); 1627 if (!found) { 1628 new = d_alloc(dentry->d_parent, name); 1629 if (!new) { 1630 error = -ENOMEM; 1631 goto err_out; 1632 } 1633 1634 found = d_splice_alias(inode, new); 1635 if (found) { 1636 dput(new); 1637 return found; 1638 } 1639 return new; 1640 } 1641 1642 /* 1643 * If a matching dentry exists, and it's not negative use it. 1644 * 1645 * Decrement the reference count to balance the iget() done 1646 * earlier on. 1647 */ 1648 if (found->d_inode) { 1649 if (unlikely(found->d_inode != inode)) { 1650 /* This can't happen because bad inodes are unhashed. */ 1651 BUG_ON(!is_bad_inode(inode)); 1652 BUG_ON(!is_bad_inode(found->d_inode)); 1653 } 1654 iput(inode); 1655 return found; 1656 } 1657 1658 /* 1659 * We are going to instantiate this dentry, unhash it and clear the 1660 * lookup flag so we can do that. 1661 */ 1662 if (unlikely(d_need_lookup(found))) 1663 d_clear_need_lookup(found); 1664 1665 /* 1666 * Negative dentry: instantiate it unless the inode is a directory and 1667 * already has a dentry. 
1668 */ 1669 new = d_splice_alias(inode, found); 1670 if (new) { 1671 dput(found); 1672 found = new; 1673 } 1674 return found; 1675 1676 err_out: 1677 iput(inode); 1678 return ERR_PTR(error); 1679 } 1680 EXPORT_SYMBOL(d_add_ci); 1681 1682 /** 1683 * __d_lookup_rcu - search for a dentry (racy, store-free) 1684 * @parent: parent dentry 1685 * @name: qstr of name we wish to find 1686 * @seq: returns d_seq value at the point where the dentry was found 1687 * @inode: returns dentry->d_inode when the inode was found valid. 1688 * Returns: dentry, or NULL 1689 * 1690 * __d_lookup_rcu is the dcache lookup function for rcu-walk name 1691 * resolution (store-free path walking) design described in 1692 * Documentation/filesystems/path-lookup.txt. 1693 * 1694 * This is not to be used outside core vfs. 1695 * 1696 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock 1697 * held, and rcu_read_lock held. The returned dentry must not be stored into 1698 * without taking d_lock and checking d_seq sequence count against @seq 1699 * returned here. 1700 * 1701 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount 1702 * function. 1703 * 1704 * Alternatively, __d_lookup_rcu may be called again to look up the child of 1705 * the returned dentry, so long as its parent's seqlock is checked after the 1706 * child is looked up. Thus, an interlocking stepping of sequence lock checks 1707 * is formed, giving integrity down the path walk. 1708 */ 1709 struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, 1710 unsigned *seq, struct inode **inode) 1711 { 1712 unsigned int len = name->len; 1713 unsigned int hash = name->hash; 1714 const unsigned char *str = name->name; 1715 struct hlist_bl_head *b = d_hash(parent, hash); 1716 struct hlist_bl_node *node; 1717 struct dentry *dentry; 1718 1719 /* 1720 * Note: There is significant duplication with __d_lookup_rcu which is 1721 * required to prevent single threaded performance regressions 1722 * especially on architectures where smp_rmb (in seqcounts) are costly. 1723 * Keep the two functions in sync. 1724 */ 1725 1726 /* 1727 * The hash list is protected using RCU. 1728 * 1729 * Carefully use d_seq when comparing a candidate dentry, to avoid 1730 * races with d_move(). 1731 * 1732 * It is possible that concurrent renames can mess up our list 1733 * walk here and result in missing our dentry, resulting in the 1734 * false-negative result. d_lookup() protects against concurrent 1735 * renames using rename_lock seqlock. 1736 * 1737 * See Documentation/filesystems/path-lookup.txt for more details. 1738 */ 1739 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { 1740 struct inode *i; 1741 const char *tname; 1742 int tlen; 1743 1744 if (dentry->d_name.hash != hash) 1745 continue; 1746 1747 seqretry: 1748 *seq = read_seqcount_begin(&dentry->d_seq); 1749 if (dentry->d_parent != parent) 1750 continue; 1751 if (d_unhashed(dentry)) 1752 continue; 1753 tlen = dentry->d_name.len; 1754 tname = dentry->d_name.name; 1755 i = dentry->d_inode; 1756 prefetch(tname); 1757 /* 1758 * This seqcount check is required to ensure name and 1759 * len are loaded atomically, so as not to walk off the 1760 * edge of memory when walking. If we could load this 1761 * atomically some other way, we could drop this check. 
1762 */ 1763 if (read_seqcount_retry(&dentry->d_seq, *seq)) 1764 goto seqretry; 1765 if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { 1766 if (parent->d_op->d_compare(parent, *inode, 1767 dentry, i, 1768 tlen, tname, name)) 1769 continue; 1770 } else { 1771 if (dentry_cmp(tname, tlen, str, len)) 1772 continue; 1773 } 1774 /* 1775 * No extra seqcount check is required after the name 1776 * compare. The caller must perform a seqcount check in 1777 * order to do anything useful with the returned dentry 1778 * anyway. 1779 */ 1780 *inode = i; 1781 return dentry; 1782 } 1783 return NULL; 1784 } 1785 1786 /** 1787 * d_lookup - search for a dentry 1788 * @parent: parent dentry 1789 * @name: qstr of name we wish to find 1790 * Returns: dentry, or NULL 1791 * 1792 * d_lookup searches the children of the parent dentry for the name in 1793 * question. If the dentry is found its reference count is incremented and the 1794 * dentry is returned. The caller must use dput to free the entry when it has 1795 * finished using it. %NULL is returned if the dentry does not exist. 1796 */ 1797 struct dentry *d_lookup(struct dentry *parent, struct qstr *name) 1798 { 1799 struct dentry *dentry; 1800 unsigned seq; 1801 1802 do { 1803 seq = read_seqbegin(&rename_lock); 1804 dentry = __d_lookup(parent, name); 1805 if (dentry) 1806 break; 1807 } while (read_seqretry(&rename_lock, seq)); 1808 return dentry; 1809 } 1810 EXPORT_SYMBOL(d_lookup); 1811 1812 /** 1813 * __d_lookup - search for a dentry (racy) 1814 * @parent: parent dentry 1815 * @name: qstr of name we wish to find 1816 * Returns: dentry, or NULL 1817 * 1818 * __d_lookup is like d_lookup, however it may (rarely) return a 1819 * false-negative result due to unrelated rename activity. 1820 * 1821 * __d_lookup is slightly faster by avoiding rename_lock read seqlock, 1822 * however it must be used carefully, eg. with a following d_lookup in 1823 * the case of failure. 1824 * 1825 * __d_lookup callers must be commented. 1826 */ 1827 struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) 1828 { 1829 unsigned int len = name->len; 1830 unsigned int hash = name->hash; 1831 const unsigned char *str = name->name; 1832 struct hlist_bl_head *b = d_hash(parent, hash); 1833 struct hlist_bl_node *node; 1834 struct dentry *found = NULL; 1835 struct dentry *dentry; 1836 1837 /* 1838 * Note: There is significant duplication with __d_lookup_rcu which is 1839 * required to prevent single threaded performance regressions 1840 * especially on architectures where smp_rmb (in seqcounts) are costly. 1841 * Keep the two functions in sync. 1842 */ 1843 1844 /* 1845 * The hash list is protected using RCU. 1846 * 1847 * Take d_lock when comparing a candidate dentry, to avoid races 1848 * with d_move(). 1849 * 1850 * It is possible that concurrent renames can mess up our list 1851 * walk here and result in missing our dentry, resulting in the 1852 * false-negative result. d_lookup() protects against concurrent 1853 * renames using rename_lock seqlock. 1854 * 1855 * See Documentation/filesystems/path-lookup.txt for more details. 1856 */ 1857 rcu_read_lock(); 1858 1859 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { 1860 const char *tname; 1861 int tlen; 1862 1863 if (dentry->d_name.hash != hash) 1864 continue; 1865 1866 spin_lock(&dentry->d_lock); 1867 if (dentry->d_parent != parent) 1868 goto next; 1869 if (d_unhashed(dentry)) 1870 goto next; 1871 1872 /* 1873 * It is safe to compare names since d_move() cannot 1874 * change the qstr (protected by d_lock). 
1875 */ 1876 tlen = dentry->d_name.len; 1877 tname = dentry->d_name.name; 1878 if (parent->d_flags & DCACHE_OP_COMPARE) { 1879 if (parent->d_op->d_compare(parent, parent->d_inode, 1880 dentry, dentry->d_inode, 1881 tlen, tname, name)) 1882 goto next; 1883 } else { 1884 if (dentry_cmp(tname, tlen, str, len)) 1885 goto next; 1886 } 1887 1888 dentry->d_count++; 1889 found = dentry; 1890 spin_unlock(&dentry->d_lock); 1891 break; 1892 next: 1893 spin_unlock(&dentry->d_lock); 1894 } 1895 rcu_read_unlock(); 1896 1897 return found; 1898 } 1899 1900 /** 1901 * d_hash_and_lookup - hash the qstr then search for a dentry 1902 * @dir: Directory to search in 1903 * @name: qstr of name we wish to find 1904 * 1905 * On hash failure or on lookup failure NULL is returned. 1906 */ 1907 struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) 1908 { 1909 struct dentry *dentry = NULL; 1910 1911 /* 1912 * Check for a fs-specific hash function. Note that we must 1913 * calculate the standard hash first, as the d_op->d_hash() 1914 * routine may choose to leave the hash value unchanged. 1915 */ 1916 name->hash = full_name_hash(name->name, name->len); 1917 if (dir->d_flags & DCACHE_OP_HASH) { 1918 if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0) 1919 goto out; 1920 } 1921 dentry = d_lookup(dir, name); 1922 out: 1923 return dentry; 1924 } 1925 1926 /** 1927 * d_validate - verify dentry provided from insecure source (deprecated) 1928 * @dentry: The dentry alleged to be valid child of @dparent 1929 * @dparent: The parent dentry (known to be valid) 1930 * 1931 * An insecure source has sent us a dentry, here we verify it and dget() it. 1932 * This is used by ncpfs in its readdir implementation. 1933 * Zero is returned in the dentry is invalid. 1934 * 1935 * This function is slow for big directories, and deprecated, do not use it. 1936 */ 1937 int d_validate(struct dentry *dentry, struct dentry *dparent) 1938 { 1939 struct dentry *child; 1940 1941 spin_lock(&dparent->d_lock); 1942 list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { 1943 if (dentry == child) { 1944 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 1945 __dget_dlock(dentry); 1946 spin_unlock(&dentry->d_lock); 1947 spin_unlock(&dparent->d_lock); 1948 return 1; 1949 } 1950 } 1951 spin_unlock(&dparent->d_lock); 1952 1953 return 0; 1954 } 1955 EXPORT_SYMBOL(d_validate); 1956 1957 /* 1958 * When a file is deleted, we have two options: 1959 * - turn this dentry into a negative dentry 1960 * - unhash this dentry and free it. 1961 * 1962 * Usually, we want to just turn this into 1963 * a negative dentry, but if anybody else is 1964 * currently using the dentry or the inode 1965 * we can't do that and we fall back on removing 1966 * it from the hash queues and waiting for 1967 * it to be deleted later when it has no users 1968 */ 1969 1970 /** 1971 * d_delete - delete a dentry 1972 * @dentry: The dentry to delete 1973 * 1974 * Turn the dentry into a negative dentry if possible, otherwise 1975 * remove it from the hash queues so it can be deleted later 1976 */ 1977 1978 void d_delete(struct dentry * dentry) 1979 { 1980 struct inode *inode; 1981 int isdir = 0; 1982 /* 1983 * Are we the only user? 

/**
 * d_validate - verify dentry provided from insecure source (deprecated)
 * @dentry: The dentry alleged to be valid child of @dparent
 * @dparent: The parent dentry (known to be valid)
 *
 * An insecure source has sent us a dentry, here we verify it and dget() it.
 * This is used by ncpfs in its readdir implementation.
 * Zero is returned if the dentry is invalid.
 *
 * This function is slow for big directories, and is deprecated; do not use it.
 */
int d_validate(struct dentry *dentry, struct dentry *dparent)
{
        struct dentry *child;

        spin_lock(&dparent->d_lock);
        list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
                if (dentry == child) {
                        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
                        __dget_dlock(dentry);
                        spin_unlock(&dentry->d_lock);
                        spin_unlock(&dparent->d_lock);
                        return 1;
                }
        }
        spin_unlock(&dparent->d_lock);

        return 0;
}
EXPORT_SYMBOL(d_validate);

/*
 * When a file is deleted, we have two options:
 * - turn this dentry into a negative dentry
 * - unhash this dentry and free it.
 *
 * Usually, we want to just turn this into
 * a negative dentry, but if anybody else is
 * currently using the dentry or the inode
 * we can't do that and we fall back on removing
 * it from the hash queues and waiting for
 * it to be deleted later when it has no users.
 */

/**
 * d_delete - delete a dentry
 * @dentry: The dentry to delete
 *
 * Turn the dentry into a negative dentry if possible, otherwise
 * remove it from the hash queues so it can be deleted later.
 */
void d_delete(struct dentry *dentry)
{
        struct inode *inode;
        int isdir = 0;
        /*
         * Are we the only user?
         */
again:
        spin_lock(&dentry->d_lock);
        inode = dentry->d_inode;
        isdir = S_ISDIR(inode->i_mode);
        if (dentry->d_count == 1) {
                if (inode && !spin_trylock(&inode->i_lock)) {
                        spin_unlock(&dentry->d_lock);
                        cpu_relax();
                        goto again;
                }
                dentry->d_flags &= ~DCACHE_CANT_MOUNT;
                dentry_unlink_inode(dentry);
                fsnotify_nameremove(dentry, isdir);
                return;
        }

        if (!d_unhashed(dentry))
                __d_drop(dentry);

        spin_unlock(&dentry->d_lock);

        fsnotify_nameremove(dentry, isdir);
}
EXPORT_SYMBOL(d_delete);
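
/*
 * Illustrative sketch: d_delete() is normally called on the unlink path
 * while the caller still holds its own reference, so the dentry either
 * goes negative or stays unhashed until that reference is dropped
 * (hypothetical caller, simplified from the vfs_unlink() pattern):
 *
 *	error = dir->i_op->unlink(dir, dentry);
 *	if (!error)
 *		d_delete(dentry);   (make negative, or unhash if still in use)
 *	dput(dentry);               (caller's ref; may free an unhashed entry)
 */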

static void __d_rehash(struct dentry *entry, struct hlist_bl_head *b)
{
        BUG_ON(!d_unhashed(entry));
        hlist_bl_lock(b);
        entry->d_flags |= DCACHE_RCUACCESS;
        hlist_bl_add_head_rcu(&entry->d_hash, b);
        hlist_bl_unlock(b);
}

static void _d_rehash(struct dentry *entry)
{
        __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
}

/**
 * d_rehash - add an entry back to the hash
 * @entry: dentry to add to the hash
 *
 * Adds a dentry to the hash according to its name.
 */
void d_rehash(struct dentry *entry)
{
        spin_lock(&entry->d_lock);
        _d_rehash(entry);
        spin_unlock(&entry->d_lock);
}
EXPORT_SYMBOL(d_rehash);

/**
 * dentry_update_name_case - update case insensitive dentry with a new name
 * @dentry: dentry to be updated
 * @name: new name
 *
 * Update a case insensitive dentry with the new case of the name.
 *
 * dentry must have been returned by d_lookup with name @name. Old and new
 * name lengths must match (i.e. no d_compare which allows mismatched name
 * lengths).
 *
 * Parent inode i_mutex must be held over d_lookup and into this call (to
 * keep renames and concurrent inserts, and readdir(2) away).
 */
void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
{
        BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
        BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */

        spin_lock(&dentry->d_lock);
        write_seqcount_begin(&dentry->d_seq);
        memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
        write_seqcount_end(&dentry->d_seq);
        spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(dentry_update_name_case);
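
/*
 * Illustrative sketch (hypothetical case-insensitive filesystem): after a
 * d_lookup() hit whose on-disk case differs from the cached spelling, the
 * filesystem can fix up the cached name under the parent's i_mutex, as
 * the locking rules above require:
 *
 *	mutex_lock(&dir->d_inode->i_mutex);
 *	dentry = d_lookup(dir, &name);
 *	if (dentry && dentry->d_inode)
 *		dentry_update_name_case(dentry, &disk_name);
 *	mutex_unlock(&dir->d_inode->i_mutex);
 *
 * where disk_name is a qstr holding the authoritative case and the same
 * length as name.
 */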

static void switch_names(struct dentry *dentry, struct dentry *target)
{
        if (dname_external(target)) {
                if (dname_external(dentry)) {
                        /*
                         * Both external: swap the pointers
                         */
                        swap(target->d_name.name, dentry->d_name.name);
                } else {
                        /*
                         * dentry:internal, target:external. Steal target's
                         * storage and make target internal.
                         */
                        memcpy(target->d_iname, dentry->d_name.name,
                                        dentry->d_name.len + 1);
                        dentry->d_name.name = target->d_name.name;
                        target->d_name.name = target->d_iname;
                }
        } else {
                if (dname_external(dentry)) {
                        /*
                         * dentry:external, target:internal. Give dentry's
                         * storage to target and make dentry internal.
                         */
                        memcpy(dentry->d_iname, target->d_name.name,
                                        target->d_name.len + 1);
                        target->d_name.name = dentry->d_name.name;
                        dentry->d_name.name = dentry->d_iname;
                } else {
                        /*
                         * Both are internal. Just copy target to dentry
                         */
                        memcpy(dentry->d_iname, target->d_name.name,
                                        target->d_name.len + 1);
                        dentry->d_name.len = target->d_name.len;
                        return;
                }
        }
        swap(dentry->d_name.len, target->d_name.len);
}

static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
{
        /*
         * XXXX: do we really need to take target->d_lock?
         */
        if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
                spin_lock(&target->d_parent->d_lock);
        else {
                if (d_ancestor(dentry->d_parent, target->d_parent)) {
                        spin_lock(&dentry->d_parent->d_lock);
                        spin_lock_nested(&target->d_parent->d_lock,
                                                DENTRY_D_LOCK_NESTED);
                } else {
                        spin_lock(&target->d_parent->d_lock);
                        spin_lock_nested(&dentry->d_parent->d_lock,
                                                DENTRY_D_LOCK_NESTED);
                }
        }
        if (target < dentry) {
                spin_lock_nested(&target->d_lock, 2);
                spin_lock_nested(&dentry->d_lock, 3);
        } else {
                spin_lock_nested(&dentry->d_lock, 2);
                spin_lock_nested(&target->d_lock, 3);
        }
}

static void dentry_unlock_parents_for_move(struct dentry *dentry,
                                        struct dentry *target)
{
        if (target->d_parent != dentry->d_parent)
                spin_unlock(&dentry->d_parent->d_lock);
        if (target->d_parent != target)
                spin_unlock(&target->d_parent->d_lock);
}

/*
 * When switching names, the actual string doesn't strictly have to
 * be preserved in the target - because we're dropping the target
 * anyway. As such, we can just do a simple memcpy() to copy over
 * the new name before we switch.
 *
 * Note that we have to be a lot more careful about getting the hash
 * switched - we have to switch the hash value properly even if it
 * then no longer matches the actual (corrupted) string of the target.
 * The hash value has to match the hash queue that the dentry is on.
 */
/*
 * __d_move - move a dentry
 * @dentry: entry to move
 * @target: new dentry
 *
 * Update the dcache to reflect the move of a file name. Negative
 * dcache entries should not be moved in this way. Caller must hold
 * rename_lock, the i_mutex of the source and target directories,
 * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
 */
static void __d_move(struct dentry *dentry, struct dentry *target)
{
        if (!dentry->d_inode)
                printk(KERN_WARNING "VFS: moving negative dcache entry\n");

        BUG_ON(d_ancestor(dentry, target));
        BUG_ON(d_ancestor(target, dentry));

        dentry_lock_for_move(dentry, target);

        write_seqcount_begin(&dentry->d_seq);
        write_seqcount_begin(&target->d_seq);

        /* __d_drop does write_seqcount_barrier, but they're OK to nest. */

        /*
         * Move the dentry to the target hash queue. Don't bother checking
         * for the same hash queue because of how unlikely it is.
         */
        __d_drop(dentry);
        __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));

        /* Unhash the target: dput() will then get rid of it */
        __d_drop(target);

        list_del(&dentry->d_u.d_child);
        list_del(&target->d_u.d_child);

        /* Switch the names.. */
        switch_names(dentry, target);
        swap(dentry->d_name.hash, target->d_name.hash);

        /* ... and switch the parents */
        if (IS_ROOT(dentry)) {
                dentry->d_parent = target->d_parent;
                target->d_parent = target;
                INIT_LIST_HEAD(&target->d_u.d_child);
        } else {
                swap(dentry->d_parent, target->d_parent);

                /* And add them back to the (new) parent lists */
                list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
        }

        list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);

        write_seqcount_end(&target->d_seq);
        write_seqcount_end(&dentry->d_seq);

        dentry_unlock_parents_for_move(dentry, target);
        spin_unlock(&target->d_lock);
        fsnotify_d_move(dentry);
        spin_unlock(&dentry->d_lock);
}

/*
 * d_move - move a dentry
 * @dentry: entry to move
 * @target: new dentry
 *
 * Update the dcache to reflect the move of a file name. Negative
 * dcache entries should not be moved in this way. See the locking
 * requirements for __d_move.
 */
void d_move(struct dentry *dentry, struct dentry *target)
{
        write_seqlock(&rename_lock);
        __d_move(dentry, target);
        write_sequnlock(&rename_lock);
}
EXPORT_SYMBOL(d_move);
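
/*
 * Illustrative sketch: d_move() callers are responsible for the rename
 * locking described above. A rename path typically looks like this
 * (hypothetical and simplified from the VFS rename pattern):
 *
 *	trap = lock_rename(new_dir, old_dir);
 *	... check trap against source/target, do the on-disk rename ...
 *	d_move(old_dentry, new_dentry);
 *	unlock_rename(new_dir, old_dir);
 *
 * lock_rename() takes s_vfs_rename_mutex when the parents differ, which
 * is exactly the lock __d_move() relies on for cross-directory moves.
 */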

/**
 * d_ancestor - search for an ancestor
 * @p1: ancestor dentry
 * @p2: child dentry
 *
 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
 * an ancestor of p2, else NULL.
 */
struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
{
        struct dentry *p;

        for (p = p2; !IS_ROOT(p); p = p->d_parent) {
                if (p->d_parent == p1)
                        return p;
        }
        return NULL;
}
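
/*
 * Illustrative only: because d_ancestor() returns the child of p1 lying
 * on the path down to p2 (or NULL), a non-NULL result doubles as a cheap
 * loop check, as d_materialise_unique() below uses it:
 *
 *	if (d_ancestor(alias, dentry))
 *		return ERR_PTR(-ELOOP);   (splicing alias here would cycle)
 *
 * The walk relies on d_parent being stable, so callers hold rename_lock
 * (or RCU plus a seqlock retry) while calling it.
 */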

/*
 * This helper attempts to cope with remotely renamed directories
 *
 * It assumes that the caller is already holding
 * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
 *
 * Note: If ever the locking in lock_rename() changes, then please
 * remember to update this too...
 */
static struct dentry *__d_unalias(struct inode *inode,
                struct dentry *dentry, struct dentry *alias)
{
        struct mutex *m1 = NULL, *m2 = NULL;
        struct dentry *ret;

        /* If alias and dentry share a parent, then no extra locks required */
        if (alias->d_parent == dentry->d_parent)
                goto out_unalias;

        /* See lock_rename() */
        ret = ERR_PTR(-EBUSY);
        if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
                goto out_err;
        m1 = &dentry->d_sb->s_vfs_rename_mutex;
        if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
                goto out_err;
        m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
        __d_move(alias, dentry);
        ret = alias;
out_err:
        spin_unlock(&inode->i_lock);
        if (m2)
                mutex_unlock(m2);
        if (m1)
                mutex_unlock(m1);
        return ret;
}

/*
 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
 * named dentry in place of the dentry to be replaced.
 * returns with anon->d_lock held!
 */
static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
{
        struct dentry *dparent, *aparent;

        dentry_lock_for_move(anon, dentry);

        write_seqcount_begin(&dentry->d_seq);
        write_seqcount_begin(&anon->d_seq);

        dparent = dentry->d_parent;
        aparent = anon->d_parent;

        switch_names(dentry, anon);
        swap(dentry->d_name.hash, anon->d_name.hash);

        dentry->d_parent = (aparent == anon) ? dentry : aparent;
        list_del(&dentry->d_u.d_child);
        if (!IS_ROOT(dentry))
                list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
        else
                INIT_LIST_HEAD(&dentry->d_u.d_child);

        anon->d_parent = (dparent == dentry) ? anon : dparent;
        list_del(&anon->d_u.d_child);
        if (!IS_ROOT(anon))
                list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
        else
                INIT_LIST_HEAD(&anon->d_u.d_child);

        write_seqcount_end(&dentry->d_seq);
        write_seqcount_end(&anon->d_seq);

        dentry_unlock_parents_for_move(anon, dentry);
        spin_unlock(&dentry->d_lock);

        /* anon->d_lock still locked, returns locked */
        anon->d_flags &= ~DCACHE_DISCONNECTED;
}

/**
 * d_materialise_unique - introduce an inode into the tree
 * @dentry: candidate dentry
 * @inode: inode to bind to the dentry, to which aliases may be attached
 *
 * Introduces a dentry into the tree, substituting an extant disconnected
 * root directory alias in its place if there is one. Caller must hold the
 * i_mutex of the parent directory.
 */
struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
{
        struct dentry *actual;

        BUG_ON(!d_unhashed(dentry));

        if (!inode) {
                actual = dentry;
                __d_instantiate(dentry, NULL);
                d_rehash(actual);
                goto out_nolock;
        }

        spin_lock(&inode->i_lock);

        if (S_ISDIR(inode->i_mode)) {
                struct dentry *alias;

                /* Does an aliased dentry already exist? */
                alias = __d_find_alias(inode, 0);
                if (alias) {
                        actual = alias;
                        write_seqlock(&rename_lock);

                        if (d_ancestor(alias, dentry)) {
                                /* Check for loops */
                                actual = ERR_PTR(-ELOOP);
                        } else if (IS_ROOT(alias)) {
                                /* Is this an anonymous mountpoint that we
                                 * could splice into our tree? */
                                __d_materialise_dentry(dentry, alias);
                                write_sequnlock(&rename_lock);
                                __d_drop(alias);
                                goto found;
                        } else {
                                /* Nope, but we must(!) avoid directory
                                 * aliasing */
                                actual = __d_unalias(inode, dentry, alias);
                        }
                        write_sequnlock(&rename_lock);
                        if (IS_ERR(actual)) {
                                if (PTR_ERR(actual) == -ELOOP)
                                        pr_warn_ratelimited(
                                                "VFS: Lookup of '%s' in %s %s"
                                                " would have caused loop\n",
                                                dentry->d_name.name,
                                                inode->i_sb->s_type->name,
                                                inode->i_sb->s_id);
                                dput(alias);
                        }
                        goto out_nolock;
                }
        }

        /* Add a unique reference */
        actual = __d_instantiate_unique(dentry, inode);
        if (!actual)
                actual = dentry;
        else
                BUG_ON(!d_unhashed(actual));

        spin_lock(&actual->d_lock);
found:
        _d_rehash(actual);
        spin_unlock(&actual->d_lock);
        spin_unlock(&inode->i_lock);
out_nolock:
        if (actual == dentry) {
                security_d_instantiate(dentry, inode);
                return NULL;
        }

        iput(inode);
        return actual;
}
EXPORT_SYMBOL_GPL(d_materialise_unique);
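
/*
 * Illustrative sketch (hypothetical network-filesystem ->lookup, the kind
 * of caller this helper exists for): the fs obtains the inode by remote
 * means, then lets d_materialise_unique() either hash the candidate
 * dentry or hand back a pre-existing alias:
 *
 *	res = d_materialise_unique(dentry, inode);
 *	if (IS_ERR(res))
 *		... fail the lookup (e.g. -ELOOP) ...
 *	return res;   (NULL means "use the candidate dentry itself")
 *
 * Note the reference rule visible above: when an alias is returned, the
 * inode reference passed in has already been consumed by iput().
 */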

static int prepend(char **buffer, int *buflen, const char *str, int namelen)
{
        *buflen -= namelen;
        if (*buflen < 0)
                return -ENAMETOOLONG;
        *buffer -= namelen;
        memcpy(*buffer, str, namelen);
        return 0;
}

static int prepend_name(char **buffer, int *buflen, struct qstr *name)
{
        return prepend(buffer, buflen, name->name, name->len);
}
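
/*
 * Illustrative worked example: prepend() fills the buffer from the end
 * towards the front, which is why all the path builders below start at
 * buf + buflen and return a pointer into the middle of the buffer:
 *
 *	char buf[8];
 *	char *p = buf + sizeof(buf);
 *	int len = sizeof(buf);
 *
 *	prepend(&p, &len, "\0", 1);	p -> "",      len == 7
 *	prepend(&p, &len, "bar", 3);	p -> "bar",   len == 4
 *	prepend(&p, &len, "/", 1);	p -> "/bar",  len == 3
 *	prepend(&p, &len, "longname", 8);    returns -ENAMETOOLONG
 */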

/**
 * prepend_path - Prepend path string to a buffer
 * @path: the dentry/vfsmount to report
 * @root: root vfsmnt/dentry (may be modified by this function)
 * @buffer: pointer to the end of the buffer
 * @buflen: pointer to buffer length
 *
 * Caller holds the rename_lock.
 *
 * If path is not reachable from the supplied root, then the value of
 * root is changed (without modifying refcounts).
 */
static int prepend_path(const struct path *path, struct path *root,
                        char **buffer, int *buflen)
{
        struct dentry *dentry = path->dentry;
        struct vfsmount *vfsmnt = path->mnt;
        bool slash = false;
        int error = 0;

        br_read_lock(vfsmount_lock);
        while (dentry != root->dentry || vfsmnt != root->mnt) {
                struct dentry *parent;

                if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
                        /* Global root? */
                        if (vfsmnt->mnt_parent == vfsmnt)
                                goto global_root;
                        dentry = vfsmnt->mnt_mountpoint;
                        vfsmnt = vfsmnt->mnt_parent;
                        continue;
                }
                parent = dentry->d_parent;
                prefetch(parent);
                spin_lock(&dentry->d_lock);
                error = prepend_name(buffer, buflen, &dentry->d_name);
                spin_unlock(&dentry->d_lock);
                if (!error)
                        error = prepend(buffer, buflen, "/", 1);
                if (error)
                        break;

                slash = true;
                dentry = parent;
        }

out:
        if (!error && !slash)
                error = prepend(buffer, buflen, "/", 1);

        br_read_unlock(vfsmount_lock);
        return error;

global_root:
        /*
         * Filesystems needing to implement special "root names"
         * should do so with ->d_dname()
         */
        if (IS_ROOT(dentry) &&
            (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
                WARN(1, "Root dentry has weird name <%.*s>\n",
                     (int) dentry->d_name.len, dentry->d_name.name);
        }
        root->mnt = vfsmnt;
        root->dentry = dentry;
        goto out;
}

/**
 * __d_path - return the path of a dentry
 * @path: the dentry/vfsmount to report
 * @root: root vfsmnt/dentry (may be modified by this function)
 * @buf: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name.
 *
 * Returns a pointer into the buffer or an error code if the
 * path was too long.
 *
 * "buflen" should be positive.
 *
 * If path is not reachable from the supplied root, then the value of
 * root is changed (without modifying refcounts).
 */
char *__d_path(const struct path *path, struct path *root,
               char *buf, int buflen)
{
        char *res = buf + buflen;
        int error;

        prepend(&res, &buflen, "\0", 1);
        write_seqlock(&rename_lock);
        error = prepend_path(path, root, &res, &buflen);
        write_sequnlock(&rename_lock);

        if (error)
                return ERR_PTR(error);
        return res;
}

/*
 * same as __d_path but appends "(deleted)" for unlinked files.
 */
static int path_with_deleted(const struct path *path, struct path *root,
                             char **buf, int *buflen)
{
        prepend(buf, buflen, "\0", 1);
        if (d_unlinked(path->dentry)) {
                int error = prepend(buf, buflen, " (deleted)", 10);
                if (error)
                        return error;
        }

        return prepend_path(path, root, buf, buflen);
}

static int prepend_unreachable(char **buffer, int *buflen)
{
        return prepend(buffer, buflen, "(unreachable)", 13);
}
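
/*
 * Illustrative only: __d_path() is for callers that want a path relative
 * to an explicit root rather than the calling process's root. The copy
 * matters because the root argument may be overwritten, as documented
 * above (hypothetical variables):
 *
 *	struct path root_copy = some_root;   (copy: may be modified)
 *	char *p = __d_path(&target, &root_copy, page, PAGE_SIZE);
 *	if (!IS_ERR(p))
 *		... p points into page and is NUL-terminated ...
 */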
2663 */ 2664 static char *__dentry_path(struct dentry *dentry, char *buf, int buflen) 2665 { 2666 char *end = buf + buflen; 2667 char *retval; 2668 2669 prepend(&end, &buflen, "\0", 1); 2670 if (buflen < 1) 2671 goto Elong; 2672 /* Get '/' right */ 2673 retval = end-1; 2674 *retval = '/'; 2675 2676 while (!IS_ROOT(dentry)) { 2677 struct dentry *parent = dentry->d_parent; 2678 int error; 2679 2680 prefetch(parent); 2681 spin_lock(&dentry->d_lock); 2682 error = prepend_name(&end, &buflen, &dentry->d_name); 2683 spin_unlock(&dentry->d_lock); 2684 if (error != 0 || prepend(&end, &buflen, "/", 1) != 0) 2685 goto Elong; 2686 2687 retval = end; 2688 dentry = parent; 2689 } 2690 return retval; 2691 Elong: 2692 return ERR_PTR(-ENAMETOOLONG); 2693 } 2694 2695 char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) 2696 { 2697 char *retval; 2698 2699 write_seqlock(&rename_lock); 2700 retval = __dentry_path(dentry, buf, buflen); 2701 write_sequnlock(&rename_lock); 2702 2703 return retval; 2704 } 2705 EXPORT_SYMBOL(dentry_path_raw); 2706 2707 char *dentry_path(struct dentry *dentry, char *buf, int buflen) 2708 { 2709 char *p = NULL; 2710 char *retval; 2711 2712 write_seqlock(&rename_lock); 2713 if (d_unlinked(dentry)) { 2714 p = buf + buflen; 2715 if (prepend(&p, &buflen, "//deleted", 10) != 0) 2716 goto Elong; 2717 buflen++; 2718 } 2719 retval = __dentry_path(dentry, buf, buflen); 2720 write_sequnlock(&rename_lock); 2721 if (!IS_ERR(retval) && p) 2722 *p = '/'; /* restore '/' overriden with '\0' */ 2723 return retval; 2724 Elong: 2725 return ERR_PTR(-ENAMETOOLONG); 2726 } 2727 2728 /* 2729 * NOTE! The user-level library version returns a 2730 * character pointer. The kernel system call just 2731 * returns the length of the buffer filled (which 2732 * includes the ending '\0' character), or a negative 2733 * error value. So libc would do something like 2734 * 2735 * char *getcwd(char * buf, size_t size) 2736 * { 2737 * int retval; 2738 * 2739 * retval = sys_getcwd(buf, size); 2740 * if (retval >= 0) 2741 * return buf; 2742 * errno = -retval; 2743 * return NULL; 2744 * } 2745 */ 2746 SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) 2747 { 2748 int error; 2749 struct path pwd, root; 2750 char *page = (char *) __get_free_page(GFP_USER); 2751 2752 if (!page) 2753 return -ENOMEM; 2754 2755 get_fs_root_and_pwd(current->fs, &root, &pwd); 2756 2757 error = -ENOENT; 2758 write_seqlock(&rename_lock); 2759 if (!d_unlinked(pwd.dentry)) { 2760 unsigned long len; 2761 struct path tmp = root; 2762 char *cwd = page + PAGE_SIZE; 2763 int buflen = PAGE_SIZE; 2764 2765 prepend(&cwd, &buflen, "\0", 1); 2766 error = prepend_path(&pwd, &tmp, &cwd, &buflen); 2767 write_sequnlock(&rename_lock); 2768 2769 if (error) 2770 goto out; 2771 2772 /* Unreachable from current root */ 2773 if (!path_equal(&tmp, &root)) { 2774 error = prepend_unreachable(&cwd, &buflen); 2775 if (error) 2776 goto out; 2777 } 2778 2779 error = -ERANGE; 2780 len = PAGE_SIZE + page - cwd; 2781 if (len <= size) { 2782 error = len; 2783 if (copy_to_user(buf, cwd, len)) 2784 error = -EFAULT; 2785 } 2786 } else { 2787 write_sequnlock(&rename_lock); 2788 } 2789 2790 out: 2791 path_put(&pwd); 2792 path_put(&root); 2793 free_page((unsigned long) page); 2794 return error; 2795 } 2796 2797 /* 2798 * Test whether new_dentry is a subdirectory of old_dentry. 
2799 * 2800 * Trivially implemented using the dcache structure 2801 */ 2802 2803 /** 2804 * is_subdir - is new dentry a subdirectory of old_dentry 2805 * @new_dentry: new dentry 2806 * @old_dentry: old dentry 2807 * 2808 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth). 2809 * Returns 0 otherwise. 2810 * Caller must ensure that "new_dentry" is pinned before calling is_subdir() 2811 */ 2812 2813 int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) 2814 { 2815 int result; 2816 unsigned seq; 2817 2818 if (new_dentry == old_dentry) 2819 return 1; 2820 2821 do { 2822 /* for restarting inner loop in case of seq retry */ 2823 seq = read_seqbegin(&rename_lock); 2824 /* 2825 * Need rcu_readlock to protect against the d_parent trashing 2826 * due to d_move 2827 */ 2828 rcu_read_lock(); 2829 if (d_ancestor(old_dentry, new_dentry)) 2830 result = 1; 2831 else 2832 result = 0; 2833 rcu_read_unlock(); 2834 } while (read_seqretry(&rename_lock, seq)); 2835 2836 return result; 2837 } 2838 2839 int path_is_under(struct path *path1, struct path *path2) 2840 { 2841 struct vfsmount *mnt = path1->mnt; 2842 struct dentry *dentry = path1->dentry; 2843 int res; 2844 2845 br_read_lock(vfsmount_lock); 2846 if (mnt != path2->mnt) { 2847 for (;;) { 2848 if (mnt->mnt_parent == mnt) { 2849 br_read_unlock(vfsmount_lock); 2850 return 0; 2851 } 2852 if (mnt->mnt_parent == path2->mnt) 2853 break; 2854 mnt = mnt->mnt_parent; 2855 } 2856 dentry = mnt->mnt_mountpoint; 2857 } 2858 res = is_subdir(dentry, path2->dentry); 2859 br_read_unlock(vfsmount_lock); 2860 return res; 2861 } 2862 EXPORT_SYMBOL(path_is_under); 2863 2864 void d_genocide(struct dentry *root) 2865 { 2866 struct dentry *this_parent; 2867 struct list_head *next; 2868 unsigned seq; 2869 int locked = 0; 2870 2871 seq = read_seqbegin(&rename_lock); 2872 again: 2873 this_parent = root; 2874 spin_lock(&this_parent->d_lock); 2875 repeat: 2876 next = this_parent->d_subdirs.next; 2877 resume: 2878 while (next != &this_parent->d_subdirs) { 2879 struct list_head *tmp = next; 2880 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 2881 next = tmp->next; 2882 2883 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 2884 if (d_unhashed(dentry) || !dentry->d_inode) { 2885 spin_unlock(&dentry->d_lock); 2886 continue; 2887 } 2888 if (!list_empty(&dentry->d_subdirs)) { 2889 spin_unlock(&this_parent->d_lock); 2890 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); 2891 this_parent = dentry; 2892 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); 2893 goto repeat; 2894 } 2895 if (!(dentry->d_flags & DCACHE_GENOCIDE)) { 2896 dentry->d_flags |= DCACHE_GENOCIDE; 2897 dentry->d_count--; 2898 } 2899 spin_unlock(&dentry->d_lock); 2900 } 2901 if (this_parent != root) { 2902 struct dentry *child = this_parent; 2903 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { 2904 this_parent->d_flags |= DCACHE_GENOCIDE; 2905 this_parent->d_count--; 2906 } 2907 this_parent = try_to_ascend(this_parent, locked, seq); 2908 if (!this_parent) 2909 goto rename_retry; 2910 next = child->d_u.d_child.next; 2911 goto resume; 2912 } 2913 spin_unlock(&this_parent->d_lock); 2914 if (!locked && read_seqretry(&rename_lock, seq)) 2915 goto rename_retry; 2916 if (locked) 2917 write_sequnlock(&rename_lock); 2918 return; 2919 2920 rename_retry: 2921 locked = 1; 2922 write_seqlock(&rename_lock); 2923 goto again; 2924 } 2925 2926 /** 2927 * find_inode_number - check for dentry with name 2928 * @dir: directory to check 2929 * @name: Name to find. 
2930 * 2931 * Check whether a dentry already exists for the given name, 2932 * and return the inode number if it has an inode. Otherwise 2933 * 0 is returned. 2934 * 2935 * This routine is used to post-process directory listings for 2936 * filesystems using synthetic inode numbers, and is necessary 2937 * to keep getcwd() working. 2938 */ 2939 2940 ino_t find_inode_number(struct dentry *dir, struct qstr *name) 2941 { 2942 struct dentry * dentry; 2943 ino_t ino = 0; 2944 2945 dentry = d_hash_and_lookup(dir, name); 2946 if (dentry) { 2947 if (dentry->d_inode) 2948 ino = dentry->d_inode->i_ino; 2949 dput(dentry); 2950 } 2951 return ino; 2952 } 2953 EXPORT_SYMBOL(find_inode_number); 2954 2955 static __initdata unsigned long dhash_entries; 2956 static int __init set_dhash_entries(char *str) 2957 { 2958 if (!str) 2959 return 0; 2960 dhash_entries = simple_strtoul(str, &str, 0); 2961 return 1; 2962 } 2963 __setup("dhash_entries=", set_dhash_entries); 2964 2965 static void __init dcache_init_early(void) 2966 { 2967 int loop; 2968 2969 /* If hashes are distributed across NUMA nodes, defer 2970 * hash allocation until vmalloc space is available. 2971 */ 2972 if (hashdist) 2973 return; 2974 2975 dentry_hashtable = 2976 alloc_large_system_hash("Dentry cache", 2977 sizeof(struct hlist_bl_head), 2978 dhash_entries, 2979 13, 2980 HASH_EARLY, 2981 &d_hash_shift, 2982 &d_hash_mask, 2983 0); 2984 2985 for (loop = 0; loop < (1 << d_hash_shift); loop++) 2986 INIT_HLIST_BL_HEAD(dentry_hashtable + loop); 2987 } 2988 2989 static void __init dcache_init(void) 2990 { 2991 int loop; 2992 2993 /* 2994 * A constructor could be added for stable state like the lists, 2995 * but it is probably not worth it because of the cache nature 2996 * of the dcache. 2997 */ 2998 dentry_cache = KMEM_CACHE(dentry, 2999 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); 3000 3001 /* Hash may have been set up in dcache_init_early */ 3002 if (!hashdist) 3003 return; 3004 3005 dentry_hashtable = 3006 alloc_large_system_hash("Dentry cache", 3007 sizeof(struct hlist_bl_head), 3008 dhash_entries, 3009 13, 3010 0, 3011 &d_hash_shift, 3012 &d_hash_mask, 3013 0); 3014 3015 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3016 INIT_HLIST_BL_HEAD(dentry_hashtable + loop); 3017 } 3018 3019 /* SLAB cache for __getname() consumers */ 3020 struct kmem_cache *names_cachep __read_mostly; 3021 EXPORT_SYMBOL(names_cachep); 3022 3023 EXPORT_SYMBOL(d_genocide); 3024 3025 void __init vfs_caches_init_early(void) 3026 { 3027 dcache_init_early(); 3028 inode_init_early(); 3029 } 3030 3031 void __init vfs_caches_init(unsigned long mempages) 3032 { 3033 unsigned long reserve; 3034 3035 /* Base hash sizes on available memory, with a reserve equal to 3036 150% of current kernel size */ 3037 3038 reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1); 3039 mempages -= reserve; 3040 3041 names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, 3042 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 3043 3044 dcache_init(); 3045 inode_init(); 3046 files_init(mempages); 3047 mnt_init(); 3048 bdev_cache_init(); 3049 chrdev_init(); 3050 } 3051