/*
 * fs/dcache.c
 *
 * Complete reimplementation
 * (C) 1997 Thomas Schoebel-Theuer,
 * with heavy changes by Linus Torvalds
 */

/*
 * Notes on the allocation strategy:
 *
 * The dcache is a master of the icache - whenever a dcache entry
 * exists, the inode will always exist. "iput()" is done either when
 * the dcache entry is deleted or garbage collected.
 */

#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/cache.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
#include <linux/fs_struct.h>
#include <linux/hardirq.h>
#include <linux/bit_spinlock.h>
#include <linux/rculist_bl.h>
#include "internal.h"

/*
 * Usage:
 * dentry->d_inode->i_lock protects:
 *   - i_dentry, d_alias, d_inode of aliases
 * dcache_hash_bucket lock protects:
 *   - the dcache hash table
 * s_anon bl list spinlock protects:
 *   - the s_anon list (see __d_drop)
 * dcache_lru_lock protects:
 *   - the dcache lru lists and counters
 * d_lock protects:
 *   - d_flags
 *   - d_name
 *   - d_lru
 *   - d_count
 *   - d_unhashed()
 *   - d_parent and d_subdirs
 *   - children's d_child and d_parent
 *   - d_alias, d_inode
 *
 * Ordering:
 * dentry->d_inode->i_lock
 *   dentry->d_lock
 *     dcache_lru_lock
 *     dcache_hash_bucket lock
 *     s_anon lock
 *
 * If there is an ancestor relationship:
 * dentry->d_parent->...->d_parent->d_lock
 *   ...
 *     dentry->d_parent->d_lock
 *       dentry->d_lock
 *
 * If no ancestor relationship:
 * if (dentry1 < dentry2)
 *   dentry1->d_lock
 *     dentry2->d_lock
 */
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);

EXPORT_SYMBOL(rename_lock);

static struct kmem_cache *dentry_cache __read_mostly;

/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */
#define D_HASHBITS	d_hash_shift
#define D_HASHMASK	d_hash_mask

static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;

struct dcache_hash_bucket {
	struct hlist_bl_head head;
};
static struct dcache_hash_bucket *dentry_hashtable __read_mostly;

static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
					unsigned long hash)
{
	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
	hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
	return dentry_hashtable + (hash & D_HASHMASK);
}

static inline void spin_lock_bucket(struct dcache_hash_bucket *b)
{
	bit_spin_lock(0, (unsigned long *)&b->head.first);
}

static inline void spin_unlock_bucket(struct dcache_hash_bucket *b)
{
	__bit_spin_unlock(0, (unsigned long *)&b->head.first);
}
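/*
 * Illustrative sketch (not part of the original file) of how the bucket
 * helpers above pair with the RCU list primitives: writers serialize on
 * the bit lock while readers walk the chain locklessly under
 * rcu_read_lock(), so every chain update must use the _rcu list ops:
 *
 *	struct dcache_hash_bucket *b;
 *
 *	b = d_hash(dentry->d_parent, dentry->d_name.hash);
 *	spin_lock_bucket(b);
 *	hlist_bl_del_rcu(&dentry->d_hash);
 *	spin_unlock_bucket(b);
 *
 * __d_drop() and __d_rehash() below are the real instances of this
 * pattern.
 */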
/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
	.age_limit = 45,
};

static DEFINE_PER_CPU(unsigned int, nr_dentry);

#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
static int get_nr_dentry(void)
{
	int i;
	int sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_dentry, i);
	return sum < 0 ? 0 : sum;
}

int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
		   size_t *lenp, loff_t *ppos)
{
	dentry_stat.nr_dentry = get_nr_dentry();
	return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif

static void __d_free(struct rcu_head *head)
{
	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);

	WARN_ON(!list_empty(&dentry->d_alias));
	if (dname_external(dentry))
		kfree(dentry->d_name.name);
	kmem_cache_free(dentry_cache, dentry);
}

/*
 * no locks, please.
 */
static void d_free(struct dentry *dentry)
{
	BUG_ON(dentry->d_count);
	this_cpu_dec(nr_dentry);
	if (dentry->d_op && dentry->d_op->d_release)
		dentry->d_op->d_release(dentry);

	/* if dentry was never inserted into hash, immediate free is OK */
	if (hlist_bl_unhashed(&dentry->d_hash))
		__d_free(&dentry->d_u.d_rcu);
	else
		call_rcu(&dentry->d_u.d_rcu, __d_free);
}

/**
 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 * @dentry: dentry to invalidate (aliasing lookup)
 *
 * After this call, in-progress rcu-walk path lookup will fail. This
 * should be called after unhashing, and after changing d_inode (if
 * the dentry has not already been unhashed).
 */
static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
{
	assert_spin_locked(&dentry->d_lock);
	/* Go through a barrier */
	write_seqcount_barrier(&dentry->d_seq);
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. Dentry has no refcount
 * and is unhashed.
 */
static void dentry_iput(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	if (inode) {
		dentry->d_inode = NULL;
		list_del_init(&dentry->d_alias);
		spin_unlock(&dentry->d_lock);
		spin_unlock(&inode->i_lock);
		if (!inode->i_nlink)
			fsnotify_inoderemove(inode);
		if (dentry->d_op && dentry->d_op->d_iput)
			dentry->d_op->d_iput(dentry, inode);
		else
			iput(inode);
	} else {
		spin_unlock(&dentry->d_lock);
	}
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. dentry remains in-use.
 */
static void dentry_unlink_inode(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	dentry->d_inode = NULL;
	list_del_init(&dentry->d_alias);
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&inode->i_lock);
	if (!inode->i_nlink)
		fsnotify_inoderemove(inode);
	if (dentry->d_op && dentry->d_op->d_iput)
		dentry->d_op->d_iput(dentry, inode);
	else
		iput(inode);
}
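/*
 * Contrast between the two helpers above (summary, not in the original):
 * dentry_iput() is called for a dentry that is already dead - refcount
 * gone and unhashed - so rcu-walk lookups were invalidated when it was
 * unhashed. dentry_unlink_inode() turns a still-live dentry negative,
 * so it must issue dentry_rcuwalk_barrier() itself to force rcu-walkers
 * that sampled the old d_inode to retry.
 */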
/*
 * dentry_lru_(add|del|move_tail) must be called with d_lock held.
 */
static void dentry_lru_add(struct dentry *dentry)
{
	if (list_empty(&dentry->d_lru)) {
		spin_lock(&dcache_lru_lock);
		list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
		dentry->d_sb->s_nr_dentry_unused++;
		dentry_stat.nr_unused++;
		spin_unlock(&dcache_lru_lock);
	}
}

static void __dentry_lru_del(struct dentry *dentry)
{
	list_del_init(&dentry->d_lru);
	dentry->d_sb->s_nr_dentry_unused--;
	dentry_stat.nr_unused--;
}

static void dentry_lru_del(struct dentry *dentry)
{
	if (!list_empty(&dentry->d_lru)) {
		spin_lock(&dcache_lru_lock);
		__dentry_lru_del(dentry);
		spin_unlock(&dcache_lru_lock);
	}
}

static void dentry_lru_move_tail(struct dentry *dentry)
{
	spin_lock(&dcache_lru_lock);
	if (list_empty(&dentry->d_lru)) {
		list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
		dentry->d_sb->s_nr_dentry_unused++;
		dentry_stat.nr_unused++;
	} else {
		list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
	}
	spin_unlock(&dcache_lru_lock);
}

/**
 * d_kill - kill dentry and return parent
 * @dentry: dentry to kill
 * @parent: parent dentry
 *
 * The dentry must already be unhashed and removed from the LRU.
 *
 * If this is the root of the dentry tree, return NULL.
 *
 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
 * d_kill.
 */
static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
	__releases(dentry->d_lock)
	__releases(parent->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	dentry->d_parent = NULL;
	list_del(&dentry->d_u.d_child);
	if (parent)
		spin_unlock(&parent->d_lock);
	dentry_iput(dentry);
	/*
	 * dentry_iput drops the locks, at which point nobody (except
	 * transient RCU lookups) can reach this dentry.
	 */
	d_free(dentry);
	return parent;
}

/**
 * d_drop - drop a dentry
 * @dentry: dentry to drop
 *
 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 * be found through a VFS lookup any more. Note that this is different from
 * deleting the dentry - d_delete will try to mark the dentry negative if
 * possible, giving a successful _negative_ lookup, while d_drop will
 * just make the cache lookup fail.
 *
 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 * reason (NFS timeouts or autofs deletes).
 *
 * __d_drop requires dentry->d_lock.
 */
void __d_drop(struct dentry *dentry)
{
	if (!(dentry->d_flags & DCACHE_UNHASHED)) {
		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) {
			bit_spin_lock(0,
				(unsigned long *)&dentry->d_sb->s_anon.first);
			dentry->d_flags |= DCACHE_UNHASHED;
			hlist_bl_del_init(&dentry->d_hash);
			__bit_spin_unlock(0,
				(unsigned long *)&dentry->d_sb->s_anon.first);
		} else {
			struct dcache_hash_bucket *b;
			b = d_hash(dentry->d_parent, dentry->d_name.hash);
			spin_lock_bucket(b);
			/*
			 * We may not actually need to put DCACHE_UNHASHED
			 * manipulations under the hash lock, but follow
			 * the principle of least surprise.
			 */
			dentry->d_flags |= DCACHE_UNHASHED;
			hlist_bl_del_rcu(&dentry->d_hash);
			spin_unlock_bucket(b);
			dentry_rcuwalk_barrier(dentry);
		}
	}
}
EXPORT_SYMBOL(__d_drop);

void d_drop(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(d_drop);

/*
 * Finish off a dentry we've decided to kill.
 * dentry->d_lock must be held, returns with it unlocked.
 * If ref is non-zero, then decrement the refcount too.
 * Returns dentry requiring refcount drop, or NULL if we're done.
 */
static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
	__releases(dentry->d_lock)
{
	struct inode *inode;
	struct dentry *parent;

	inode = dentry->d_inode;
	if (inode && !spin_trylock(&inode->i_lock)) {
relock:
		spin_unlock(&dentry->d_lock);
		cpu_relax();
		return dentry; /* try again with same dentry */
	}
	if (IS_ROOT(dentry))
		parent = NULL;
	else
		parent = dentry->d_parent;
	if (parent && !spin_trylock(&parent->d_lock)) {
		if (inode)
			spin_unlock(&inode->i_lock);
		goto relock;
	}

	if (ref)
		dentry->d_count--;
	/* if dentry was on the d_lru list delete it from there */
	dentry_lru_del(dentry);
	/* if it was on the hash then remove it */
	__d_drop(dentry);
	return d_kill(dentry, parent);
}

/*
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
 */

/*
 * dput - release a dentry
 * @dentry: dentry to release
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 */
void dput(struct dentry *dentry)
{
	if (!dentry)
		return;

repeat:
	if (dentry->d_count == 1)
		might_sleep();
	spin_lock(&dentry->d_lock);
	BUG_ON(!dentry->d_count);
	if (dentry->d_count > 1) {
		dentry->d_count--;
		spin_unlock(&dentry->d_lock);
		return;
	}

	if (dentry->d_flags & DCACHE_OP_DELETE) {
		if (dentry->d_op->d_delete(dentry))
			goto kill_it;
	}

	/* Unreachable? Get rid of it */
	if (d_unhashed(dentry))
		goto kill_it;

	/* Otherwise leave it cached and ensure it's on the LRU */
	dentry->d_flags |= DCACHE_REFERENCED;
	dentry_lru_add(dentry);

	dentry->d_count--;
	spin_unlock(&dentry->d_lock);
	return;

kill_it:
	dentry = dentry_kill(dentry, 1);
	if (dentry)
		goto repeat;
}
EXPORT_SYMBOL(dput);
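/*
 * Illustrative (hypothetical) caller pattern, not part of the original
 * file - a reference pins the dentry and its name but takes no locks:
 *
 *	struct dentry *d = dget(some_dentry);
 *	...				(d cannot be freed here)
 *	dput(d);			(last drop may call dentry_kill())
 *
 * Note that dput() can sleep (e.g. in iput()), which is what the
 * might_sleep() annotation above asserts.
 */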
/**
 * d_invalidate - invalidate a dentry
 * @dentry: dentry to invalidate
 *
 * Try to invalidate the dentry if it turns out to be
 * possible. If there are other dentries that can be
 * reached through this one we can't delete it and we
 * return -EBUSY. On success we return 0.
 *
 * no dcache lock.
 */

int d_invalidate(struct dentry * dentry)
{
	/*
	 * If it's already been dropped, return OK.
	 */
	spin_lock(&dentry->d_lock);
	if (d_unhashed(dentry)) {
		spin_unlock(&dentry->d_lock);
		return 0;
	}
	/*
	 * Check whether to do a partial shrink_dcache
	 * to get rid of unused child entries.
	 */
	if (!list_empty(&dentry->d_subdirs)) {
		spin_unlock(&dentry->d_lock);
		shrink_dcache_parent(dentry);
		spin_lock(&dentry->d_lock);
	}

	/*
	 * Somebody else still using it?
	 *
	 * If it's a directory, we can't drop it
	 * for fear of somebody re-populating it
	 * with children (even though dropping it
	 * would make it unreachable from the root,
	 * we might still populate it if it was a
	 * working directory or similar).
	 */
	if (dentry->d_count > 1) {
		if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
			spin_unlock(&dentry->d_lock);
			return -EBUSY;
		}
	}

	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
	return 0;
}
EXPORT_SYMBOL(d_invalidate);

/* This must be called with d_lock held */
static inline void __dget_dlock(struct dentry *dentry)
{
	dentry->d_count++;
}

static inline void __dget(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__dget_dlock(dentry);
	spin_unlock(&dentry->d_lock);
}

struct dentry *dget_parent(struct dentry *dentry)
{
	struct dentry *ret;

repeat:
	/*
	 * Don't need rcu_dereference because we re-check it was correct under
	 * the lock.
	 */
	rcu_read_lock();
	ret = dentry->d_parent;
	if (!ret) {
		rcu_read_unlock();
		goto out;
	}
	spin_lock(&ret->d_lock);
	if (unlikely(ret != dentry->d_parent)) {
		spin_unlock(&ret->d_lock);
		rcu_read_unlock();
		goto repeat;
	}
	rcu_read_unlock();
	BUG_ON(!ret->d_count);
	ret->d_count++;
	spin_unlock(&ret->d_lock);
out:
	return ret;
}
EXPORT_SYMBOL(dget_parent);

/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 * @want_discon: flag, used by d_splice_alias, to request
 *          that only a DISCONNECTED alias be returned.
 *
 * If inode has a hashed alias, or is a directory and has any alias,
 * acquire the reference to alias and return it. Otherwise return NULL.
 * Notice that if inode is a directory there can be only one alias and
 * it can be unhashed only if it has no children, or if it is the root
 * of a filesystem.
 *
 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 * any other hashed alias over that one unless @want_discon is set,
 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 */
static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
{
	struct dentry *alias, *discon_alias;

again:
	discon_alias = NULL;
	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				discon_alias = alias;
			} else if (!want_discon) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
	}
	if (discon_alias) {
		alias = discon_alias;
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
		goto again;
	}
	return NULL;
}

struct dentry *d_find_alias(struct inode *inode)
{
	struct dentry *de = NULL;

	if (!list_empty(&inode->i_dentry)) {
		spin_lock(&inode->i_lock);
		de = __d_find_alias(inode, 0);
		spin_unlock(&inode->i_lock);
	}
	return de;
}
EXPORT_SYMBOL(d_find_alias);

/*
 * Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
	struct dentry *dentry;
restart:
	spin_lock(&inode->i_lock);
	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
		spin_lock(&dentry->d_lock);
		if (!dentry->d_count) {
			__dget_dlock(dentry);
			__d_drop(dentry);
			spin_unlock(&dentry->d_lock);
			spin_unlock(&inode->i_lock);
			dput(dentry);
			goto restart;
		}
		spin_unlock(&dentry->d_lock);
	}
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_prune_aliases);
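/*
 * Why d_prune_aliases() restarts after each kill (summary, not in the
 * original): the final dput() itself takes inode->i_lock in
 * dentry_kill(), so it must not be called with i_lock held. The walk
 * therefore drops both locks before dput(), which invalidates the list
 * cursor, and the only safe continuation is to retake i_lock and rescan
 * the i_dentry list from the start.
 */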
/*
 * Try to throw away a dentry - free the inode, dput the parent.
 * Requires dentry->d_lock is held, and dentry->d_count == 0.
 * Releases dentry->d_lock.
 *
 * This may fail if locks cannot be acquired - no problem, just try again.
 */
static void try_prune_one_dentry(struct dentry *dentry)
	__releases(dentry->d_lock)
{
	struct dentry *parent;

	parent = dentry_kill(dentry, 0);
	/*
	 * If dentry_kill returns NULL, we have nothing more to do.
	 * if it returns the same dentry, trylocks failed. In either
	 * case, just loop again.
	 *
	 * Otherwise, we need to prune ancestors too. This is necessary
	 * to prevent quadratic behavior of shrink_dcache_parent(), but
	 * is also expected to be beneficial in reducing dentry cache
	 * fragmentation.
	 */
	if (!parent)
		return;
	if (parent == dentry)
		return;

	/* Prune ancestors. */
	dentry = parent;
	while (dentry) {
		spin_lock(&dentry->d_lock);
		if (dentry->d_count > 1) {
			dentry->d_count--;
			spin_unlock(&dentry->d_lock);
			return;
		}
		dentry = dentry_kill(dentry, 1);
	}
}

static void shrink_dentry_list(struct list_head *list)
{
	struct dentry *dentry;

	rcu_read_lock();
	for (;;) {
		dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
		if (&dentry->d_lru == list)
			break; /* empty */
		spin_lock(&dentry->d_lock);
		if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
			spin_unlock(&dentry->d_lock);
			continue;
		}

		/*
		 * We found an inuse dentry which was not removed from
		 * the LRU because of laziness during lookup. Do not free
		 * it - just keep it off the LRU list.
		 */
		if (dentry->d_count) {
			dentry_lru_del(dentry);
			spin_unlock(&dentry->d_lock);
			continue;
		}

		rcu_read_unlock();

		try_prune_one_dentry(dentry);

		rcu_read_lock();
	}
	rcu_read_unlock();
}

/**
 * __shrink_dcache_sb - shrink the dentry LRU on a given superblock
 * @sb:		superblock to shrink dentry LRU.
 * @count:	number of entries to prune
 * @flags:	flags to control the dentry processing
 *
 * If flags contains DCACHE_REFERENCED, referenced dentries will not be pruned.
 */
static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
{
	/* called from prune_dcache() and shrink_dcache_parent() */
	struct dentry *dentry;
	LIST_HEAD(referenced);
	LIST_HEAD(tmp);
	int cnt = *count;

relock:
	spin_lock(&dcache_lru_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		dentry = list_entry(sb->s_dentry_lru.prev,
				struct dentry, d_lru);
		BUG_ON(dentry->d_sb != sb);

		if (!spin_trylock(&dentry->d_lock)) {
			spin_unlock(&dcache_lru_lock);
			cpu_relax();
			goto relock;
		}

		/*
		 * If we are honouring the DCACHE_REFERENCED flag and the
		 * dentry has this flag set, don't free it. Clear the flag
		 * and put it back on the LRU.
		 */
		if (flags & DCACHE_REFERENCED &&
				dentry->d_flags & DCACHE_REFERENCED) {
			dentry->d_flags &= ~DCACHE_REFERENCED;
			list_move(&dentry->d_lru, &referenced);
			spin_unlock(&dentry->d_lock);
		} else {
			list_move_tail(&dentry->d_lru, &tmp);
			spin_unlock(&dentry->d_lock);
			if (!--cnt)
				break;
		}
		cond_resched_lock(&dcache_lru_lock);
	}
	if (!list_empty(&referenced))
		list_splice(&referenced, &sb->s_dentry_lru);
	spin_unlock(&dcache_lru_lock);

	shrink_dentry_list(&tmp);

	*count = cnt;
}

/**
 * prune_dcache - shrink the dcache
 * @count: number of entries to try to free
 *
 * Shrink the dcache. This is done when we need more memory, or simply when we
 * need to unmount something (at which point we need to unuse all dentries).
 *
 * This function may fail to free any resources if all the dentries are in use.
 */
static void prune_dcache(int count)
{
	struct super_block *sb, *p = NULL;
	int w_count;
	int unused = dentry_stat.nr_unused;
	int prune_ratio;
	int pruned;

	if (unused == 0 || count == 0)
		return;
	if (count >= unused)
		prune_ratio = 1;
	else
		prune_ratio = unused / count;
	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (list_empty(&sb->s_instances))
			continue;
		if (sb->s_nr_dentry_unused == 0)
			continue;
		sb->s_count++;
		/* Now, we reclaim unused dentries with fairness.
		 * We reclaim the same percentage from each superblock.
		 * We calculate the number of dentries to scan on this sb
		 * as follows, but the implementation is arranged to avoid
		 * overflows:
		 * number of dentries to scan on this sb =
		 * count * (number of dentries on this sb /
		 * number of dentries in the machine)
		 */
		spin_unlock(&sb_lock);
		if (prune_ratio != 1)
			w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
		else
			w_count = sb->s_nr_dentry_unused;
		pruned = w_count;
		/*
		 * We need to be sure this filesystem isn't being unmounted,
		 * otherwise we could race with generic_shutdown_super(), and
		 * end up holding a reference to an inode while the filesystem
		 * is unmounted. So we try to get s_umount, and make sure
		 * s_root isn't NULL.
		 */
		if (down_read_trylock(&sb->s_umount)) {
			if ((sb->s_root != NULL) &&
			    (!list_empty(&sb->s_dentry_lru))) {
				__shrink_dcache_sb(sb, &w_count,
						DCACHE_REFERENCED);
				pruned -= w_count;
			}
			up_read(&sb->s_umount);
		}
		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		count -= pruned;
		p = sb;
		/* more work left to do? */
		if (count <= 0)
			break;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}
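/*
 * Worked example of the fairness calculation above (illustrative
 * numbers, not from the original): with count = 100 entries requested
 * and unused = 1000 system-wide, prune_ratio = 1000 / 100 = 10. A
 * superblock holding 300 unused dentries is asked to scan
 * w_count = 300 / 10 + 1 = 31, close to its 30-entry proportional
 * share, and count * s_nr_dentry_unused is never formed, so the int
 * arithmetic cannot overflow on large machines.
 */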
/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This is used to free
 * the dcache before unmounting a file system.
 */
void shrink_dcache_sb(struct super_block *sb)
{
	LIST_HEAD(tmp);

	spin_lock(&dcache_lru_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		list_splice_init(&sb->s_dentry_lru, &tmp);
		spin_unlock(&dcache_lru_lock);
		shrink_dentry_list(&tmp);
		spin_lock(&dcache_lru_lock);
	}
	spin_unlock(&dcache_lru_lock);
}
EXPORT_SYMBOL(shrink_dcache_sb);

/*
 * destroy a single subtree of dentries for unmount
 * - see the comments on shrink_dcache_for_umount() for a description of the
 *   locking
 */
static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
{
	struct dentry *parent;
	unsigned detached = 0;

	BUG_ON(!IS_ROOT(dentry));

	/* detach this root from the system */
	spin_lock(&dentry->d_lock);
	dentry_lru_del(dentry);
	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);

	for (;;) {
		/* descend to the first leaf in the current subtree */
		while (!list_empty(&dentry->d_subdirs)) {
			struct dentry *loop;

			/* this is a branch with children - detach all of them
			 * from the system in one go */
			spin_lock(&dentry->d_lock);
			list_for_each_entry(loop, &dentry->d_subdirs,
					    d_u.d_child) {
				spin_lock_nested(&loop->d_lock,
						DENTRY_D_LOCK_NESTED);
				dentry_lru_del(loop);
				__d_drop(loop);
				spin_unlock(&loop->d_lock);
			}
			spin_unlock(&dentry->d_lock);

			/* move to the first child */
			dentry = list_entry(dentry->d_subdirs.next,
					    struct dentry, d_u.d_child);
		}

		/* consume the dentries from this leaf up through its parents
		 * until we find one with children or run out altogether */
		do {
			struct inode *inode;

			if (dentry->d_count != 0) {
				printk(KERN_ERR
				       "BUG: Dentry %p{i=%lx,n=%s}"
				       " still in use (%d)"
				       " [unmount of %s %s]\n",
				       dentry,
				       dentry->d_inode ?
				       dentry->d_inode->i_ino : 0UL,
				       dentry->d_name.name,
				       dentry->d_count,
				       dentry->d_sb->s_type->name,
				       dentry->d_sb->s_id);
				BUG();
			}

			if (IS_ROOT(dentry)) {
				parent = NULL;
				list_del(&dentry->d_u.d_child);
			} else {
				parent = dentry->d_parent;
				spin_lock(&parent->d_lock);
				parent->d_count--;
				list_del(&dentry->d_u.d_child);
				spin_unlock(&parent->d_lock);
			}

			detached++;

			inode = dentry->d_inode;
			if (inode) {
				dentry->d_inode = NULL;
				list_del_init(&dentry->d_alias);
				if (dentry->d_op && dentry->d_op->d_iput)
					dentry->d_op->d_iput(dentry, inode);
				else
					iput(inode);
			}

			d_free(dentry);

			/* finished when we fall off the top of the tree,
			 * otherwise we ascend to the parent and move to the
			 * next sibling if there is one */
			if (!parent)
				return;
			dentry = parent;
		} while (list_empty(&dentry->d_subdirs));

		dentry = list_entry(dentry->d_subdirs.next,
				    struct dentry, d_u.d_child);
	}
}

/*
 * destroy the dentries attached to a superblock on unmounting
 * - we don't need to use dentry->d_lock because:
 *   - the superblock is detached from all mountings and open files, so the
 *     dentry trees will not be rearranged by the VFS
 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
 *     any dentries belonging to this superblock that it comes across
 *   - the filesystem itself is no longer permitted to rearrange the dentries
 *     in this superblock
 */
void shrink_dcache_for_umount(struct super_block *sb)
{
	struct dentry *dentry;

	if (down_read_trylock(&sb->s_umount))
		BUG();

	dentry = sb->s_root;
	sb->s_root = NULL;
	spin_lock(&dentry->d_lock);
	dentry->d_count--;
	spin_unlock(&dentry->d_lock);
	shrink_dcache_for_umount_subtree(dentry);

	while (!hlist_bl_empty(&sb->s_anon)) {
		dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon),
					struct dentry, d_hash);
		shrink_dcache_for_umount_subtree(dentry);
	}
}

/*
 * Search for at least 1 mount point in the dentry's subdirs.
 * We descend to the next level whenever the d_subdirs
 * list is non-empty and continue searching.
 */

/**
 * have_submounts - check for mounts over a dentry
 * @parent: dentry to check.
 *
 * Return true if the parent or its subdirectories contain
 * a mount point
 */
int have_submounts(struct dentry *parent)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int locked = 0;

	seq = read_seqbegin(&rename_lock);
again:
	this_parent = parent;

	if (d_mountpoint(parent))
		goto positive;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		/* Have we found a mount point ? */
		if (d_mountpoint(dentry)) {
			spin_unlock(&dentry->d_lock);
			spin_unlock(&this_parent->d_lock);
			goto positive;
		}
		if (!list_empty(&dentry->d_subdirs)) {
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}
		spin_unlock(&dentry->d_lock);
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		struct dentry *tmp;
		struct dentry *child;

		tmp = this_parent->d_parent;
		rcu_read_lock();
		spin_unlock(&this_parent->d_lock);
		child = this_parent;
		this_parent = tmp;
		spin_lock(&this_parent->d_lock);
		/* might go back up the wrong parent if we have had a rename
		 * or deletion */
		if (this_parent != child->d_parent ||
			 (!locked && read_seqretry(&rename_lock, seq))) {
			spin_unlock(&this_parent->d_lock);
			rcu_read_unlock();
			goto rename_retry;
		}
		rcu_read_unlock();
		next = child->d_u.d_child.next;
		goto resume;
	}
	spin_unlock(&this_parent->d_lock);
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return 0; /* No mount points found in tree */
positive:
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return 1;

rename_retry:
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}
EXPORT_SYMBOL(have_submounts);

/*
 * Search the dentry child list for the specified parent,
 * and move any unused dentries to the end of the unused
 * list for prune_dcache(). We descend to the next level
 * whenever the d_subdirs list is non-empty and continue
 * searching.
 *
 * It returns zero iff there are no unused children,
 * otherwise it returns the number of children moved to
 * the end of the unused list. This may not be the total
 * number of unused children, because select_parent can
 * drop the lock and return early due to latency
 * constraints.
 */
static int select_parent(struct dentry * parent)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int found = 0;
	int locked = 0;

	seq = read_seqbegin(&rename_lock);
again:
	this_parent = parent;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);

		/*
		 * move only zero ref count dentries to the end
		 * of the unused list for prune_dcache
		 */
		if (!dentry->d_count) {
			dentry_lru_move_tail(dentry);
			found++;
		} else {
			dentry_lru_del(dentry);
		}

		/*
		 * We can return to the caller if we have found some (this
		 * ensures forward progress). We'll be coming back to find
		 * the rest.
		 */
		if (found && need_resched()) {
			spin_unlock(&dentry->d_lock);
			goto out;
		}

		/*
		 * Descend a level if the d_subdirs list is non-empty.
		 */
		if (!list_empty(&dentry->d_subdirs)) {
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}

		spin_unlock(&dentry->d_lock);
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		struct dentry *tmp;
		struct dentry *child;

		tmp = this_parent->d_parent;
		rcu_read_lock();
		spin_unlock(&this_parent->d_lock);
		child = this_parent;
		this_parent = tmp;
		spin_lock(&this_parent->d_lock);
		/* might go back up the wrong parent if we have had a rename
		 * or deletion */
		if (this_parent != child->d_parent ||
			(!locked && read_seqretry(&rename_lock, seq))) {
			spin_unlock(&this_parent->d_lock);
			rcu_read_unlock();
			goto rename_retry;
		}
		rcu_read_unlock();
		next = child->d_u.d_child.next;
		goto resume;
	}
out:
	spin_unlock(&this_parent->d_lock);
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return found;

rename_retry:
	if (found)
		return found;
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}

/**
 * shrink_dcache_parent - prune dcache
 * @parent: parent of entries to prune
 *
 * Prune the dcache to remove unused children of the parent dentry.
 */

void shrink_dcache_parent(struct dentry * parent)
{
	struct super_block *sb = parent->d_sb;
	int found;

	while ((found = select_parent(parent)) != 0)
		__shrink_dcache_sb(sb, &found, 0);
}
EXPORT_SYMBOL(shrink_dcache_parent);

/*
 * Scan `nr' dentries and return the number which remain.
 *
 * We need to avoid reentering the filesystem if the caller is performing a
 * GFP_NOFS allocation attempt. One example deadlock is:
 *
 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
 *
 * In this case we return -1 to tell the caller that we bailed.
 */
static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
	if (nr) {
		if (!(gfp_mask & __GFP_FS))
			return -1;
		prune_dcache(nr);
	}

	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}

static struct shrinker dcache_shrinker = {
	.shrink = shrink_dcache_memory,
	.seeks = DEFAULT_SEEKS,
};
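/*
 * Shrinker arithmetic spelled out (illustrative numbers, not from the
 * original): the non-reclaim return value is the estimate of freeable
 * dentries scaled by the vfs_cache_pressure sysctl. With 10000 unused
 * dentries and the default vfs_cache_pressure = 100, the shrinker
 * reports (10000 / 100) * 100 = 10000; tuning the sysctl down to 50
 * makes it report 5000, so the VM sees the dcache as half as
 * reclaimable.
 */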
/**
 * d_alloc	-	allocate a dcache entry
 * @parent: parent of entry to allocate
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
 * available. On a success the dentry is returned. The name passed in is
 * copied and the copy passed in may be reused after this call.
 */

struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
{
	struct dentry *dentry;
	char *dname;

	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
	if (!dentry)
		return NULL;

	if (name->len > DNAME_INLINE_LEN-1) {
		dname = kmalloc(name->len + 1, GFP_KERNEL);
		if (!dname) {
			kmem_cache_free(dentry_cache, dentry);
			return NULL;
		}
	} else {
		dname = dentry->d_iname;
	}
	dentry->d_name.name = dname;

	dentry->d_name.len = name->len;
	dentry->d_name.hash = name->hash;
	memcpy(dname, name->name, name->len);
	dname[name->len] = 0;

	dentry->d_count = 1;
	dentry->d_flags = DCACHE_UNHASHED;
	spin_lock_init(&dentry->d_lock);
	seqcount_init(&dentry->d_seq);
	dentry->d_inode = NULL;
	dentry->d_parent = NULL;
	dentry->d_sb = NULL;
	dentry->d_op = NULL;
	dentry->d_fsdata = NULL;
	INIT_HLIST_BL_NODE(&dentry->d_hash);
	INIT_LIST_HEAD(&dentry->d_lru);
	INIT_LIST_HEAD(&dentry->d_subdirs);
	INIT_LIST_HEAD(&dentry->d_alias);
	INIT_LIST_HEAD(&dentry->d_u.d_child);

	if (parent) {
		spin_lock(&parent->d_lock);
		/*
		 * don't need child lock because it is not subject
		 * to concurrency here
		 */
		__dget_dlock(parent);
		dentry->d_parent = parent;
		dentry->d_sb = parent->d_sb;
		d_set_d_op(dentry, dentry->d_sb->s_d_op);
		list_add(&dentry->d_u.d_child, &parent->d_subdirs);
		spin_unlock(&parent->d_lock);
	}

	this_cpu_inc(nr_dentry);

	return dentry;
}
EXPORT_SYMBOL(d_alloc);

struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
{
	struct dentry *dentry = d_alloc(NULL, name);
	if (dentry) {
		dentry->d_sb = sb;
		d_set_d_op(dentry, dentry->d_sb->s_d_op);
		dentry->d_parent = dentry;
		dentry->d_flags |= DCACHE_DISCONNECTED;
	}
	return dentry;
}
EXPORT_SYMBOL(d_alloc_pseudo);

struct dentry *d_alloc_name(struct dentry *parent, const char *name)
{
	struct qstr q;

	q.name = name;
	q.len = strlen(name);
	q.hash = full_name_hash(q.name, q.len);
	return d_alloc(parent, &q);
}
EXPORT_SYMBOL(d_alloc_name);

void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
{
	WARN_ON_ONCE(dentry->d_op);
	WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH	|
				DCACHE_OP_COMPARE	|
				DCACHE_OP_REVALIDATE	|
				DCACHE_OP_DELETE));
	dentry->d_op = op;
	if (!op)
		return;
	if (op->d_hash)
		dentry->d_flags |= DCACHE_OP_HASH;
	if (op->d_compare)
		dentry->d_flags |= DCACHE_OP_COMPARE;
	if (op->d_revalidate)
		dentry->d_flags |= DCACHE_OP_REVALIDATE;
	if (op->d_delete)
		dentry->d_flags |= DCACHE_OP_DELETE;
}
EXPORT_SYMBOL(d_set_d_op);

static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
	spin_lock(&dentry->d_lock);
	if (inode) {
		if (unlikely(IS_AUTOMOUNT(inode)))
			dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
		list_add(&dentry->d_alias, &inode->i_dentry);
	}
	dentry->d_inode = inode;
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	fsnotify_d_instantiate(dentry, inode);
}

/**
 * d_instantiate - fill in inode information for a dentry
 * @entry: dentry to complete
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry.
 *
 * This turns negative dentries into productive full members
 * of society.
 *
 * NOTE! This assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */

void d_instantiate(struct dentry *entry, struct inode * inode)
{
	BUG_ON(!list_empty(&entry->d_alias));
	if (inode)
		spin_lock(&inode->i_lock);
	__d_instantiate(entry, inode);
	if (inode)
		spin_unlock(&inode->i_lock);
	security_d_instantiate(entry, inode);
}
EXPORT_SYMBOL(d_instantiate);
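/*
 * Illustrative (hypothetical) ->create sketch, not part of this file,
 * showing the d_instantiate() calling convention; my_fs_new_inode() is
 * a made-up helper returning an inode with its initial reference held:
 *
 *	static int my_fs_create(struct inode *dir, struct dentry *dentry,
 *				int mode, struct nameidata *nd)
 *	{
 *		struct inode *inode = my_fs_new_inode(dir, mode);
 *
 *		if (!inode)
 *			return -ENOSPC;
 *		d_instantiate(dentry, inode);	// consumes the inode ref
 *		return 0;
 *	}
 */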
/**
 * d_instantiate_unique - instantiate a non-aliased dentry
 * @entry: dentry to instantiate
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry. On success, it returns NULL.
 * If an unhashed alias of "entry" already exists, then we return the
 * aliased dentry instead and drop one reference to inode.
 *
 * Note that in order to avoid conflicts with rename() etc, the caller
 * had better be holding the parent directory semaphore.
 *
 * This also assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */
static struct dentry *__d_instantiate_unique(struct dentry *entry,
					     struct inode *inode)
{
	struct dentry *alias;
	int len = entry->d_name.len;
	const char *name = entry->d_name.name;
	unsigned int hash = entry->d_name.hash;

	if (!inode) {
		__d_instantiate(entry, NULL);
		return NULL;
	}

	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
		struct qstr *qstr = &alias->d_name;

		/*
		 * Don't need alias->d_lock here, because aliases with
		 * d_parent == entry->d_parent are not subject to name or
		 * parent changes, because the parent inode i_mutex is held.
		 */
		if (qstr->hash != hash)
			continue;
		if (alias->d_parent != entry->d_parent)
			continue;
		if (dentry_cmp(qstr->name, qstr->len, name, len))
			continue;
		__dget(alias);
		return alias;
	}

	__d_instantiate(entry, inode);
	return NULL;
}

struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
	struct dentry *result;

	BUG_ON(!list_empty(&entry->d_alias));

	if (inode)
		spin_lock(&inode->i_lock);
	result = __d_instantiate_unique(entry, inode);
	if (inode)
		spin_unlock(&inode->i_lock);

	if (!result) {
		security_d_instantiate(entry, inode);
		return NULL;
	}

	BUG_ON(!d_unhashed(result));
	iput(inode);
	return result;
}
EXPORT_SYMBOL(d_instantiate_unique);

/**
 * d_alloc_root - allocate root dentry
 * @root_inode: inode to allocate the root for
 *
 * Allocate a root ("/") dentry for the inode given. The inode is
 * instantiated and returned. %NULL is returned if there is insufficient
 * memory or the inode passed is %NULL.
 */

struct dentry * d_alloc_root(struct inode * root_inode)
{
	struct dentry *res = NULL;

	if (root_inode) {
		static const struct qstr name = { .name = "/", .len = 1 };

		res = d_alloc(NULL, &name);
		if (res) {
			res->d_sb = root_inode->i_sb;
			d_set_d_op(res, res->d_sb->s_d_op);
			res->d_parent = res;
			d_instantiate(res, root_inode);
		}
	}
	return res;
}
EXPORT_SYMBOL(d_alloc_root);

/**
 * d_obtain_alias - find or allocate a dentry for a given inode
 * @inode: inode to allocate the dentry for
 *
 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
 * similar open by handle operations. The returned dentry may be anonymous,
 * or may have a full name (if the inode was already in the cache).
 *
 * When called on a directory inode, we must ensure that the inode only ever
 * has one dentry. If a dentry is found, that is returned instead of
 * allocating a new one.
 *
 * On successful return, the reference to the inode has been transferred
 * to the dentry. In case of an error the reference on the inode is released.
 * To make it easier to use in export operations a %NULL or IS_ERR inode may
 * be passed in and the error will be propagated to the return value,
 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
 */
struct dentry *d_obtain_alias(struct inode *inode)
{
	static const struct qstr anonstring = { .name = "" };
	struct dentry *tmp;
	struct dentry *res;

	if (!inode)
		return ERR_PTR(-ESTALE);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	res = d_find_alias(inode);
	if (res)
		goto out_iput;

	tmp = d_alloc(NULL, &anonstring);
	if (!tmp) {
		res = ERR_PTR(-ENOMEM);
		goto out_iput;
	}
	tmp->d_parent = tmp; /* make sure dput doesn't croak */

	spin_lock(&inode->i_lock);
	res = __d_find_alias(inode, 0);
	if (res) {
		spin_unlock(&inode->i_lock);
		dput(tmp);
		goto out_iput;
	}

	/* attach a disconnected dentry */
	spin_lock(&tmp->d_lock);
	tmp->d_sb = inode->i_sb;
	d_set_d_op(tmp, tmp->d_sb->s_d_op);
	tmp->d_inode = inode;
	tmp->d_flags |= DCACHE_DISCONNECTED;
	list_add(&tmp->d_alias, &inode->i_dentry);
	bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
	tmp->d_flags &= ~DCACHE_UNHASHED;
	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
	__bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
	spin_unlock(&tmp->d_lock);
	spin_unlock(&inode->i_lock);

	return tmp;

out_iput:
	iput(inode);
	return res;
}
EXPORT_SYMBOL(d_obtain_alias);
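/*
 * Illustrative (hypothetical) export_operations sketch, not part of
 * this file: because d_obtain_alias() accepts and propagates NULL and
 * IS_ERR inodes, an ->fh_to_dentry implementation can collapse to a
 * single call; my_fs_iget_from_fh() is a made-up decoding helper:
 *
 *	static struct dentry *my_fs_fh_to_dentry(struct super_block *sb,
 *			struct fid *fid, int fh_len, int fh_type)
 *	{
 *		return d_obtain_alias(my_fs_iget_from_fh(sb, fid));
 *	}
 */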
/**
 * d_splice_alias - splice a disconnected dentry into the tree if one exists
 * @inode:  the inode which may have a disconnected dentry
 * @dentry: a negative dentry which we want to point to the inode.
 *
 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
 * and return it, else simply d_add the inode to the dentry and return NULL.
 *
 * This is needed in the lookup routine of any filesystem that is exportable
 * (via knfsd) so that we can build dcache paths to directories effectively.
 *
 * If a dentry was found and moved, then it is returned. Otherwise NULL
 * is returned. This matches the expected return value of ->lookup.
 *
 */
struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
{
	struct dentry *new = NULL;

	if (inode && S_ISDIR(inode->i_mode)) {
		spin_lock(&inode->i_lock);
		new = __d_find_alias(inode, 1);
		if (new) {
			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
			spin_unlock(&inode->i_lock);
			security_d_instantiate(new, inode);
			d_move(new, dentry);
			iput(inode);
		} else {
			/* already taking inode->i_lock, so d_add() by hand */
			__d_instantiate(dentry, inode);
			spin_unlock(&inode->i_lock);
			security_d_instantiate(dentry, inode);
			d_rehash(dentry);
		}
	} else
		d_add(dentry, inode);
	return new;
}
EXPORT_SYMBOL(d_splice_alias);

/**
 * d_add_ci - lookup or allocate new dentry with case-exact name
 * @inode:  the inode case-insensitive lookup has found
 * @dentry: the negative dentry that was passed to the parent's lookup func
 * @name:   the case-exact name to be associated with the returned dentry
 *
 * This is to avoid filling the dcache with case-insensitive names to the
 * same inode, only the actual correct case is stored in the dcache for
 * case-insensitive filesystems.
 *
 * For a case-insensitive lookup match and if the case-exact dentry
 * already exists in the dcache, use it and return it.
 *
 * If no entry exists with the exact case name, allocate new dentry with
 * the exact case, and return the spliced entry.
 */
struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
			struct qstr *name)
{
	int error;
	struct dentry *found;
	struct dentry *new;

	/*
	 * First check if a dentry matching the name already exists,
	 * if not go ahead and create it now.
	 */
	found = d_hash_and_lookup(dentry->d_parent, name);
	if (!found) {
		new = d_alloc(dentry->d_parent, name);
		if (!new) {
			error = -ENOMEM;
			goto err_out;
		}

		found = d_splice_alias(inode, new);
		if (found) {
			dput(new);
			return found;
		}
		return new;
	}

	/*
	 * If a matching dentry exists, and it's not negative use it.
	 *
	 * Decrement the reference count to balance the iget() done
	 * earlier on.
	 */
	if (found->d_inode) {
		if (unlikely(found->d_inode != inode)) {
			/* This can't happen because bad inodes are unhashed. */
			BUG_ON(!is_bad_inode(inode));
			BUG_ON(!is_bad_inode(found->d_inode));
		}
		iput(inode);
		return found;
	}

	/*
	 * Negative dentry: instantiate it unless the inode is a directory and
	 * already has a dentry.
	 */
	spin_lock(&inode->i_lock);
	if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
		__d_instantiate(found, inode);
		spin_unlock(&inode->i_lock);
		security_d_instantiate(found, inode);
		return found;
	}

	/*
	 * In case a directory already has a (disconnected) entry grab a
	 * reference to it, move it in place and use it.
	 */
	new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
	__dget(new);
	spin_unlock(&inode->i_lock);
	security_d_instantiate(found, inode);
	d_move(new, found);
	iput(inode);
	dput(found);
	return new;

err_out:
	iput(inode);
	return ERR_PTR(error);
}
EXPORT_SYMBOL(d_add_ci);

/**
 * __d_lookup_rcu - search for a dentry (racy, store-free)
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 * @seq: returns d_seq value at the point where the dentry was found
 * @inode: returns dentry->d_inode when the inode was found valid.
 * Returns: dentry, or NULL
 *
 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
 * resolution (store-free path walking) design described in
 * Documentation/filesystems/path-lookup.txt.
 *
 * This is not to be used outside core vfs.
 *
 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
 * held, and rcu_read_lock held. The returned dentry must not be stored
 * anywhere without taking d_lock and checking the d_seq sequence count
 * against @seq returned here.
 *
 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
 * function.
 *
 * Alternatively, __d_lookup_rcu may be called again to look up the child of
 * the returned dentry, so long as its parent's seqlock is checked after the
 * child is looked up. Thus, an interlocking stepping of sequence lock checks
 * is formed, giving integrity down the path walk.
 */
struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
				unsigned *seq, struct inode **inode)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct dcache_hash_bucket *b = d_hash(parent, hash);
	struct hlist_bl_node *node;
	struct dentry *dentry;

	/*
	 * Note: There is significant duplication with __d_lookup which is
	 * required to prevent single threaded performance regressions
	 * especially on architectures where smp_rmb (in seqcounts) are costly.
	 * Keep the two functions in sync.
	 */

	/*
	 * The hash list is protected using RCU.
	 *
	 * Carefully use d_seq when comparing a candidate dentry, to avoid
	 * races with d_move().
	 *
	 * It is possible that concurrent renames can mess up our list
	 * walk here and result in missing our dentry, resulting in the
	 * false-negative result. d_lookup() protects against concurrent
	 * renames using rename_lock seqlock.
	 *
	 * See Documentation/vfs/dcache-locking.txt for more details.
	 */
	hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
		struct inode *i;
		const char *tname;
		int tlen;

		if (dentry->d_name.hash != hash)
			continue;

seqretry:
		*seq = read_seqcount_begin(&dentry->d_seq);
		if (dentry->d_parent != parent)
			continue;
		if (d_unhashed(dentry))
			continue;
		tlen = dentry->d_name.len;
		tname = dentry->d_name.name;
		i = dentry->d_inode;
		prefetch(tname);
		if (i)
			prefetch(i);
		/*
		 * This seqcount check is required to ensure name and
		 * len are loaded atomically, so as not to walk off the
		 * edge of memory when walking. If we could load this
		 * atomically some other way, we could drop this check.
		 */
		if (read_seqcount_retry(&dentry->d_seq, *seq))
			goto seqretry;
		if (parent->d_flags & DCACHE_OP_COMPARE) {
			if (parent->d_op->d_compare(parent, *inode,
						dentry, i,
						tlen, tname, name))
				continue;
		} else {
			if (dentry_cmp(tname, tlen, str, len))
				continue;
		}
		/*
		 * No extra seqcount check is required after the name
		 * compare. The caller must perform a seqcount check in
		 * order to do anything useful with the returned dentry
		 * anyway.
		 */
		*inode = i;
		return dentry;
	}
	return NULL;
}

/**
 * d_lookup - search for a dentry
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 * Returns: dentry, or NULL
 *
 * d_lookup searches the children of the parent dentry for the name in
 * question. If the dentry is found its reference count is incremented and the
 * dentry is returned. The caller must use dput to free the entry when it has
 * finished using it. %NULL is returned if the dentry does not exist.
 */
struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
{
	struct dentry *dentry;
	unsigned seq;

	do {
		seq = read_seqbegin(&rename_lock);
		dentry = __d_lookup(parent, name);
		if (dentry)
			break;
	} while (read_seqretry(&rename_lock, seq));
	return dentry;
}
EXPORT_SYMBOL(d_lookup);
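/*
 * Why d_lookup() loops (summary, not in the original): __d_lookup() can
 * miss an existing dentry when a concurrent rename moves it between
 * hash chains mid-walk - a false negative. A hit is always safe to
 * trust, so the loop only rechecks rename_lock after a NULL result,
 * repeating the walk until it completes in a rename-free interval.
 */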
1809 */ 1810 if (read_seqcount_retry(&dentry->d_seq, *seq)) 1811 goto seqretry; 1812 if (parent->d_flags & DCACHE_OP_COMPARE) { 1813 if (parent->d_op->d_compare(parent, *inode, 1814 dentry, i, 1815 tlen, tname, name)) 1816 continue; 1817 } else { 1818 if (dentry_cmp(tname, tlen, str, len)) 1819 continue; 1820 } 1821 /* 1822 * No extra seqcount check is required after the name 1823 * compare. The caller must perform a seqcount check in 1824 * order to do anything useful with the returned dentry 1825 * anyway. 1826 */ 1827 *inode = i; 1828 return dentry; 1829 } 1830 return NULL; 1831 } 1832 1833 /** 1834 * d_lookup - search for a dentry 1835 * @parent: parent dentry 1836 * @name: qstr of name we wish to find 1837 * Returns: dentry, or NULL 1838 * 1839 * d_lookup searches the children of the parent dentry for the name in 1840 * question. If the dentry is found its reference count is incremented and the 1841 * dentry is returned. The caller must use dput to free the entry when it has 1842 * finished using it. %NULL is returned if the dentry does not exist. 1843 */ 1844 struct dentry *d_lookup(struct dentry *parent, struct qstr *name) 1845 { 1846 struct dentry *dentry; 1847 unsigned seq; 1848 1849 do { 1850 seq = read_seqbegin(&rename_lock); 1851 dentry = __d_lookup(parent, name); 1852 if (dentry) 1853 break; 1854 } while (read_seqretry(&rename_lock, seq)); 1855 return dentry; 1856 } 1857 EXPORT_SYMBOL(d_lookup); 1858 1859 /** 1860 * __d_lookup - search for a dentry (racy) 1861 * @parent: parent dentry 1862 * @name: qstr of name we wish to find 1863 * Returns: dentry, or NULL 1864 * 1865 * __d_lookup is like d_lookup, however it may (rarely) return a 1866 * false-negative result due to unrelated rename activity. 1867 * 1868 * __d_lookup is slightly faster by avoiding rename_lock read seqlock, 1869 * however it must be used carefully, eg. with a following d_lookup in 1870 * the case of failure. 1871 * 1872 * __d_lookup callers must be commented. 1873 */ 1874 struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) 1875 { 1876 unsigned int len = name->len; 1877 unsigned int hash = name->hash; 1878 const unsigned char *str = name->name; 1879 struct dcache_hash_bucket *b = d_hash(parent, hash); 1880 struct hlist_bl_node *node; 1881 struct dentry *found = NULL; 1882 struct dentry *dentry; 1883 1884 /* 1885 * Note: There is significant duplication with __d_lookup_rcu which is 1886 * required to prevent single threaded performance regressions 1887 * especially on architectures where smp_rmb (in seqcounts) are costly. 1888 * Keep the two functions in sync. 1889 */ 1890 1891 /* 1892 * The hash list is protected using RCU. 1893 * 1894 * Take d_lock when comparing a candidate dentry, to avoid races 1895 * with d_move(). 1896 * 1897 * It is possible that concurrent renames can mess up our list 1898 * walk here and result in missing our dentry, resulting in the 1899 * false-negative result. d_lookup() protects against concurrent 1900 * renames using rename_lock seqlock. 1901 * 1902 * See Documentation/vfs/dcache-locking.txt for more details. 1903 */ 1904 rcu_read_lock(); 1905 1906 hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) { 1907 const char *tname; 1908 int tlen; 1909 1910 if (dentry->d_name.hash != hash) 1911 continue; 1912 1913 spin_lock(&dentry->d_lock); 1914 if (dentry->d_parent != parent) 1915 goto next; 1916 if (d_unhashed(dentry)) 1917 goto next; 1918 1919 /* 1920 * It is safe to compare names since d_move() cannot 1921 * change the qstr (protected by d_lock). 
1922 */ 1923 tlen = dentry->d_name.len; 1924 tname = dentry->d_name.name; 1925 if (parent->d_flags & DCACHE_OP_COMPARE) { 1926 if (parent->d_op->d_compare(parent, parent->d_inode, 1927 dentry, dentry->d_inode, 1928 tlen, tname, name)) 1929 goto next; 1930 } else { 1931 if (dentry_cmp(tname, tlen, str, len)) 1932 goto next; 1933 } 1934 1935 dentry->d_count++; 1936 found = dentry; 1937 spin_unlock(&dentry->d_lock); 1938 break; 1939 next: 1940 spin_unlock(&dentry->d_lock); 1941 } 1942 rcu_read_unlock(); 1943 1944 return found; 1945 } 1946 1947 /** 1948 * d_hash_and_lookup - hash the qstr then search for a dentry 1949 * @dir: Directory to search in 1950 * @name: qstr of name we wish to find 1951 * 1952 * On hash failure or on lookup failure NULL is returned. 1953 */ 1954 struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) 1955 { 1956 struct dentry *dentry = NULL; 1957 1958 /* 1959 * Check for a fs-specific hash function. Note that we must 1960 * calculate the standard hash first, as the d_op->d_hash() 1961 * routine may choose to leave the hash value unchanged. 1962 */ 1963 name->hash = full_name_hash(name->name, name->len); 1964 if (dir->d_flags & DCACHE_OP_HASH) { 1965 if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0) 1966 goto out; 1967 } 1968 dentry = d_lookup(dir, name); 1969 out: 1970 return dentry; 1971 } 1972 1973 /** 1974 * d_validate - verify dentry provided from insecure source (deprecated) 1975 * @dentry: The dentry alleged to be valid child of @dparent 1976 * @parent: The parent dentry (known to be valid) 1977 * 1978 * An insecure source has sent us a dentry, here we verify it and dget() it. 1979 * This is used by ncpfs in its readdir implementation. 1980 * Zero is returned in the dentry is invalid. 1981 * 1982 * This function is slow for big directories, and deprecated, do not use it. 1983 */ 1984 int d_validate(struct dentry *dentry, struct dentry *dparent) 1985 { 1986 struct dentry *child; 1987 1988 spin_lock(&dparent->d_lock); 1989 list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { 1990 if (dentry == child) { 1991 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 1992 __dget_dlock(dentry); 1993 spin_unlock(&dentry->d_lock); 1994 spin_unlock(&dparent->d_lock); 1995 return 1; 1996 } 1997 } 1998 spin_unlock(&dparent->d_lock); 1999 2000 return 0; 2001 } 2002 EXPORT_SYMBOL(d_validate); 2003 2004 /* 2005 * When a file is deleted, we have two options: 2006 * - turn this dentry into a negative dentry 2007 * - unhash this dentry and free it. 2008 * 2009 * Usually, we want to just turn this into 2010 * a negative dentry, but if anybody else is 2011 * currently using the dentry or the inode 2012 * we can't do that and we fall back on removing 2013 * it from the hash queues and waiting for 2014 * it to be deleted later when it has no users 2015 */ 2016 2017 /** 2018 * d_delete - delete a dentry 2019 * @dentry: The dentry to delete 2020 * 2021 * Turn the dentry into a negative dentry if possible, otherwise 2022 * remove it from the hash queues so it can be deleted later 2023 */ 2024 2025 void d_delete(struct dentry * dentry) 2026 { 2027 struct inode *inode; 2028 int isdir = 0; 2029 /* 2030 * Are we the only user? 
	 */
again:
	spin_lock(&dentry->d_lock);
	inode = dentry->d_inode;
	isdir = S_ISDIR(inode->i_mode);
	if (dentry->d_count == 1) {
		if (inode && !spin_trylock(&inode->i_lock)) {
			spin_unlock(&dentry->d_lock);
			cpu_relax();
			goto again;
		}
		dentry->d_flags &= ~DCACHE_CANT_MOUNT;
		dentry_unlink_inode(dentry);
		fsnotify_nameremove(dentry, isdir);
		return;
	}

	if (!d_unhashed(dentry))
		__d_drop(dentry);

	spin_unlock(&dentry->d_lock);

	fsnotify_nameremove(dentry, isdir);
}
EXPORT_SYMBOL(d_delete);

static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b)
{
	BUG_ON(!d_unhashed(entry));
	spin_lock_bucket(b);
	entry->d_flags &= ~DCACHE_UNHASHED;
	hlist_bl_add_head_rcu(&entry->d_hash, &b->head);
	spin_unlock_bucket(b);
}

static void _d_rehash(struct dentry * entry)
{
	__d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
}

/**
 * d_rehash - add an entry back to the hash
 * @entry: dentry to add to the hash
 *
 * Adds a dentry to the hash according to its name.
 */
void d_rehash(struct dentry * entry)
{
	spin_lock(&entry->d_lock);
	_d_rehash(entry);
	spin_unlock(&entry->d_lock);
}
EXPORT_SYMBOL(d_rehash);

/**
 * dentry_update_name_case - update case insensitive dentry with a new name
 * @dentry: dentry to be updated
 * @name: new name
 *
 * Update a case insensitive dentry with new case of name.
 *
 * dentry must have been returned by d_lookup with name @name. Old and new
 * name lengths must match (ie. no d_compare which allows mismatched name
 * lengths).
 *
 * Parent inode i_mutex must be held over d_lookup and into this call (to
 * keep renames and concurrent inserts, and readdir(2) away).
 */
void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
{
	/* it is the parent's i_mutex that must be held, per the above */
	BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
	BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */

	spin_lock(&dentry->d_lock);
	write_seqcount_begin(&dentry->d_seq);
	memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
	write_seqcount_end(&dentry->d_seq);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(dentry_update_name_case);

static void switch_names(struct dentry *dentry, struct dentry *target)
{
	if (dname_external(target)) {
		if (dname_external(dentry)) {
			/*
			 * Both external: swap the pointers
			 */
			swap(target->d_name.name, dentry->d_name.name);
		} else {
			/*
			 * dentry:internal, target:external. Steal target's
			 * storage and make target internal.
			 */
			memcpy(target->d_iname, dentry->d_name.name,
					dentry->d_name.len + 1);
			dentry->d_name.name = target->d_name.name;
			target->d_name.name = target->d_iname;
		}
	} else {
		if (dname_external(dentry)) {
			/*
			 * dentry:external, target:internal. Give dentry's
			 * storage to target and make dentry internal.
			 */
			memcpy(dentry->d_iname, target->d_name.name,
					target->d_name.len + 1);
			target->d_name.name = dentry->d_name.name;
			dentry->d_name.name = dentry->d_iname;
		} else {
			/*
			 * Both are internal.
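			 * Neither name can be handed over by pointer:
			 * both live in the d_iname arrays embedded in
			 * the dentries themselves.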
			 * Just copy target to dentry.
			 */
			memcpy(dentry->d_iname, target->d_name.name,
					target->d_name.len + 1);
			dentry->d_name.len = target->d_name.len;
			return;
		}
	}
	swap(dentry->d_name.len, target->d_name.len);
}

static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
{
	/*
	 * XXXX: do we really need to take target->d_lock?
	 */
	if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
		spin_lock(&target->d_parent->d_lock);
	else {
		if (d_ancestor(dentry->d_parent, target->d_parent)) {
			spin_lock(&dentry->d_parent->d_lock);
			spin_lock_nested(&target->d_parent->d_lock,
						DENTRY_D_LOCK_NESTED);
		} else {
			spin_lock(&target->d_parent->d_lock);
			spin_lock_nested(&dentry->d_parent->d_lock,
						DENTRY_D_LOCK_NESTED);
		}
	}
	if (target < dentry) {
		spin_lock_nested(&target->d_lock, 2);
		spin_lock_nested(&dentry->d_lock, 3);
	} else {
		spin_lock_nested(&dentry->d_lock, 2);
		spin_lock_nested(&target->d_lock, 3);
	}
}

static void dentry_unlock_parents_for_move(struct dentry *dentry,
					struct dentry *target)
{
	if (target->d_parent != dentry->d_parent)
		spin_unlock(&dentry->d_parent->d_lock);
	if (target->d_parent != target)
		spin_unlock(&target->d_parent->d_lock);
}

/*
 * When switching names, the actual string doesn't strictly have to
 * be preserved in the target - because we're dropping the target
 * anyway. As such, we can just do a simple memcpy() to copy over
 * the new name before we switch.
 *
 * Note that we have to be a lot more careful about getting the hash
 * switched - we have to switch the hash value properly even if it
 * then no longer matches the actual (corrupted) string of the target.
 * The hash value has to match the hash queue that the dentry is on..
 */
/*
 * d_move - move a dentry
 * @dentry: entry to move
 * @target: new dentry
 *
 * Update the dcache to reflect the move of a file name. Negative
 * dcache entries should not be moved in this way.
 */
void d_move(struct dentry * dentry, struct dentry * target)
{
	if (!dentry->d_inode)
		printk(KERN_WARNING "VFS: moving negative dcache entry\n");

	BUG_ON(d_ancestor(dentry, target));
	BUG_ON(d_ancestor(target, dentry));

	write_seqlock(&rename_lock);

	dentry_lock_for_move(dentry, target);

	write_seqcount_begin(&dentry->d_seq);
	write_seqcount_begin(&target->d_seq);

	/* __d_drop does write_seqcount_barrier, but they're OK to nest. */

	/*
	 * Move the dentry to the target hash queue. Don't bother checking
	 * for the same hash queue because of how unlikely it is.
	 */
	__d_drop(dentry);
	__d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));

	/* Unhash the target: dput() will then get rid of it */
	__d_drop(target);

	list_del(&dentry->d_u.d_child);
	list_del(&target->d_u.d_child);

	/* Switch the names.. */
	switch_names(dentry, target);
	swap(dentry->d_name.hash, target->d_name.hash);

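	/*
	 * The hash values must follow the names: dentry was rehashed
	 * above onto the chain derived from target's name, so its
	 * d_name.hash has to match that chain (see the comment above
	 * d_move()).
	 */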
	/* ... and switch the parents */
	if (IS_ROOT(dentry)) {
		dentry->d_parent = target->d_parent;
		target->d_parent = target;
		INIT_LIST_HEAD(&target->d_u.d_child);
	} else {
		swap(dentry->d_parent, target->d_parent);

		/* And add them back to the (new) parent lists */
		list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
	}

	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);

	write_seqcount_end(&target->d_seq);
	write_seqcount_end(&dentry->d_seq);

	dentry_unlock_parents_for_move(dentry, target);
	spin_unlock(&target->d_lock);
	fsnotify_d_move(dentry);
	spin_unlock(&dentry->d_lock);
	write_sequnlock(&rename_lock);
}
EXPORT_SYMBOL(d_move);

/**
 * d_ancestor - search for an ancestor
 * @p1: ancestor dentry
 * @p2: child dentry
 *
 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
 * an ancestor of p2, else NULL.
 */
struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
{
	struct dentry *p;

	for (p = p2; !IS_ROOT(p); p = p->d_parent) {
		if (p->d_parent == p1)
			return p;
	}
	return NULL;
}

/*
 * This helper attempts to cope with remotely renamed directories
 *
 * It assumes that the caller is already holding
 * dentry->d_parent->d_inode->i_mutex and the inode->i_lock
 *
 * Note: If ever the locking in lock_rename() changes, then please
 * remember to update this too...
 */
static struct dentry *__d_unalias(struct inode *inode,
		struct dentry *dentry, struct dentry *alias)
{
	struct mutex *m1 = NULL, *m2 = NULL;
	struct dentry *ret;

	/* If alias and dentry share a parent, then no extra locks required */
	if (alias->d_parent == dentry->d_parent)
		goto out_unalias;

	/* Check for loops */
	ret = ERR_PTR(-ELOOP);
	if (d_ancestor(alias, dentry))
		goto out_err;

	/* See lock_rename() */
	ret = ERR_PTR(-EBUSY);
	if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
		goto out_err;
	m1 = &dentry->d_sb->s_vfs_rename_mutex;
	if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
		goto out_err;
	m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
	d_move(alias, dentry);
	ret = alias;
out_err:
	spin_unlock(&inode->i_lock);
	if (m2)
		mutex_unlock(m2);
	if (m1)
		mutex_unlock(m1);
	return ret;
}

/*
 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
 * named dentry in place of the dentry to be replaced.
 * returns with anon->d_lock held!
 */
static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
{
	struct dentry *dparent, *aparent;

	dentry_lock_for_move(anon, dentry);

	write_seqcount_begin(&dentry->d_seq);
	write_seqcount_begin(&anon->d_seq);

	dparent = dentry->d_parent;
	aparent = anon->d_parent;

	switch_names(dentry, anon);
	swap(dentry->d_name.hash, anon->d_name.hash);

	dentry->d_parent = (aparent == anon) ? dentry : aparent;
	list_del(&dentry->d_u.d_child);
	if (!IS_ROOT(dentry))
		list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
	else
		INIT_LIST_HEAD(&dentry->d_u.d_child);

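	/*
	 * Give anon the mirror-image treatment: it takes over dentry's
	 * old parent, or becomes its own root if dentry was one.
	 */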
	anon->d_parent = (dparent == dentry) ? anon : dparent;
	list_del(&anon->d_u.d_child);
	if (!IS_ROOT(anon))
		list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
	else
		INIT_LIST_HEAD(&anon->d_u.d_child);

	write_seqcount_end(&dentry->d_seq);
	write_seqcount_end(&anon->d_seq);

	dentry_unlock_parents_for_move(anon, dentry);
	spin_unlock(&dentry->d_lock);

	/* anon->d_lock still locked, returns locked */
	anon->d_flags &= ~DCACHE_DISCONNECTED;
}

/**
 * d_materialise_unique - introduce an inode into the tree
 * @dentry: candidate dentry
 * @inode: inode to bind to the dentry, to which aliases may be attached
 *
 * Introduces a dentry into the tree, substituting an extant disconnected
 * root directory alias in its place if there is one
 */
struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
{
	struct dentry *actual;

	BUG_ON(!d_unhashed(dentry));

	if (!inode) {
		actual = dentry;
		__d_instantiate(dentry, NULL);
		d_rehash(actual);
		goto out_nolock;
	}

	spin_lock(&inode->i_lock);

	if (S_ISDIR(inode->i_mode)) {
		struct dentry *alias;

		/* Does an aliased dentry already exist? */
		alias = __d_find_alias(inode, 0);
		if (alias) {
			actual = alias;
			/* Is this an anonymous mountpoint that we could splice
			 * into our tree? */
			if (IS_ROOT(alias)) {
				__d_materialise_dentry(dentry, alias);
				__d_drop(alias);
				goto found;
			}
			/* Nope, but we must(!) avoid directory aliasing */
			actual = __d_unalias(inode, dentry, alias);
			if (IS_ERR(actual))
				dput(alias);
			goto out_nolock;
		}
	}

	/* Add a unique reference */
	actual = __d_instantiate_unique(dentry, inode);
	if (!actual)
		actual = dentry;
	else
		BUG_ON(!d_unhashed(actual));

	spin_lock(&actual->d_lock);
found:
	_d_rehash(actual);
	spin_unlock(&actual->d_lock);
	spin_unlock(&inode->i_lock);
out_nolock:
	if (actual == dentry) {
		security_d_instantiate(dentry, inode);
		return NULL;
	}

	iput(inode);
	return actual;
}
EXPORT_SYMBOL_GPL(d_materialise_unique);

static int prepend(char **buffer, int *buflen, const char *str, int namelen)
{
	*buflen -= namelen;
	if (*buflen < 0)
		return -ENAMETOOLONG;
	*buffer -= namelen;
	memcpy(*buffer, str, namelen);
	return 0;
}

static int prepend_name(char **buffer, int *buflen, struct qstr *name)
{
	return prepend(buffer, buflen, name->name, name->len);
}

/**
 * prepend_path - Prepend path string to a buffer
 * @path: the dentry/vfsmount to report
 * @root: root vfsmnt/dentry (may be modified by this function)
 * @buffer: pointer to the end of the buffer
 * @buflen: pointer to buffer length
 *
 * Caller holds the rename_lock.
 *
 * If path is not reachable from the supplied root, then the value of
 * root is changed (without modifying refcounts).
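 *
 * The name is built back to front: e.g. for /usr/bin reachable from
 * @root, "bin" is written at the end of the buffer first, then "/",
 * then "usr", then the leading "/", and *buffer is left pointing at
 * the finished string.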
 */
static int prepend_path(const struct path *path, struct path *root,
			char **buffer, int *buflen)
{
	struct dentry *dentry = path->dentry;
	struct vfsmount *vfsmnt = path->mnt;
	bool slash = false;
	int error = 0;

	br_read_lock(vfsmount_lock);
	while (dentry != root->dentry || vfsmnt != root->mnt) {
		struct dentry * parent;

		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
			/* Global root? */
			if (vfsmnt->mnt_parent == vfsmnt) {
				goto global_root;
			}
			dentry = vfsmnt->mnt_mountpoint;
			vfsmnt = vfsmnt->mnt_parent;
			continue;
		}
		parent = dentry->d_parent;
		prefetch(parent);
		spin_lock(&dentry->d_lock);
		error = prepend_name(buffer, buflen, &dentry->d_name);
		spin_unlock(&dentry->d_lock);
		if (!error)
			error = prepend(buffer, buflen, "/", 1);
		if (error)
			break;

		slash = true;
		dentry = parent;
	}

out:
	if (!error && !slash)
		error = prepend(buffer, buflen, "/", 1);

	br_read_unlock(vfsmount_lock);
	return error;

global_root:
	/*
	 * Filesystems needing to implement special "root names"
	 * should do so with ->d_dname()
	 */
	if (IS_ROOT(dentry) &&
	    (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
		WARN(1, "Root dentry has weird name <%.*s>\n",
		     (int) dentry->d_name.len, dentry->d_name.name);
	}
	root->mnt = vfsmnt;
	root->dentry = dentry;
	goto out;
}

/**
 * __d_path - return the path of a dentry
 * @path: the dentry/vfsmount to report
 * @root: root vfsmnt/dentry (may be modified by this function)
 * @buf: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name.
 *
 * Returns a pointer into the buffer or an error code if the
 * path was too long.
 *
 * "buflen" should be positive.
 *
 * If path is not reachable from the supplied root, then the value of
 * root is changed (without modifying refcounts).
 */
char *__d_path(const struct path *path, struct path *root,
	       char *buf, int buflen)
{
	char *res = buf + buflen;
	int error;

	prepend(&res, &buflen, "\0", 1);
	write_seqlock(&rename_lock);
	error = prepend_path(path, root, &res, &buflen);
	write_sequnlock(&rename_lock);

	if (error)
		return ERR_PTR(error);
	return res;
}

/*
 * same as __d_path but appends "(deleted)" for unlinked files.
 */
static int path_with_deleted(const struct path *path, struct path *root,
			     char **buf, int *buflen)
{
	prepend(buf, buflen, "\0", 1);
	if (d_unlinked(path->dentry)) {
		int error = prepend(buf, buflen, " (deleted)", 10);
		if (error)
			return error;
	}

	return prepend_path(path, root, buf, buflen);
}

static int prepend_unreachable(char **buffer, int *buflen)
{
	return prepend(buffer, buflen, "(unreachable)", 13);
}

/**
 * d_path - return the path of a dentry
 * @path: path to report
 * @buf: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name. If the entry has been deleted
 * the string " (deleted)" is appended. Note that this is ambiguous.
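 * (Ambiguous because a live file whose name really ends in " (deleted)"
 * will produce the same output.)
 *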
 * Returns a pointer into the buffer or an error code if the path was too
 * long. Note: Callers should use the returned pointer, not the passed
 * in buffer, to use the name! The implementation often starts at an offset
 * into the buffer, and may leave 0 bytes at the start.
 *
 * "buflen" should be positive.
 */
char *d_path(const struct path *path, char *buf, int buflen)
{
	char *res = buf + buflen;
	struct path root;
	struct path tmp;
	int error;

	/*
	 * We have various synthetic filesystems that never get mounted. On
	 * these filesystems dentries are never used for lookup purposes, and
	 * thus don't need to be hashed. They also don't need a name until a
	 * user wants to identify the object in /proc/pid/fd/. The little hack
	 * below allows us to generate a name for these objects on demand:
	 */
	if (path->dentry->d_op && path->dentry->d_op->d_dname)
		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);

	get_fs_root(current->fs, &root);
	write_seqlock(&rename_lock);
	tmp = root;
	error = path_with_deleted(path, &tmp, &res, &buflen);
	if (error)
		res = ERR_PTR(error);
	write_sequnlock(&rename_lock);
	path_put(&root);
	return res;
}
EXPORT_SYMBOL(d_path);

/**
 * d_path_with_unreachable - return the path of a dentry
 * @path: path to report
 * @buf: buffer to return value in
 * @buflen: buffer length
 *
 * The difference from d_path() is that this prepends "(unreachable)"
 * to paths which are unreachable from the current process' root.
 */
char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
{
	char *res = buf + buflen;
	struct path root;
	struct path tmp;
	int error;

	if (path->dentry->d_op && path->dentry->d_op->d_dname)
		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);

	get_fs_root(current->fs, &root);
	write_seqlock(&rename_lock);
	tmp = root;
	error = path_with_deleted(path, &tmp, &res, &buflen);
	if (!error && !path_equal(&tmp, &root))
		error = prepend_unreachable(&res, &buflen);
	write_sequnlock(&rename_lock);
	path_put(&root);
	if (error)
		res = ERR_PTR(error);

	return res;
}

/*
 * Helper function for dentry_operations.d_dname() members
 */
char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
			const char *fmt, ...)
{
	va_list args;
	char temp[64];
	int sz;

	va_start(args, fmt);
	sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
	va_end(args);

	if (sz > sizeof(temp) || sz > buflen)
		return ERR_PTR(-ENAMETOOLONG);

	buffer += buflen - sz;
	return memcpy(buffer, temp, sz);
}
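
/*
 * Example (an illustrative sketch only, not code used by this file): a
 * typical d_path() caller hands in a scratch page and must use the
 * returned pointer rather than the buffer start, since the name is built
 * at the end of the buffer:
 *
 *	char *page = (char *)__get_free_page(GFP_KERNEL);
 *	if (page) {
 *		char *p = d_path(&file->f_path, page, PAGE_SIZE);
 *		if (!IS_ERR(p))
 *			pr_info("path: %s\n", p);
 *		free_page((unsigned long)page);
 *	}
 *
 * where "file" stands for some struct file the caller already holds a
 * reference to.
 */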

/*
 * Write full pathname from the root of the filesystem into the buffer.
 */
static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
{
	char *end = buf + buflen;
	char *retval;

	prepend(&end, &buflen, "\0", 1);
	if (buflen < 1)
		goto Elong;
	/* Get '/' right */
	retval = end-1;
	*retval = '/';

	while (!IS_ROOT(dentry)) {
		struct dentry *parent = dentry->d_parent;
		int error;

		prefetch(parent);
		spin_lock(&dentry->d_lock);
		error = prepend_name(&end, &buflen, &dentry->d_name);
		spin_unlock(&dentry->d_lock);
		if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
			goto Elong;

		retval = end;
		dentry = parent;
	}
	return retval;
Elong:
	return ERR_PTR(-ENAMETOOLONG);
}

char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
{
	char *retval;

	write_seqlock(&rename_lock);
	retval = __dentry_path(dentry, buf, buflen);
	write_sequnlock(&rename_lock);

	return retval;
}
EXPORT_SYMBOL(dentry_path_raw);

char *dentry_path(struct dentry *dentry, char *buf, int buflen)
{
	char *p = NULL;
	char *retval;

	write_seqlock(&rename_lock);
	if (d_unlinked(dentry)) {
		p = buf + buflen;
		if (prepend(&p, &buflen, "//deleted", 10) != 0)
			goto Elong;
		buflen++;
	}
	retval = __dentry_path(dentry, buf, buflen);
	write_sequnlock(&rename_lock);
	if (!IS_ERR(retval) && p)
		*p = '/';	/* restore '/' overridden with '\0' */
	return retval;
Elong:
	write_sequnlock(&rename_lock);	/* match the write_seqlock() above */
	return ERR_PTR(-ENAMETOOLONG);
}

/*
 * NOTE! The user-level library version returns a
 * character pointer. The kernel system call just
 * returns the length of the buffer filled (which
 * includes the ending '\0' character), or a negative
 * error value. So libc would do something like
 *
 *	char *getcwd(char * buf, size_t size)
 *	{
 *		int retval;
 *
 *		retval = sys_getcwd(buf, size);
 *		if (retval >= 0)
 *			return buf;
 *		errno = -retval;
 *		return NULL;
 *	}
 */
SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
{
	int error;
	struct path pwd, root;
	char *page = (char *) __get_free_page(GFP_USER);

	if (!page)
		return -ENOMEM;

	get_fs_root_and_pwd(current->fs, &root, &pwd);

	error = -ENOENT;
	write_seqlock(&rename_lock);
	if (!d_unlinked(pwd.dentry)) {
		unsigned long len;
		struct path tmp = root;
		char *cwd = page + PAGE_SIZE;
		int buflen = PAGE_SIZE;

		prepend(&cwd, &buflen, "\0", 1);
		error = prepend_path(&pwd, &tmp, &cwd, &buflen);
		write_sequnlock(&rename_lock);

		if (error)
			goto out;

		/* Unreachable from current root */
		if (!path_equal(&tmp, &root)) {
			error = prepend_unreachable(&cwd, &buflen);
			if (error)
				goto out;
		}

		error = -ERANGE;
		len = PAGE_SIZE + page - cwd;
		if (len <= size) {
			error = len;
			if (copy_to_user(buf, cwd, len))
				error = -EFAULT;
		}
	} else {
		write_sequnlock(&rename_lock);
	}

out:
	path_put(&pwd);
	path_put(&root);
	free_page((unsigned long) page);
	return error;
}

/*
 * Test whether new_dentry is a subdirectory of old_dentry.
 *
 * Trivially implemented using the dcache structure
 */

/**
 * is_subdir - is new dentry a subdirectory of old_dentry
 * @new_dentry: new dentry
 * @old_dentry: old dentry
 *
 * Returns 1 if new_dentry is a subdirectory of old_dentry (at any depth).
 * Returns 0 otherwise.
 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
 */
int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
{
	int result;
	unsigned seq;

	if (new_dentry == old_dentry)
		return 1;

	do {
		/* for restarting inner loop in case of seq retry */
		seq = read_seqbegin(&rename_lock);
		/*
		 * Need rcu_read_lock() to protect against d_parent being
		 * changed under us by d_move()
		 */
		rcu_read_lock();
		if (d_ancestor(old_dentry, new_dentry))
			result = 1;
		else
			result = 0;
		rcu_read_unlock();
	} while (read_seqretry(&rename_lock, seq));

	return result;
}

int path_is_under(struct path *path1, struct path *path2)
{
	struct vfsmount *mnt = path1->mnt;
	struct dentry *dentry = path1->dentry;
	int res;

	br_read_lock(vfsmount_lock);
	if (mnt != path2->mnt) {
		for (;;) {
			if (mnt->mnt_parent == mnt) {
				br_read_unlock(vfsmount_lock);
				return 0;
			}
			if (mnt->mnt_parent == path2->mnt)
				break;
			mnt = mnt->mnt_parent;
		}
		dentry = mnt->mnt_mountpoint;
	}
	res = is_subdir(dentry, path2->dentry);
	br_read_unlock(vfsmount_lock);
	return res;
}
EXPORT_SYMBOL(path_is_under);

void d_genocide(struct dentry *root)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int locked = 0;

	seq = read_seqbegin(&rename_lock);
again:
	this_parent = root;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		if (d_unhashed(dentry) || !dentry->d_inode) {
			spin_unlock(&dentry->d_lock);
			continue;
		}
		if (!list_empty(&dentry->d_subdirs)) {
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}
		if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
			dentry->d_flags |= DCACHE_GENOCIDE;
			dentry->d_count--;
		}
		spin_unlock(&dentry->d_lock);
	}
	if (this_parent != root) {
		struct dentry *tmp;
		struct dentry *child;

		tmp = this_parent->d_parent;
		if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
			this_parent->d_flags |= DCACHE_GENOCIDE;
			this_parent->d_count--;
		}
		rcu_read_lock();
		spin_unlock(&this_parent->d_lock);
		child = this_parent;
		this_parent = tmp;
		spin_lock(&this_parent->d_lock);
		/* might go back up the wrong parent if we have had a rename
		 * or deletion */
		if (this_parent != child->d_parent ||
		    (!locked && read_seqretry(&rename_lock, seq))) {
			spin_unlock(&this_parent->d_lock);
			rcu_read_unlock();
			goto rename_retry;
		}
		rcu_read_unlock();
		next = child->d_u.d_child.next;
		goto resume;
	}
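	/*
	 * Back at the root with the whole tree walked: drop root's lock
	 * and, if a rename raced with the lockless walk, retry the whole
	 * thing holding rename_lock for writing.
	 */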
	spin_unlock(&this_parent->d_lock);
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return;

rename_retry:
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}

/**
 * find_inode_number - check for dentry with name
 * @dir: directory to check
 * @name: Name to find.
 *
 * Check whether a dentry already exists for the given name,
 * and return the inode number if it has an inode. Otherwise
 * 0 is returned.
 *
 * This routine is used to post-process directory listings for
 * filesystems using synthetic inode numbers, and is necessary
 * to keep getcwd() working.
 */
ino_t find_inode_number(struct dentry *dir, struct qstr *name)
{
	struct dentry * dentry;
	ino_t ino = 0;

	dentry = d_hash_and_lookup(dir, name);
	if (dentry) {
		if (dentry->d_inode)
			ino = dentry->d_inode->i_ino;
		dput(dentry);
	}
	return ino;
}
EXPORT_SYMBOL(find_inode_number);

static __initdata unsigned long dhash_entries;
static int __init set_dhash_entries(char *str)
{
	if (!str)
		return 0;
	dhash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("dhash_entries=", set_dhash_entries);
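
/*
 * Illustrative example: booting with "dhash_entries=65536" on the kernel
 * command line makes alloc_large_system_hash() below size the dentry hash
 * table for that many entries instead of scaling it from available memory.
 */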

static void __init dcache_init_early(void)
{
	int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct dcache_hash_bucket),
					dhash_entries,
					13,
					HASH_EARLY,
					&d_hash_shift,
					&d_hash_mask,
					0);

	for (loop = 0; loop < (1 << d_hash_shift); loop++)
		INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
}

static void __init dcache_init(void)
{
	int loop;

	/*
	 * A constructor could be added for stable state like the lists,
	 * but it is probably not worth it because of the cache nature
	 * of the dcache.
	 */
	dentry_cache = KMEM_CACHE(dentry,
		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);

	register_shrinker(&dcache_shrinker);

	/* Hash may have been set up in dcache_init_early */
	if (!hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct dcache_hash_bucket),
					dhash_entries,
					13,
					0,
					&d_hash_shift,
					&d_hash_mask,
					0);

	for (loop = 0; loop < (1 << d_hash_shift); loop++)
		INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
}

/* SLAB cache for __getname() consumers */
struct kmem_cache *names_cachep __read_mostly;
EXPORT_SYMBOL(names_cachep);

EXPORT_SYMBOL(d_genocide);

void __init vfs_caches_init_early(void)
{
	dcache_init_early();
	inode_init_early();
}

void __init vfs_caches_init(unsigned long mempages)
{
	unsigned long reserve;

	/* Base hash sizes on available memory, with a reserve equal to
	   150% of current kernel size */
	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
	mempages -= reserve;

	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	dcache_init();
	inode_init();
	files_init(mempages);
	mnt_init();
	bdev_cache_init();
	chrdev_init();
}