/*
 * fs/dcache.c
 *
 * Complete reimplementation
 * (C) 1997 Thomas Schoebel-Theuer,
 * with heavy changes by Linus Torvalds
 */

/*
 * Notes on the allocation strategy:
 *
 * The dcache is a master of the icache - whenever a dcache entry
 * exists, the inode will always exist. "iput()" is done either when
 * the dcache entry is deleted or garbage collected.
 */

#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/cache.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
#include <linux/fs_struct.h>
#include <linux/hardirq.h>
#include <linux/bit_spinlock.h>
#include <linux/rculist_bl.h>
#include "internal.h"

/*
 * Usage:
 * dcache->d_inode->i_lock protects:
 *   - i_dentry, d_alias, d_inode of aliases
 * dcache_hash_bucket lock protects:
 *   - the dcache hash table
 * s_anon bl list spinlock protects:
 *   - the s_anon list (see __d_drop)
 * dcache_lru_lock protects:
 *   - the dcache lru lists and counters
 * d_lock protects:
 *   - d_flags
 *   - d_name
 *   - d_lru
 *   - d_count
 *   - d_unhashed()
 *   - d_parent and d_subdirs
 *   - children's d_child and d_parent
 *   - d_alias, d_inode
 *
 * Ordering:
 * dentry->d_inode->i_lock
 *   dentry->d_lock
 *     dcache_lru_lock
 *     dcache_hash_bucket lock
 *     s_anon lock
 *
 * If there is an ancestor relationship:
 * dentry->d_parent->...->d_parent->d_lock
 *   ...
 *     dentry->d_parent->d_lock
 *       dentry->d_lock
 *
 * If no ancestor relationship:
 * if (dentry1 < dentry2)
 *   dentry1->d_lock
 *     dentry2->d_lock
 */
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);

EXPORT_SYMBOL(rename_lock);

static struct kmem_cache *dentry_cache __read_mostly;

/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */
#define D_HASHBITS	d_hash_shift
#define D_HASHMASK	d_hash_mask

static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;

struct dcache_hash_bucket {
	struct hlist_bl_head head;
};
static struct dcache_hash_bucket *dentry_hashtable __read_mostly;

static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
					unsigned long hash)
{
	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
	hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
	return dentry_hashtable + (hash & D_HASHMASK);
}

static inline void spin_lock_bucket(struct dcache_hash_bucket *b)
{
	bit_spin_lock(0, (unsigned long *)&b->head.first);
}

static inline void spin_unlock_bucket(struct dcache_hash_bucket *b)
{
	__bit_spin_unlock(0, (unsigned long *)&b->head.first);
}
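
/*
 * Illustrative sketch (not part of the original file): the chain for a
 * name is selected by mixing the parent dentry pointer into the name
 * hash, so the same name under different parents lands on different
 * chains. Writers take the chain lock hidden in bit 0 of the head
 * pointer; readers walk the chain under RCU (see __d_lookup below):
 *
 *	struct dcache_hash_bucket *b = d_hash(parent, name->hash);
 *
 *	spin_lock_bucket(b);		// exclusive: insertion/removal
 *	hlist_bl_add_head_rcu(&dentry->d_hash, &b->head);
 *	spin_unlock_bucket(b);
 */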

/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
	.age_limit = 45,
};

static DEFINE_PER_CPU(unsigned int, nr_dentry);

#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
static int get_nr_dentry(void)
{
	int i;
	int sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_dentry, i);
	return sum < 0 ? 0 : sum;
}

int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
		   size_t *lenp, loff_t *ppos)
{
	dentry_stat.nr_dentry = get_nr_dentry();
	return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif

static void __d_free(struct rcu_head *head)
{
	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);

	WARN_ON(!list_empty(&dentry->d_alias));
	if (dname_external(dentry))
		kfree(dentry->d_name.name);
	kmem_cache_free(dentry_cache, dentry);
}

/*
 * no locks, please.
 */
static void d_free(struct dentry *dentry)
{
	BUG_ON(dentry->d_count);
	this_cpu_dec(nr_dentry);
	if (dentry->d_op && dentry->d_op->d_release)
		dentry->d_op->d_release(dentry);

	/* if dentry was never inserted into hash, immediate free is OK */
	if (hlist_bl_unhashed(&dentry->d_hash))
		__d_free(&dentry->d_u.d_rcu);
	else
		call_rcu(&dentry->d_u.d_rcu, __d_free);
}

/**
 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 * @dentry: the target dentry
 * After this call, in-progress rcu-walk path lookup will fail. This
 * should be called after unhashing, and after changing d_inode (if
 * the dentry has not already been unhashed).
 */
static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
{
	assert_spin_locked(&dentry->d_lock);
	/* Go through a barrier */
	write_seqcount_barrier(&dentry->d_seq);
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. Dentry has no refcount
 * and is unhashed.
 */
static void dentry_iput(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	if (inode) {
		dentry->d_inode = NULL;
		list_del_init(&dentry->d_alias);
		spin_unlock(&dentry->d_lock);
		spin_unlock(&inode->i_lock);
		if (!inode->i_nlink)
			fsnotify_inoderemove(inode);
		if (dentry->d_op && dentry->d_op->d_iput)
			dentry->d_op->d_iput(dentry, inode);
		else
			iput(inode);
	} else {
		spin_unlock(&dentry->d_lock);
	}
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. dentry remains in-use.
 */
static void dentry_unlink_inode(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	dentry->d_inode = NULL;
	list_del_init(&dentry->d_alias);
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&inode->i_lock);
	if (!inode->i_nlink)
		fsnotify_inoderemove(inode);
	if (dentry->d_op && dentry->d_op->d_iput)
		dentry->d_op->d_iput(dentry, inode);
	else
		iput(inode);
}
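
/*
 * Illustrative sketch (hypothetical filesystem code, not from this
 * file): both helpers above hand the inode to ->d_iput when the
 * filesystem provides one, in which case the callback is responsible
 * for the final iput(). The "examplefs" names are made up:
 *
 *	static void examplefs_d_iput(struct dentry *dentry,
 *				     struct inode *inode)
 *	{
 *		examplefs_drop_dentry_state(dentry);	// hypothetical
 *		iput(inode);			// callback must iput()
 *	}
 *
 *	static const struct dentry_operations examplefs_dentry_ops = {
 *		.d_iput	= examplefs_d_iput,
 *	};
 */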

/*
 * dentry_lru_(add|del|move_tail) must be called with d_lock held.
 */
static void dentry_lru_add(struct dentry *dentry)
{
	if (list_empty(&dentry->d_lru)) {
		spin_lock(&dcache_lru_lock);
		list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
		dentry->d_sb->s_nr_dentry_unused++;
		dentry_stat.nr_unused++;
		spin_unlock(&dcache_lru_lock);
	}
}

static void __dentry_lru_del(struct dentry *dentry)
{
	list_del_init(&dentry->d_lru);
	dentry->d_sb->s_nr_dentry_unused--;
	dentry_stat.nr_unused--;
}

static void dentry_lru_del(struct dentry *dentry)
{
	if (!list_empty(&dentry->d_lru)) {
		spin_lock(&dcache_lru_lock);
		__dentry_lru_del(dentry);
		spin_unlock(&dcache_lru_lock);
	}
}

static void dentry_lru_move_tail(struct dentry *dentry)
{
	spin_lock(&dcache_lru_lock);
	if (list_empty(&dentry->d_lru)) {
		list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
		dentry->d_sb->s_nr_dentry_unused++;
		dentry_stat.nr_unused++;
	} else {
		list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
	}
	spin_unlock(&dcache_lru_lock);
}

/**
 * d_kill - kill dentry and return parent
 * @dentry: dentry to kill
 * @parent: parent dentry
 *
 * The dentry must already be unhashed and removed from the LRU.
 *
 * If this is the root of the dentry tree, return NULL.
 *
 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
 * d_kill.
 */
static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
	__releases(dentry->d_lock)
	__releases(parent->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	list_del(&dentry->d_u.d_child);
	/*
	 * Inform try_to_ascend() that we are no longer attached to the
	 * dentry tree
	 */
	dentry->d_flags |= DCACHE_DISCONNECTED;
	if (parent)
		spin_unlock(&parent->d_lock);
	dentry_iput(dentry);
	/*
	 * dentry_iput drops the locks, at which point nobody (except
	 * transient RCU lookups) can reach this dentry.
	 */
	d_free(dentry);
	return parent;
}

/**
 * d_drop - drop a dentry
 * @dentry: dentry to drop
 *
 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 * be found through a VFS lookup any more. Note that this is different from
 * deleting the dentry - d_delete will try to mark the dentry negative if
 * possible, giving a successful _negative_ lookup, while d_drop will
 * just make the cache lookup fail.
 *
 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 * reason (NFS timeouts or autofs deletes).
 *
 * __d_drop requires dentry->d_lock.
 */
void __d_drop(struct dentry *dentry)
{
	if (!(dentry->d_flags & DCACHE_UNHASHED)) {
		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) {
			bit_spin_lock(0,
				(unsigned long *)&dentry->d_sb->s_anon.first);
			dentry->d_flags |= DCACHE_UNHASHED;
			hlist_bl_del_init(&dentry->d_hash);
			__bit_spin_unlock(0,
				(unsigned long *)&dentry->d_sb->s_anon.first);
		} else {
			struct dcache_hash_bucket *b;
			b = d_hash(dentry->d_parent, dentry->d_name.hash);
			spin_lock_bucket(b);
			/*
			 * We may not actually need to put DCACHE_UNHASHED
			 * manipulations under the hash lock, but follow
			 * the principle of least surprise.
			 */
			dentry->d_flags |= DCACHE_UNHASHED;
			hlist_bl_del_rcu(&dentry->d_hash);
			spin_unlock_bucket(b);
			dentry_rcuwalk_barrier(dentry);
		}
	}
}
EXPORT_SYMBOL(__d_drop);

void d_drop(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(d_drop);

/*
 * Finish off a dentry we've decided to kill.
 * dentry->d_lock must be held, returns with it unlocked.
 * If ref is non-zero, then decrement the refcount too.
 * Returns dentry requiring refcount drop, or NULL if we're done.
 */
static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
	__releases(dentry->d_lock)
{
	struct inode *inode;
	struct dentry *parent;

	inode = dentry->d_inode;
	if (inode && !spin_trylock(&inode->i_lock)) {
relock:
		spin_unlock(&dentry->d_lock);
		cpu_relax();
		return dentry; /* try again with same dentry */
	}
	if (IS_ROOT(dentry))
		parent = NULL;
	else
		parent = dentry->d_parent;
	if (parent && !spin_trylock(&parent->d_lock)) {
		if (inode)
			spin_unlock(&inode->i_lock);
		goto relock;
	}

	if (ref)
		dentry->d_count--;
	/* if dentry was on the d_lru list delete it from there */
	dentry_lru_del(dentry);
	/* if it was on the hash then remove it */
	__d_drop(dentry);
	return d_kill(dentry, parent);
}

/*
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
 */

/*
 * dput - release a dentry
 * @dentry: dentry to release
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 */
void dput(struct dentry *dentry)
{
	if (!dentry)
		return;

repeat:
	if (dentry->d_count == 1)
		might_sleep();
	spin_lock(&dentry->d_lock);
	BUG_ON(!dentry->d_count);
	if (dentry->d_count > 1) {
		dentry->d_count--;
		spin_unlock(&dentry->d_lock);
		return;
	}

	if (dentry->d_flags & DCACHE_OP_DELETE) {
		if (dentry->d_op->d_delete(dentry))
			goto kill_it;
	}

	/* Unreachable? Get rid of it */
	if (d_unhashed(dentry))
		goto kill_it;

	/* Otherwise leave it cached and ensure it's on the LRU */
	dentry->d_flags |= DCACHE_REFERENCED;
	dentry_lru_add(dentry);

	dentry->d_count--;
	spin_unlock(&dentry->d_lock);
	return;

kill_it:
	dentry = dentry_kill(dentry, 1);
	if (dentry)
		goto repeat;
}
EXPORT_SYMBOL(dput);
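
/*
 * Illustrative sketch (hypothetical caller, not from this file): every
 * reference obtained via dget()/dget_parent()/d_lookup() must be paired
 * with a dput(). Dropping the last reference may end up in iput() and
 * filesystem code, so dput() can sleep and must not be called from
 * atomic context. "name" is assumed to be a filled-in qstr:
 *
 *	struct dentry *child = d_lookup(parent, &name);	// takes a ref
 *	if (child) {
 *		// ... use child ...
 *		dput(child);			// release the reference
 *	}
 */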

/**
 * d_invalidate - invalidate a dentry
 * @dentry: dentry to invalidate
 *
 * Try to invalidate the dentry if it turns out to be
 * possible. If there are other dentries that can be
 * reached through this one we can't delete it and we
 * return -EBUSY. On success we return 0.
 *
 * no dcache lock.
 */
int d_invalidate(struct dentry * dentry)
{
	/*
	 * If it's already been dropped, return OK.
	 */
	spin_lock(&dentry->d_lock);
	if (d_unhashed(dentry)) {
		spin_unlock(&dentry->d_lock);
		return 0;
	}
	/*
	 * Check whether to do a partial shrink_dcache
	 * to get rid of unused child entries.
	 */
	if (!list_empty(&dentry->d_subdirs)) {
		spin_unlock(&dentry->d_lock);
		shrink_dcache_parent(dentry);
		spin_lock(&dentry->d_lock);
	}

	/*
	 * Somebody else still using it?
	 *
	 * If it's a directory, we can't drop it
	 * for fear of somebody re-populating it
	 * with children (even though dropping it
	 * would make it unreachable from the root,
	 * we might still populate it if it was a
	 * working directory or similar).
	 */
	if (dentry->d_count > 1) {
		if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
			spin_unlock(&dentry->d_lock);
			return -EBUSY;
		}
	}

	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
	return 0;
}
EXPORT_SYMBOL(d_invalidate);
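
/*
 * Illustrative sketch (hypothetical caller, not from this file): a
 * network filesystem that finds a cached name has gone stale on the
 * server can try to drop it; a directory that is still in use is
 * refused with -EBUSY:
 *
 *	if (examplefs_name_is_stale(dentry) &&		// made-up check
 *	    d_invalidate(dentry) == -EBUSY)
 *		pr_debug("dentry still busy, keeping it\n");
 */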

/* This must be called with d_lock held */
static inline void __dget_dlock(struct dentry *dentry)
{
	dentry->d_count++;
}

static inline void __dget(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__dget_dlock(dentry);
	spin_unlock(&dentry->d_lock);
}

struct dentry *dget_parent(struct dentry *dentry)
{
	struct dentry *ret;

repeat:
	/*
	 * Don't need rcu_dereference because we re-check it was correct under
	 * the lock.
	 */
	rcu_read_lock();
	ret = dentry->d_parent;
	if (!ret) {
		rcu_read_unlock();
		goto out;
	}
	spin_lock(&ret->d_lock);
	if (unlikely(ret != dentry->d_parent)) {
		spin_unlock(&ret->d_lock);
		rcu_read_unlock();
		goto repeat;
	}
	rcu_read_unlock();
	BUG_ON(!ret->d_count);
	ret->d_count++;
	spin_unlock(&ret->d_lock);
out:
	return ret;
}
EXPORT_SYMBOL(dget_parent);

/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 * @want_discon: flag, used by d_splice_alias, to request
 *          that only a DISCONNECTED alias be returned.
 *
 * If inode has a hashed alias, or is a directory and has any alias,
 * acquire the reference to alias and return it. Otherwise return NULL.
 * Notice that if inode is a directory there can be only one alias and
 * it can be unhashed only if it has no children, or if it is the root
 * of a filesystem.
 *
 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 * any other hashed alias over that one unless @want_discon is set,
 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 */
static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
{
	struct dentry *alias, *discon_alias;

again:
	discon_alias = NULL;
	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				discon_alias = alias;
			} else if (!want_discon) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
	}
	if (discon_alias) {
		alias = discon_alias;
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
		goto again;
	}
	return NULL;
}

struct dentry *d_find_alias(struct inode *inode)
{
	struct dentry *de = NULL;

	if (!list_empty(&inode->i_dentry)) {
		spin_lock(&inode->i_lock);
		de = __d_find_alias(inode, 0);
		spin_unlock(&inode->i_lock);
	}
	return de;
}
EXPORT_SYMBOL(d_find_alias);
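
/*
 * Illustrative sketch (hypothetical caller, not from this file): given
 * only an inode, e.g. in fsnotify or export code, a cached dentry can
 * be recovered through one of its aliases. The returned dentry is
 * referenced and must be dput():
 *
 *	struct dentry *alias = d_find_alias(inode);
 *
 *	if (alias) {
 *		pr_info("inode %lu is cached as %s\n",
 *			inode->i_ino, alias->d_name.name);
 *		dput(alias);
 *	}
 */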

/*
 * Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
	struct dentry *dentry;
restart:
	spin_lock(&inode->i_lock);
	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
		spin_lock(&dentry->d_lock);
		if (!dentry->d_count) {
			__dget_dlock(dentry);
			__d_drop(dentry);
			spin_unlock(&dentry->d_lock);
			spin_unlock(&inode->i_lock);
			dput(dentry);
			goto restart;
		}
		spin_unlock(&dentry->d_lock);
	}
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_prune_aliases);

/*
 * Try to throw away a dentry - free the inode, dput the parent.
 * Requires dentry->d_lock is held, and dentry->d_count == 0.
 * Releases dentry->d_lock.
 *
 * This may fail if locks cannot be acquired; no problem, just try again.
 */
static void try_prune_one_dentry(struct dentry *dentry)
	__releases(dentry->d_lock)
{
	struct dentry *parent;

	parent = dentry_kill(dentry, 0);
	/*
	 * If dentry_kill returns NULL, we have nothing more to do.
	 * if it returns the same dentry, trylocks failed. In either
	 * case, just loop again.
	 *
	 * Otherwise, we need to prune ancestors too. This is necessary
	 * to prevent quadratic behavior of shrink_dcache_parent(), but
	 * is also expected to be beneficial in reducing dentry cache
	 * fragmentation.
	 */
	if (!parent)
		return;
	if (parent == dentry)
		return;

	/* Prune ancestors. */
	dentry = parent;
	while (dentry) {
		spin_lock(&dentry->d_lock);
		if (dentry->d_count > 1) {
			dentry->d_count--;
			spin_unlock(&dentry->d_lock);
			return;
		}
		dentry = dentry_kill(dentry, 1);
	}
}

static void shrink_dentry_list(struct list_head *list)
{
	struct dentry *dentry;

	rcu_read_lock();
	for (;;) {
		dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
		if (&dentry->d_lru == list)
			break; /* empty */
		spin_lock(&dentry->d_lock);
		if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
			spin_unlock(&dentry->d_lock);
			continue;
		}

		/*
		 * We found an inuse dentry which was not removed from
		 * the LRU because of laziness during lookup. Do not free
		 * it - just keep it off the LRU list.
		 */
		if (dentry->d_count) {
			dentry_lru_del(dentry);
			spin_unlock(&dentry->d_lock);
			continue;
		}

		rcu_read_unlock();

		try_prune_one_dentry(dentry);

		rcu_read_lock();
	}
	rcu_read_unlock();
}

/**
 * __shrink_dcache_sb - shrink the dentry LRU on a given superblock
 * @sb:		superblock to shrink dentry LRU.
 * @count:	number of entries to prune
 * @flags:	flags to control the dentry processing
 *
 * If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
 */
static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
{
	/* called from prune_dcache() and shrink_dcache_parent() */
	struct dentry *dentry;
	LIST_HEAD(referenced);
	LIST_HEAD(tmp);
	int cnt = *count;

relock:
	spin_lock(&dcache_lru_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		dentry = list_entry(sb->s_dentry_lru.prev,
				struct dentry, d_lru);
		BUG_ON(dentry->d_sb != sb);

		if (!spin_trylock(&dentry->d_lock)) {
			spin_unlock(&dcache_lru_lock);
			cpu_relax();
			goto relock;
		}

		/*
		 * If we are honouring the DCACHE_REFERENCED flag and the
		 * dentry has this flag set, don't free it. Clear the flag
		 * and put it back on the LRU.
		 */
		if (flags & DCACHE_REFERENCED &&
				dentry->d_flags & DCACHE_REFERENCED) {
			dentry->d_flags &= ~DCACHE_REFERENCED;
			list_move(&dentry->d_lru, &referenced);
			spin_unlock(&dentry->d_lock);
		} else {
			list_move_tail(&dentry->d_lru, &tmp);
			spin_unlock(&dentry->d_lock);
			if (!--cnt)
				break;
		}
		cond_resched_lock(&dcache_lru_lock);
	}
	if (!list_empty(&referenced))
		list_splice(&referenced, &sb->s_dentry_lru);
	spin_unlock(&dcache_lru_lock);

	shrink_dentry_list(&tmp);

	*count = cnt;
}

/**
 * prune_dcache - shrink the dcache
 * @count: number of entries to try to free
 *
 * Shrink the dcache. This is done when we need more memory, or simply when we
 * need to unmount something (at which point we need to unuse all dentries).
 *
 * This function may fail to free any resources if all the dentries are in
 * use.
 */
static void prune_dcache(int count)
{
	struct super_block *sb, *p = NULL;
	int w_count;
	int unused = dentry_stat.nr_unused;
	int prune_ratio;
	int pruned;

	if (unused == 0 || count == 0)
		return;
	if (count >= unused)
		prune_ratio = 1;
	else
		prune_ratio = unused / count;
	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (list_empty(&sb->s_instances))
			continue;
		if (sb->s_nr_dentry_unused == 0)
			continue;
		sb->s_count++;
		/* Now, we reclaim unused dentries with fairness.
		 * We reclaim the same percentage from each superblock.
		 * We calculate number of dentries to scan on this sb
		 * as follows, but the implementation is arranged to avoid
		 * overflows:
		 * number of dentries to scan on this sb =
		 * count * (number of dentries on this sb /
		 * number of dentries in the machine)
		 */
		spin_unlock(&sb_lock);
		if (prune_ratio != 1)
			w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
		else
			w_count = sb->s_nr_dentry_unused;
		pruned = w_count;
		/*
		 * We need to be sure this filesystem isn't being unmounted,
		 * otherwise we could race with generic_shutdown_super(), and
		 * end up holding a reference to an inode while the filesystem
		 * is unmounted. So we try to get s_umount, and make sure
		 * s_root isn't NULL.
		 */
		if (down_read_trylock(&sb->s_umount)) {
			if ((sb->s_root != NULL) &&
			    (!list_empty(&sb->s_dentry_lru))) {
				__shrink_dcache_sb(sb, &w_count,
						DCACHE_REFERENCED);
				pruned -= w_count;
			}
			up_read(&sb->s_umount);
		}
		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		count -= pruned;
		p = sb;
		/* more work left to do? */
		if (count <= 0)
			break;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}
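
/*
 * Worked example for the proportional scan above (illustrative numbers,
 * not from the source): with 1000 unused dentries system-wide and
 * count == 100, prune_ratio is 1000 / 100 = 10. A superblock holding
 * 250 unused dentries is then asked to scan 250 / 10 + 1 = 26 of them,
 * i.e. roughly the same tenth that is being reclaimed globally.
 */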

/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This is used to free
 * the dcache before unmounting a file system.
 */
void shrink_dcache_sb(struct super_block *sb)
{
	LIST_HEAD(tmp);

	spin_lock(&dcache_lru_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		list_splice_init(&sb->s_dentry_lru, &tmp);
		spin_unlock(&dcache_lru_lock);
		shrink_dentry_list(&tmp);
		spin_lock(&dcache_lru_lock);
	}
	spin_unlock(&dcache_lru_lock);
}
EXPORT_SYMBOL(shrink_dcache_sb);

/*
 * destroy a single subtree of dentries for unmount
 * - see the comments on shrink_dcache_for_umount() for a description of the
 *   locking
 */
static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
{
	struct dentry *parent;
	unsigned detached = 0;

	BUG_ON(!IS_ROOT(dentry));

	/* detach this root from the system */
	spin_lock(&dentry->d_lock);
	dentry_lru_del(dentry);
	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);

	for (;;) {
		/* descend to the first leaf in the current subtree */
		while (!list_empty(&dentry->d_subdirs)) {
			struct dentry *loop;

			/* this is a branch with children - detach all of them
			 * from the system in one go */
			spin_lock(&dentry->d_lock);
			list_for_each_entry(loop, &dentry->d_subdirs,
					    d_u.d_child) {
				spin_lock_nested(&loop->d_lock,
						DENTRY_D_LOCK_NESTED);
				dentry_lru_del(loop);
				__d_drop(loop);
				spin_unlock(&loop->d_lock);
			}
			spin_unlock(&dentry->d_lock);

			/* move to the first child */
			dentry = list_entry(dentry->d_subdirs.next,
					    struct dentry, d_u.d_child);
		}

		/* consume the dentries from this leaf up through its parents
		 * until we find one with children or run out altogether */
		do {
			struct inode *inode;

			if (dentry->d_count != 0) {
				printk(KERN_ERR
				       "BUG: Dentry %p{i=%lx,n=%s}"
				       " still in use (%d)"
				       " [unmount of %s %s]\n",
				       dentry,
				       dentry->d_inode ?
				       dentry->d_inode->i_ino : 0UL,
				       dentry->d_name.name,
				       dentry->d_count,
				       dentry->d_sb->s_type->name,
				       dentry->d_sb->s_id);
				BUG();
			}

			if (IS_ROOT(dentry)) {
				parent = NULL;
				list_del(&dentry->d_u.d_child);
			} else {
				parent = dentry->d_parent;
				spin_lock(&parent->d_lock);
				parent->d_count--;
				list_del(&dentry->d_u.d_child);
				spin_unlock(&parent->d_lock);
			}

			detached++;

			inode = dentry->d_inode;
			if (inode) {
				dentry->d_inode = NULL;
				list_del_init(&dentry->d_alias);
				if (dentry->d_op && dentry->d_op->d_iput)
					dentry->d_op->d_iput(dentry, inode);
				else
					iput(inode);
			}

			d_free(dentry);

			/* finished when we fall off the top of the tree,
			 * otherwise we ascend to the parent and move to the
			 * next sibling if there is one */
			if (!parent)
				return;
			dentry = parent;
		} while (list_empty(&dentry->d_subdirs));

		dentry = list_entry(dentry->d_subdirs.next,
				    struct dentry, d_u.d_child);
	}
}

/*
 * destroy the dentries attached to a superblock on unmounting
 * - we don't need to use dentry->d_lock because:
 *   - the superblock is detached from all mountings and open files, so the
 *     dentry trees will not be rearranged by the VFS
 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
 *     any dentries belonging to this superblock that it comes across
 *   - the filesystem itself is no longer permitted to rearrange the dentries
 *     in this superblock
 */
void shrink_dcache_for_umount(struct super_block *sb)
{
	struct dentry *dentry;

	if (down_read_trylock(&sb->s_umount))
		BUG();

	dentry = sb->s_root;
	sb->s_root = NULL;
	spin_lock(&dentry->d_lock);
	dentry->d_count--;
	spin_unlock(&dentry->d_lock);
	shrink_dcache_for_umount_subtree(dentry);

	while (!hlist_bl_empty(&sb->s_anon)) {
		dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon),
					struct dentry, d_hash);
		shrink_dcache_for_umount_subtree(dentry);
	}
}

/*
 * This tries to ascend one level of parenthood, but
 * we can race with renaming, so we need to re-check
 * the parenthood after dropping the lock and check
 * that the sequence number still matches.
 */
static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
{
	struct dentry *new = old->d_parent;

	rcu_read_lock();
	spin_unlock(&old->d_lock);
	spin_lock(&new->d_lock);

	/*
	 * might go back up the wrong parent if we have had a rename
	 * or deletion
	 */
	if (new != old->d_parent ||
	    (old->d_flags & DCACHE_DISCONNECTED) ||
	    (!locked && read_seqretry(&rename_lock, seq))) {
		spin_unlock(&new->d_lock);
		new = NULL;
	}
	rcu_read_unlock();
	return new;
}

/*
 * Search for at least 1 mount point in the dentry's subdirs.
 * We descend to the next level whenever the d_subdirs
 * list is non-empty and continue searching.
 */

/**
 * have_submounts - check for mounts over a dentry
 * @parent: dentry to check.
 *
 * Return true if the parent or its subdirectories contain
 * a mount point
 */
int have_submounts(struct dentry *parent)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int locked = 0;

	seq = read_seqbegin(&rename_lock);
again:
	this_parent = parent;

	if (d_mountpoint(parent))
		goto positive;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		/* Have we found a mount point ? */
		if (d_mountpoint(dentry)) {
			spin_unlock(&dentry->d_lock);
			spin_unlock(&this_parent->d_lock);
			goto positive;
		}
		if (!list_empty(&dentry->d_subdirs)) {
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}
		spin_unlock(&dentry->d_lock);
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		struct dentry *child = this_parent;
		this_parent = try_to_ascend(this_parent, locked, seq);
		if (!this_parent)
			goto rename_retry;
		next = child->d_u.d_child.next;
		goto resume;
	}
	spin_unlock(&this_parent->d_lock);
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return 0; /* No mount points found in tree */
positive:
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return 1;

rename_retry:
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}
EXPORT_SYMBOL(have_submounts);
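
/*
 * Illustrative sketch (hypothetical caller, not from this file): a
 * ->d_revalidate() implementation that expires stale cached directories
 * must not invalidate one that still has something mounted on or below
 * it:
 *
 *	if (examplefs_entry_expired(dentry)) {		// made-up check
 *		if (have_submounts(dentry))
 *			return 1;	// keep it: mounts depend on it
 *		return 0;		// let the VFS invalidate it
 *	}
 */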

/*
 * Search the dentry child list for the specified parent,
 * and move any unused dentries to the end of the unused
 * list for prune_dcache(). We descend to the next level
 * whenever the d_subdirs list is non-empty and continue
 * searching.
 *
 * It returns zero iff there are no unused children,
 * otherwise it returns the number of children moved to
 * the end of the unused list. This may not be the total
 * number of unused children, because select_parent can
 * drop the lock and return early due to latency
 * constraints.
 */
static int select_parent(struct dentry * parent)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int found = 0;
	int locked = 0;

	seq = read_seqbegin(&rename_lock);
again:
	this_parent = parent;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);

		/*
		 * move only zero ref count dentries to the end
		 * of the unused list for prune_dcache
		 */
		if (!dentry->d_count) {
			dentry_lru_move_tail(dentry);
			found++;
		} else {
			dentry_lru_del(dentry);
		}

		/*
		 * We can return to the caller if we have found some (this
		 * ensures forward progress). We'll be coming back to find
		 * the rest.
		 */
		if (found && need_resched()) {
			spin_unlock(&dentry->d_lock);
			goto out;
		}

		/*
		 * Descend a level if the d_subdirs list is non-empty.
		 */
		if (!list_empty(&dentry->d_subdirs)) {
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}

		spin_unlock(&dentry->d_lock);
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		struct dentry *child = this_parent;
		this_parent = try_to_ascend(this_parent, locked, seq);
		if (!this_parent)
			goto rename_retry;
		next = child->d_u.d_child.next;
		goto resume;
	}
out:
	spin_unlock(&this_parent->d_lock);
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return found;

rename_retry:
	if (found)
		return found;
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}

/**
 * shrink_dcache_parent - prune dcache
 * @parent: parent of entries to prune
 *
 * Prune the dcache to remove unused children of the parent dentry.
 */
void shrink_dcache_parent(struct dentry * parent)
{
	struct super_block *sb = parent->d_sb;
	int found;

	while ((found = select_parent(parent)) != 0)
		__shrink_dcache_sb(sb, &found, 0);
}
EXPORT_SYMBOL(shrink_dcache_parent);

/*
 * Scan `nr' dentries and return the number which remain.
 *
 * We need to avoid reentering the filesystem if the caller is performing a
 * GFP_NOFS allocation attempt. One example deadlock is:
 *
 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
 *
 * In this case we return -1 to tell the caller that we bailed.
 */
static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
	if (nr) {
		if (!(gfp_mask & __GFP_FS))
			return -1;
		prune_dcache(nr);
	}

	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}

static struct shrinker dcache_shrinker = {
	.shrink = shrink_dcache_memory,
	.seeks = DEFAULT_SEEKS,
};

/**
 * d_alloc - allocate a dcache entry
 * @parent: parent of entry to allocate
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
 * available. On success the dentry is returned. The name passed in is
 * copied and the copy passed in may be reused after this call.
 */
struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
{
	struct dentry *dentry;
	char *dname;

	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
	if (!dentry)
		return NULL;

	if (name->len > DNAME_INLINE_LEN-1) {
		dname = kmalloc(name->len + 1, GFP_KERNEL);
		if (!dname) {
			kmem_cache_free(dentry_cache, dentry);
			return NULL;
		}
	} else {
		dname = dentry->d_iname;
	}
	dentry->d_name.name = dname;

	dentry->d_name.len = name->len;
	dentry->d_name.hash = name->hash;
	memcpy(dname, name->name, name->len);
	dname[name->len] = 0;

	dentry->d_count = 1;
	dentry->d_flags = DCACHE_UNHASHED;
	spin_lock_init(&dentry->d_lock);
	seqcount_init(&dentry->d_seq);
	dentry->d_inode = NULL;
	dentry->d_parent = NULL;
	dentry->d_sb = NULL;
	dentry->d_op = NULL;
	dentry->d_fsdata = NULL;
	INIT_HLIST_BL_NODE(&dentry->d_hash);
	INIT_LIST_HEAD(&dentry->d_lru);
	INIT_LIST_HEAD(&dentry->d_subdirs);
	INIT_LIST_HEAD(&dentry->d_alias);
	INIT_LIST_HEAD(&dentry->d_u.d_child);

	if (parent) {
		spin_lock(&parent->d_lock);
		/*
		 * don't need child lock because it is not subject
		 * to concurrency here
		 */
		__dget_dlock(parent);
		dentry->d_parent = parent;
		dentry->d_sb = parent->d_sb;
		d_set_d_op(dentry, dentry->d_sb->s_d_op);
		list_add(&dentry->d_u.d_child, &parent->d_subdirs);
		spin_unlock(&parent->d_lock);
	}

	this_cpu_inc(nr_dentry);

	return dentry;
}
EXPORT_SYMBOL(d_alloc);

struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
{
	struct dentry *dentry = d_alloc(NULL, name);
	if (dentry) {
		dentry->d_sb = sb;
		d_set_d_op(dentry, dentry->d_sb->s_d_op);
		dentry->d_parent = dentry;
		dentry->d_flags |= DCACHE_DISCONNECTED;
	}
	return dentry;
}
EXPORT_SYMBOL(d_alloc_pseudo);

struct dentry *d_alloc_name(struct dentry *parent, const char *name)
{
	struct qstr q;

	q.name = name;
	q.len = strlen(name);
	q.hash = full_name_hash(q.name, q.len);
	return d_alloc(parent, &q);
}
EXPORT_SYMBOL(d_alloc_name);
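
/*
 * Illustrative sketch (hypothetical caller, not from this file):
 * in-kernel filesystems often build their initial tree with
 * d_alloc_name() plus d_add(), in the style of debugfs-like code.
 * "examplefs_make_inode" is a made-up helper:
 *
 *	struct dentry *dentry = d_alloc_name(parent, "status");
 *
 *	if (dentry) {
 *		struct inode *inode = examplefs_make_inode(parent->d_sb);
 *		if (inode)
 *			d_add(dentry, inode);	// instantiate and hash
 *		else
 *			dput(dentry);		// give the dentry back
 *	}
 */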

void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
{
	WARN_ON_ONCE(dentry->d_op);
	WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH	|
				DCACHE_OP_COMPARE	|
				DCACHE_OP_REVALIDATE	|
				DCACHE_OP_DELETE));
	dentry->d_op = op;
	if (!op)
		return;
	if (op->d_hash)
		dentry->d_flags |= DCACHE_OP_HASH;
	if (op->d_compare)
		dentry->d_flags |= DCACHE_OP_COMPARE;
	if (op->d_revalidate)
		dentry->d_flags |= DCACHE_OP_REVALIDATE;
	if (op->d_delete)
		dentry->d_flags |= DCACHE_OP_DELETE;
}
EXPORT_SYMBOL(d_set_d_op);

static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
	spin_lock(&dentry->d_lock);
	if (inode) {
		if (unlikely(IS_AUTOMOUNT(inode)))
			dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
		list_add(&dentry->d_alias, &inode->i_dentry);
	}
	dentry->d_inode = inode;
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	fsnotify_d_instantiate(dentry, inode);
}

/**
 * d_instantiate - fill in inode information for a dentry
 * @entry: dentry to complete
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry.
 *
 * This turns negative dentries into productive full members
 * of society.
 *
 * NOTE! This assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */
void d_instantiate(struct dentry *entry, struct inode * inode)
{
	BUG_ON(!list_empty(&entry->d_alias));
	if (inode)
		spin_lock(&inode->i_lock);
	__d_instantiate(entry, inode);
	if (inode)
		spin_unlock(&inode->i_lock);
	security_d_instantiate(entry, inode);
}
EXPORT_SYMBOL(d_instantiate);
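
/*
 * Illustrative sketch (hypothetical ->create() method, not from this
 * file): the usual pattern is to allocate a fresh inode and bind the
 * caller's negative dentry to it; d_instantiate() consumes the inode
 * reference on behalf of the dcache. "examplefs_new_inode" is made up:
 *
 *	static int examplefs_create(struct inode *dir, struct dentry *dentry,
 *				    int mode, struct nameidata *nd)
 *	{
 *		struct inode *inode = examplefs_new_inode(dir->i_sb, mode);
 *
 *		if (!inode)
 *			return -ENOSPC;
 *		d_instantiate(dentry, inode);	// dentry is now positive
 *		return 0;
 *	}
 */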

/**
 * d_instantiate_unique - instantiate a non-aliased dentry
 * @entry: dentry to instantiate
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry. On success, it returns NULL.
 * If an unhashed alias of "entry" already exists, then we return the
 * aliased dentry instead and drop one reference to inode.
 *
 * Note that in order to avoid conflicts with rename() etc, the caller
 * had better be holding the parent directory semaphore.
 *
 * This also assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */
static struct dentry *__d_instantiate_unique(struct dentry *entry,
					     struct inode *inode)
{
	struct dentry *alias;
	int len = entry->d_name.len;
	const char *name = entry->d_name.name;
	unsigned int hash = entry->d_name.hash;

	if (!inode) {
		__d_instantiate(entry, NULL);
		return NULL;
	}

	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
		struct qstr *qstr = &alias->d_name;

		/*
		 * Don't need alias->d_lock here, because aliases with
		 * d_parent == entry->d_parent are not subject to name or
		 * parent changes, because the parent inode i_mutex is held.
		 */
		if (qstr->hash != hash)
			continue;
		if (alias->d_parent != entry->d_parent)
			continue;
		if (dentry_cmp(qstr->name, qstr->len, name, len))
			continue;
		__dget(alias);
		return alias;
	}

	__d_instantiate(entry, inode);
	return NULL;
}

struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
	struct dentry *result;

	BUG_ON(!list_empty(&entry->d_alias));

	if (inode)
		spin_lock(&inode->i_lock);
	result = __d_instantiate_unique(entry, inode);
	if (inode)
		spin_unlock(&inode->i_lock);

	if (!result) {
		security_d_instantiate(entry, inode);
		return NULL;
	}

	BUG_ON(!d_unhashed(result));
	iput(inode);
	return result;
}

EXPORT_SYMBOL(d_instantiate_unique);

/**
 * d_alloc_root - allocate root dentry
 * @root_inode: inode to allocate the root for
 *
 * Allocate a root ("/") dentry for the inode given. The inode is
 * instantiated and returned. %NULL is returned if there is insufficient
 * memory or the inode passed is %NULL.
 */
struct dentry * d_alloc_root(struct inode * root_inode)
{
	struct dentry *res = NULL;

	if (root_inode) {
		static const struct qstr name = { .name = "/", .len = 1 };

		res = d_alloc(NULL, &name);
		if (res) {
			res->d_sb = root_inode->i_sb;
			d_set_d_op(res, res->d_sb->s_d_op);
			res->d_parent = res;
			d_instantiate(res, root_inode);
		}
	}
	return res;
}
EXPORT_SYMBOL(d_alloc_root);

static struct dentry * __d_find_any_alias(struct inode *inode)
{
	struct dentry *alias;

	if (list_empty(&inode->i_dentry))
		return NULL;
	alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
	__dget(alias);
	return alias;
}

static struct dentry * d_find_any_alias(struct inode *inode)
{
	struct dentry *de;

	spin_lock(&inode->i_lock);
	de = __d_find_any_alias(inode);
	spin_unlock(&inode->i_lock);
	return de;
}

/**
 * d_obtain_alias - find or allocate a dentry for a given inode
 * @inode: inode to allocate the dentry for
 *
 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
 * similar open by handle operations. The returned dentry may be anonymous,
 * or may have a full name (if the inode was already in the cache).
 *
 * When called on a directory inode, we must ensure that the inode only ever
 * has one dentry. If a dentry is found, that is returned instead of
 * allocating a new one.
 *
 * On successful return, the reference to the inode has been transferred
 * to the dentry. In case of an error the reference on the inode is released.
 * To make it easier to use in export operations a %NULL or IS_ERR inode may
 * be passed in and the error will be propagated to the return value,
 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
 */
struct dentry *d_obtain_alias(struct inode *inode)
{
	static const struct qstr anonstring = { .name = "" };
	struct dentry *tmp;
	struct dentry *res;

	if (!inode)
		return ERR_PTR(-ESTALE);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	res = d_find_any_alias(inode);
	if (res)
		goto out_iput;

	tmp = d_alloc(NULL, &anonstring);
	if (!tmp) {
		res = ERR_PTR(-ENOMEM);
		goto out_iput;
	}
	tmp->d_parent = tmp; /* make sure dput doesn't croak */

	spin_lock(&inode->i_lock);
	res = __d_find_any_alias(inode);
	if (res) {
		spin_unlock(&inode->i_lock);
		dput(tmp);
		goto out_iput;
	}

	/* attach a disconnected dentry */
	spin_lock(&tmp->d_lock);
	tmp->d_sb = inode->i_sb;
	d_set_d_op(tmp, tmp->d_sb->s_d_op);
	tmp->d_inode = inode;
	tmp->d_flags |= DCACHE_DISCONNECTED;
	list_add(&tmp->d_alias, &inode->i_dentry);
	bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
	tmp->d_flags &= ~DCACHE_UNHASHED;
	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
	__bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
	spin_unlock(&tmp->d_lock);
	spin_unlock(&inode->i_lock);
	security_d_instantiate(tmp, inode);

	return tmp;

out_iput:
	if (res && !IS_ERR(res))
		security_d_instantiate(res, inode);
	iput(inode);
	return res;
}
EXPORT_SYMBOL(d_obtain_alias);

/**
 * d_splice_alias - splice a disconnected dentry into the tree if one exists
 * @inode:  the inode which may have a disconnected dentry
 * @dentry: a negative dentry which we want to point to the inode.
 *
 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
 * and return it, else simply d_add the inode to the dentry and return NULL.
 *
 * This is needed in the lookup routine of any filesystem that is exportable
 * (via knfsd) so that we can build dcache paths to directories effectively.
 *
 * If a dentry was found and moved, then it is returned. Otherwise NULL
 * is returned. This matches the expected return value of ->lookup.
 */
struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
{
	struct dentry *new = NULL;

	if (inode && S_ISDIR(inode->i_mode)) {
		spin_lock(&inode->i_lock);
		new = __d_find_alias(inode, 1);
		if (new) {
			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
			spin_unlock(&inode->i_lock);
			security_d_instantiate(new, inode);
			d_move(new, dentry);
			iput(inode);
		} else {
			/* already taking inode->i_lock, so d_add() by hand */
			__d_instantiate(dentry, inode);
			spin_unlock(&inode->i_lock);
			security_d_instantiate(dentry, inode);
			d_rehash(dentry);
		}
	} else
		d_add(dentry, inode);
	return new;
}
EXPORT_SYMBOL(d_splice_alias);
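
/*
 * Illustrative sketch (hypothetical ->lookup() method, not from this
 * file): an exportable filesystem can return d_splice_alias() directly,
 * so a preexisting disconnected directory alias is reused instead of a
 * second dentry being created for the same directory. A %NULL inode
 * simply produces a negative dentry. "examplefs_find_inode" is made up:
 *
 *	static struct dentry *examplefs_lookup(struct inode *dir,
 *			struct dentry *dentry, struct nameidata *nd)
 *	{
 *		struct inode *inode = examplefs_find_inode(dir, dentry);
 *
 *		return d_splice_alias(inode, dentry);
 *	}
 */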

/**
 * d_add_ci - lookup or allocate new dentry with case-exact name
 * @inode:  the inode case-insensitive lookup has found
 * @dentry: the negative dentry that was passed to the parent's lookup func
 * @name:   the case-exact name to be associated with the returned dentry
 *
 * This is to avoid filling the dcache with case-insensitive names to the
 * same inode, only the actual correct case is stored in the dcache for
 * case-insensitive filesystems.
 *
 * For a case-insensitive lookup match and if the case-exact dentry
 * already exists in the dcache, use it and return it.
 *
 * If no entry exists with the exact case name, allocate new dentry with
 * the exact case, and return the spliced entry.
 */
struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
			struct qstr *name)
{
	int error;
	struct dentry *found;
	struct dentry *new;

	/*
	 * First check if a dentry matching the name already exists,
	 * if not go ahead and create it now.
	 */
	found = d_hash_and_lookup(dentry->d_parent, name);
	if (!found) {
		new = d_alloc(dentry->d_parent, name);
		if (!new) {
			error = -ENOMEM;
			goto err_out;
		}

		found = d_splice_alias(inode, new);
		if (found) {
			dput(new);
			return found;
		}
		return new;
	}

	/*
	 * If a matching dentry exists, and it's not negative use it.
	 *
	 * Decrement the reference count to balance the iget() done
	 * earlier on.
	 */
	if (found->d_inode) {
		if (unlikely(found->d_inode != inode)) {
			/* This can't happen because bad inodes are unhashed. */
			BUG_ON(!is_bad_inode(inode));
			BUG_ON(!is_bad_inode(found->d_inode));
		}
		iput(inode);
		return found;
	}

	/*
	 * Negative dentry: instantiate it unless the inode is a directory and
	 * already has a dentry.
	 */
	spin_lock(&inode->i_lock);
	if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
		__d_instantiate(found, inode);
		spin_unlock(&inode->i_lock);
		security_d_instantiate(found, inode);
		return found;
	}

	/*
	 * In case a directory already has a (disconnected) entry grab a
	 * reference to it, move it in place and use it.
	 */
	new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
	__dget(new);
	spin_unlock(&inode->i_lock);
	security_d_instantiate(found, inode);
	d_move(new, found);
	iput(inode);
	dput(found);
	return new;

err_out:
	iput(inode);
	return ERR_PTR(error);
}
EXPORT_SYMBOL(d_add_ci);
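
/*
 * Illustrative sketch (hypothetical caller, not from this file): a
 * case-insensitive ->lookup() that found the on-disk entry under a
 * different case passes the exact on-disk spelling to d_add_ci(); the
 * negative-lookup path is omitted and "examplefs_find_ci" is made up:
 *
 *	struct qstr exact;		// filled with on-disk spelling
 *	struct inode *inode;
 *
 *	inode = examplefs_find_ci(dir, &dentry->d_name, &exact);
 *	if (inode)
 *		return d_add_ci(dentry, inode, &exact);
 */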

/**
 * __d_lookup_rcu - search for a dentry (racy, store-free)
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 * @seq: returns d_seq value at the point where the dentry was found
 * @inode: returns dentry->d_inode when the inode was found valid.
 * Returns: dentry, or NULL
 *
 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
 * resolution (store-free path walking) design described in
 * Documentation/filesystems/path-lookup.txt.
 *
 * This is not to be used outside core vfs.
 *
 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
 * held, and rcu_read_lock held. The returned dentry must not be stored
 * without taking d_lock and checking the d_seq sequence count against @seq
 * returned here.
 *
 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
 * function.
 *
 * Alternatively, __d_lookup_rcu may be called again to look up the child of
 * the returned dentry, so long as its parent's seqlock is checked after the
 * child is looked up. Thus, an interlocking stepping of sequence lock checks
 * is formed, giving integrity down the path walk.
 */
struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
				unsigned *seq, struct inode **inode)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct dcache_hash_bucket *b = d_hash(parent, hash);
	struct hlist_bl_node *node;
	struct dentry *dentry;

	/*
	 * Note: There is significant duplication with __d_lookup which is
	 * required to prevent single threaded performance regressions
	 * especially on architectures where smp_rmb (in seqcounts) are costly.
	 * Keep the two functions in sync.
	 */

	/*
	 * The hash list is protected using RCU.
	 *
	 * Carefully use d_seq when comparing a candidate dentry, to avoid
	 * races with d_move().
	 *
	 * It is possible that concurrent renames can mess up our list
	 * walk here and result in missing our dentry, resulting in the
	 * false-negative result. d_lookup() protects against concurrent
	 * renames using rename_lock seqlock.
	 *
	 * See Documentation/filesystems/path-lookup.txt for more details.
	 */
	hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
		struct inode *i;
		const char *tname;
		int tlen;

		if (dentry->d_name.hash != hash)
			continue;

seqretry:
		*seq = read_seqcount_begin(&dentry->d_seq);
		if (dentry->d_parent != parent)
			continue;
		if (d_unhashed(dentry))
			continue;
		tlen = dentry->d_name.len;
		tname = dentry->d_name.name;
		i = dentry->d_inode;
		prefetch(tname);
		if (i)
			prefetch(i);
		/*
		 * This seqcount check is required to ensure name and
		 * len are loaded atomically, so as not to walk off the
		 * edge of memory when walking. If we could load this
		 * atomically some other way, we could drop this check.
		 */
		if (read_seqcount_retry(&dentry->d_seq, *seq))
			goto seqretry;
		if (parent->d_flags & DCACHE_OP_COMPARE) {
			if (parent->d_op->d_compare(parent, *inode,
						dentry, i,
						tlen, tname, name))
				continue;
		} else {
			if (dentry_cmp(tname, tlen, str, len))
				continue;
		}
		/*
		 * No extra seqcount check is required after the name
		 * compare. The caller must perform a seqcount check in
		 * order to do anything useful with the returned dentry
		 * anyway.
		 */
		*inode = i;
		return dentry;
	}
	return NULL;
}

/**
 * d_lookup - search for a dentry
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 * Returns: dentry, or NULL
 *
 * d_lookup searches the children of the parent dentry for the name in
 * question. If the dentry is found its reference count is incremented and the
 * dentry is returned. The caller must use dput to free the entry when it has
 * finished using it. %NULL is returned if the dentry does not exist.
 */
struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
{
	struct dentry *dentry;
	unsigned seq;

	do {
		seq = read_seqbegin(&rename_lock);
		dentry = __d_lookup(parent, name);
		if (dentry)
			break;
	} while (read_seqretry(&rename_lock, seq));
	return dentry;
}
EXPORT_SYMBOL(d_lookup);

/**
 * __d_lookup - search for a dentry (racy)
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 * Returns: dentry, or NULL
 *
 * __d_lookup is like d_lookup, however it may (rarely) return a
 * false-negative result due to unrelated rename activity.
 *
 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
 * however it must be used carefully, eg. with a following d_lookup in
 * the case of failure.
 *
 * __d_lookup callers must be commented.
 */
struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct dcache_hash_bucket *b = d_hash(parent, hash);
	struct hlist_bl_node *node;
	struct dentry *found = NULL;
	struct dentry *dentry;

	/*
	 * Note: There is significant duplication with __d_lookup_rcu which is
	 * required to prevent single threaded performance regressions
	 * especially on architectures where smp_rmb (in seqcounts) are costly.
	 * Keep the two functions in sync.
	 */

	/*
	 * The hash list is protected using RCU.
	 *
	 * Take d_lock when comparing a candidate dentry, to avoid races
	 * with d_move().
	 *
	 * It is possible that concurrent renames can mess up our list
	 * walk here and result in missing our dentry, resulting in the
	 * false-negative result. d_lookup() protects against concurrent
	 * renames using rename_lock seqlock.
	 *
	 * See Documentation/filesystems/path-lookup.txt for more details.
	 */
	rcu_read_lock();

	hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
		const char *tname;
		int tlen;

		if (dentry->d_name.hash != hash)
			continue;

		spin_lock(&dentry->d_lock);
		if (dentry->d_parent != parent)
			goto next;
		if (d_unhashed(dentry))
			goto next;

		/*
		 * It is safe to compare names since d_move() cannot
		 * change the qstr (protected by d_lock).
		 */
		tlen = dentry->d_name.len;
		tname = dentry->d_name.name;
		if (parent->d_flags & DCACHE_OP_COMPARE) {
			if (parent->d_op->d_compare(parent, parent->d_inode,
						dentry, dentry->d_inode,
						tlen, tname, name))
				goto next;
		} else {
			if (dentry_cmp(tname, tlen, str, len))
				goto next;
		}

		dentry->d_count++;
		found = dentry;
		spin_unlock(&dentry->d_lock);
		break;
next:
		spin_unlock(&dentry->d_lock);
	}
	rcu_read_unlock();

	return found;
}
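
/*
 * Illustrative sketch (hypothetical caller, not from this file): the
 * documented pattern for a commented __d_lookup() user is to treat a
 * miss as "maybe" and fall back to the rename-safe d_lookup():
 *
 *	struct dentry *child;
 *
 *	child = __d_lookup(parent, &name);	// fast, may falsely miss
 *	if (!child)
 *		child = d_lookup(parent, &name);	// rename-safe retry
 *	if (child) {
 *		// ... use child ...
 *		dput(child);		// both variants take a reference
 *	}
 */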
1992 	 * calculate the standard hash first, as the d_op->d_hash()
1993 	 * routine may choose to leave the hash value unchanged.
1994 	 */
1995 	name->hash = full_name_hash(name->name, name->len);
1996 	if (dir->d_flags & DCACHE_OP_HASH) {
1997 		if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
1998 			goto out;
1999 	}
2000 	dentry = d_lookup(dir, name);
2001 out:
2002 	return dentry;
2003 }
2004 
2005 /**
2006  * d_validate - verify dentry provided from insecure source (deprecated)
2007  * @dentry: The dentry alleged to be valid child of @dparent
2008  * @dparent: The parent dentry (known to be valid)
2009  *
2010  * An insecure source has sent us a dentry; here we verify it and dget() it.
2011  * This is used by ncpfs in its readdir implementation.
2012  * Zero is returned if the dentry is invalid.
2013  *
2014  * This function is slow for big directories, and deprecated; do not use it.
2015  */
2016 int d_validate(struct dentry *dentry, struct dentry *dparent)
2017 {
2018 	struct dentry *child;
2019 
2020 	spin_lock(&dparent->d_lock);
2021 	list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
2022 		if (dentry == child) {
2023 			spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2024 			__dget_dlock(dentry);
2025 			spin_unlock(&dentry->d_lock);
2026 			spin_unlock(&dparent->d_lock);
2027 			return 1;
2028 		}
2029 	}
2030 	spin_unlock(&dparent->d_lock);
2031 
2032 	return 0;
2033 }
2034 EXPORT_SYMBOL(d_validate);
2035 
2036 /*
2037  * When a file is deleted, we have two options:
2038  * - turn this dentry into a negative dentry
2039  * - unhash this dentry and free it.
2040  *
2041  * Usually, we want to just turn this into
2042  * a negative dentry, but if anybody else is
2043  * currently using the dentry or the inode
2044  * we can't do that and we fall back on removing
2045  * it from the hash queues and waiting for
2046  * it to be deleted later when it has no users.
2047  */
2048 
2049 /**
2050  * d_delete - delete a dentry
2051  * @dentry: The dentry to delete
2052  *
2053  * Turn the dentry into a negative dentry if possible, otherwise
2054  * remove it from the hash queues so it can be deleted later.
2055  */
2056 
2057 void d_delete(struct dentry * dentry)
2058 {
2059 	struct inode *inode;
2060 	int isdir = 0;
2061 	/*
2062 	 * Are we the only user?
2063 	 */
2064 again:
2065 	spin_lock(&dentry->d_lock);
2066 	inode = dentry->d_inode;
2067 	isdir = S_ISDIR(inode->i_mode);
2068 	if (dentry->d_count == 1) {
2069 		if (inode && !spin_trylock(&inode->i_lock)) {
2070 			spin_unlock(&dentry->d_lock);
2071 			cpu_relax();
2072 			goto again;
2073 		}
2074 		dentry->d_flags &= ~DCACHE_CANT_MOUNT;
2075 		dentry_unlink_inode(dentry);
2076 		fsnotify_nameremove(dentry, isdir);
2077 		return;
2078 	}
2079 
2080 	if (!d_unhashed(dentry))
2081 		__d_drop(dentry);
2082 
2083 	spin_unlock(&dentry->d_lock);
2084 
2085 	fsnotify_nameremove(dentry, isdir);
2086 }
2087 EXPORT_SYMBOL(d_delete);
2088 
2089 static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b)
2090 {
2091 	BUG_ON(!d_unhashed(entry));
2092 	spin_lock_bucket(b);
2093 	entry->d_flags &= ~DCACHE_UNHASHED;
2094 	hlist_bl_add_head_rcu(&entry->d_hash, &b->head);
2095 	spin_unlock_bucket(b);
2096 }
2097 
2098 static void _d_rehash(struct dentry * entry)
2099 {
2100 	__d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
2101 }
2102 
2103 /**
2104  * d_rehash - add an entry back to the hash
2105  * @entry: dentry to add to the hash
2106  *
2107  * Adds a dentry to the hash according to its name.
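 *
 * A minimal sketch (editor's note): for a freshly allocated, unhashed
 * dentry this pairs with d_instantiate(); d_add() in <linux/dcache.h>
 * is the usual shorthand for exactly this sequence:
 *
 *	d_instantiate(dentry, inode);
 *	d_rehash(dentry);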
2108  */
2109 
2110 void d_rehash(struct dentry * entry)
2111 {
2112 	spin_lock(&entry->d_lock);
2113 	_d_rehash(entry);
2114 	spin_unlock(&entry->d_lock);
2115 }
2116 EXPORT_SYMBOL(d_rehash);
2117 
2118 /**
2119  * dentry_update_name_case - update a case-insensitive dentry with a new name
2120  * @dentry: dentry to be updated
2121  * @name: new name
2122  *
2123  * Update a case-insensitive dentry with the new case of its name.
2124  *
2125  * dentry must have been returned by d_lookup with name @name. Old and new
2126  * name lengths must match (i.e., no d_compare which allows mismatched name
2127  * lengths).
2128  *
2129  * The parent inode's i_mutex must be held over d_lookup and into this call
2130  * (to keep renames, concurrent inserts, and readdir(2) away).
2131  */
2132 void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2133 {
2134 	BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
2135 	BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2136 
2137 	spin_lock(&dentry->d_lock);
2138 	write_seqcount_begin(&dentry->d_seq);
2139 	memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
2140 	write_seqcount_end(&dentry->d_seq);
2141 	spin_unlock(&dentry->d_lock);
2142 }
2143 EXPORT_SYMBOL(dentry_update_name_case);
2144 
2145 static void switch_names(struct dentry *dentry, struct dentry *target)
2146 {
2147 	if (dname_external(target)) {
2148 		if (dname_external(dentry)) {
2149 			/*
2150 			 * Both external: swap the pointers
2151 			 */
2152 			swap(target->d_name.name, dentry->d_name.name);
2153 		} else {
2154 			/*
2155 			 * dentry:internal, target:external. Steal target's
2156 			 * storage and make target internal.
2157 			 */
2158 			memcpy(target->d_iname, dentry->d_name.name,
2159 					dentry->d_name.len + 1);
2160 			dentry->d_name.name = target->d_name.name;
2161 			target->d_name.name = target->d_iname;
2162 		}
2163 	} else {
2164 		if (dname_external(dentry)) {
2165 			/*
2166 			 * dentry:external, target:internal. Give dentry's
2167 			 * storage to target and make dentry internal.
2168 			 */
2169 			memcpy(dentry->d_iname, target->d_name.name,
2170 					target->d_name.len + 1);
2171 			target->d_name.name = dentry->d_name.name;
2172 			dentry->d_name.name = dentry->d_iname;
2173 		} else {
2174 			/*
2175 			 * Both are internal. Just copy target to dentry.
2176 			 */
2177 			memcpy(dentry->d_iname, target->d_name.name,
2178 					target->d_name.len + 1);
2179 			dentry->d_name.len = target->d_name.len;
2180 			return;
2181 		}
2182 	}
2183 	swap(dentry->d_name.len, target->d_name.len);
2184 }
2185 
2186 static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2187 {
2188 	/*
2189 	 * XXXX: do we really need to take target->d_lock?
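	 *
	 * Editor's note on the ordering implemented below, following the
	 * rules in the header comment of this file: parents are locked
	 * before their children, with an ancestor's d_lock taken first
	 * when the two parents are related, and the two dentries
	 * themselves are locked in address order. The nesting levels
	 * passed to spin_lock_nested() (0 and DENTRY_D_LOCK_NESTED for
	 * the parents, then 2 and 3 for the dentries) keep lockdep happy
	 * about holding up to four d_locks at once.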
2190 	 */
2191 	if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2192 		spin_lock(&target->d_parent->d_lock);
2193 	else {
2194 		if (d_ancestor(dentry->d_parent, target->d_parent)) {
2195 			spin_lock(&dentry->d_parent->d_lock);
2196 			spin_lock_nested(&target->d_parent->d_lock,
2197 						DENTRY_D_LOCK_NESTED);
2198 		} else {
2199 			spin_lock(&target->d_parent->d_lock);
2200 			spin_lock_nested(&dentry->d_parent->d_lock,
2201 						DENTRY_D_LOCK_NESTED);
2202 		}
2203 	}
2204 	if (target < dentry) {
2205 		spin_lock_nested(&target->d_lock, 2);
2206 		spin_lock_nested(&dentry->d_lock, 3);
2207 	} else {
2208 		spin_lock_nested(&dentry->d_lock, 2);
2209 		spin_lock_nested(&target->d_lock, 3);
2210 	}
2211 }
2212 
2213 static void dentry_unlock_parents_for_move(struct dentry *dentry,
2214 					struct dentry *target)
2215 {
2216 	if (target->d_parent != dentry->d_parent)
2217 		spin_unlock(&dentry->d_parent->d_lock);
2218 	if (target->d_parent != target)
2219 		spin_unlock(&target->d_parent->d_lock);
2220 }
2221 
2222 /*
2223  * When switching names, the actual string doesn't strictly have to
2224  * be preserved in the target - because we're dropping the target
2225  * anyway. As such, we can just do a simple memcpy() to copy over
2226  * the new name before we switch.
2227  *
2228  * Note that we have to be a lot more careful about getting the hash
2229  * switched - we have to switch the hash value properly even if it
2230  * then no longer matches the actual (corrupted) string of the target.
2231  * The hash value has to match the hash queue that the dentry is on.
2232  */
2233 /**
2234  * d_move - move a dentry
2235  * @dentry: entry to move
2236  * @target: new dentry
2237  *
2238  * Update the dcache to reflect the move of a file name. Negative
2239  * dcache entries should not be moved in this way.
2240  */
2241 void d_move(struct dentry * dentry, struct dentry * target)
2242 {
2243 	if (!dentry->d_inode)
2244 		printk(KERN_WARNING "VFS: moving negative dcache entry\n");
2245 
2246 	BUG_ON(d_ancestor(dentry, target));
2247 	BUG_ON(d_ancestor(target, dentry));
2248 
2249 	write_seqlock(&rename_lock);
2250 
2251 	dentry_lock_for_move(dentry, target);
2252 
2253 	write_seqcount_begin(&dentry->d_seq);
2254 	write_seqcount_begin(&target->d_seq);
2255 
2256 	/* __d_drop does write_seqcount_barrier, but they're OK to nest. */
2257 
2258 	/*
2259 	 * Move the dentry to the target hash queue. Don't bother checking
2260 	 * for the same hash queue because of how unlikely it is.
2261 	 */
2262 	__d_drop(dentry);
2263 	__d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
2264 
2265 	/* Unhash the target: dput() will then get rid of it */
2266 	__d_drop(target);
2267 
2268 	list_del(&dentry->d_u.d_child);
2269 	list_del(&target->d_u.d_child);
2270 
2271 	/* Switch the names.. */
2272 	switch_names(dentry, target);
2273 	swap(dentry->d_name.hash, target->d_name.hash);
2274 
2275 	/* ...
and switch the parents */ 2276 if (IS_ROOT(dentry)) { 2277 dentry->d_parent = target->d_parent; 2278 target->d_parent = target; 2279 INIT_LIST_HEAD(&target->d_u.d_child); 2280 } else { 2281 swap(dentry->d_parent, target->d_parent); 2282 2283 /* And add them back to the (new) parent lists */ 2284 list_add(&target->d_u.d_child, &target->d_parent->d_subdirs); 2285 } 2286 2287 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); 2288 2289 write_seqcount_end(&target->d_seq); 2290 write_seqcount_end(&dentry->d_seq); 2291 2292 dentry_unlock_parents_for_move(dentry, target); 2293 spin_unlock(&target->d_lock); 2294 fsnotify_d_move(dentry); 2295 spin_unlock(&dentry->d_lock); 2296 write_sequnlock(&rename_lock); 2297 } 2298 EXPORT_SYMBOL(d_move); 2299 2300 /** 2301 * d_ancestor - search for an ancestor 2302 * @p1: ancestor dentry 2303 * @p2: child dentry 2304 * 2305 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is 2306 * an ancestor of p2, else NULL. 2307 */ 2308 struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) 2309 { 2310 struct dentry *p; 2311 2312 for (p = p2; !IS_ROOT(p); p = p->d_parent) { 2313 if (p->d_parent == p1) 2314 return p; 2315 } 2316 return NULL; 2317 } 2318 2319 /* 2320 * This helper attempts to cope with remotely renamed directories 2321 * 2322 * It assumes that the caller is already holding 2323 * dentry->d_parent->d_inode->i_mutex and the inode->i_lock 2324 * 2325 * Note: If ever the locking in lock_rename() changes, then please 2326 * remember to update this too... 2327 */ 2328 static struct dentry *__d_unalias(struct inode *inode, 2329 struct dentry *dentry, struct dentry *alias) 2330 { 2331 struct mutex *m1 = NULL, *m2 = NULL; 2332 struct dentry *ret; 2333 2334 /* If alias and dentry share a parent, then no extra locks required */ 2335 if (alias->d_parent == dentry->d_parent) 2336 goto out_unalias; 2337 2338 /* Check for loops */ 2339 ret = ERR_PTR(-ELOOP); 2340 if (d_ancestor(alias, dentry)) 2341 goto out_err; 2342 2343 /* See lock_rename() */ 2344 ret = ERR_PTR(-EBUSY); 2345 if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) 2346 goto out_err; 2347 m1 = &dentry->d_sb->s_vfs_rename_mutex; 2348 if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex)) 2349 goto out_err; 2350 m2 = &alias->d_parent->d_inode->i_mutex; 2351 out_unalias: 2352 d_move(alias, dentry); 2353 ret = alias; 2354 out_err: 2355 spin_unlock(&inode->i_lock); 2356 if (m2) 2357 mutex_unlock(m2); 2358 if (m1) 2359 mutex_unlock(m1); 2360 return ret; 2361 } 2362 2363 /* 2364 * Prepare an anonymous dentry for life in the superblock's dentry tree as a 2365 * named dentry in place of the dentry to be replaced. 2366 * returns with anon->d_lock held! 2367 */ 2368 static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) 2369 { 2370 struct dentry *dparent, *aparent; 2371 2372 dentry_lock_for_move(anon, dentry); 2373 2374 write_seqcount_begin(&dentry->d_seq); 2375 write_seqcount_begin(&anon->d_seq); 2376 2377 dparent = dentry->d_parent; 2378 aparent = anon->d_parent; 2379 2380 switch_names(dentry, anon); 2381 swap(dentry->d_name.hash, anon->d_name.hash); 2382 2383 dentry->d_parent = (aparent == anon) ? dentry : aparent; 2384 list_del(&dentry->d_u.d_child); 2385 if (!IS_ROOT(dentry)) 2386 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); 2387 else 2388 INIT_LIST_HEAD(&dentry->d_u.d_child); 2389 2390 anon->d_parent = (dparent == dentry) ? 
anon : dparent;
2391 	list_del(&anon->d_u.d_child);
2392 	if (!IS_ROOT(anon))
2393 		list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
2394 	else
2395 		INIT_LIST_HEAD(&anon->d_u.d_child);
2396 
2397 	write_seqcount_end(&dentry->d_seq);
2398 	write_seqcount_end(&anon->d_seq);
2399 
2400 	dentry_unlock_parents_for_move(anon, dentry);
2401 	spin_unlock(&dentry->d_lock);
2402 
2403 	/* anon->d_lock still locked, returns locked */
2404 	anon->d_flags &= ~DCACHE_DISCONNECTED;
2405 }
2406 
2407 /**
2408  * d_materialise_unique - introduce an inode into the tree
2409  * @dentry: candidate dentry
2410  * @inode: inode to bind to the dentry, to which aliases may be attached
2411  *
2412  * Introduces a dentry into the tree, substituting an extant disconnected
2413  * root directory alias in its place if there is one.
2414  */
2415 struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2416 {
2417 	struct dentry *actual;
2418 
2419 	BUG_ON(!d_unhashed(dentry));
2420 
2421 	if (!inode) {
2422 		actual = dentry;
2423 		__d_instantiate(dentry, NULL);
2424 		d_rehash(actual);
2425 		goto out_nolock;
2426 	}
2427 
2428 	spin_lock(&inode->i_lock);
2429 
2430 	if (S_ISDIR(inode->i_mode)) {
2431 		struct dentry *alias;
2432 
2433 		/* Does an aliased dentry already exist? */
2434 		alias = __d_find_alias(inode, 0);
2435 		if (alias) {
2436 			actual = alias;
2437 			/* Is this an anonymous mountpoint that we could splice
2438 			 * into our tree? */
2439 			if (IS_ROOT(alias)) {
2440 				__d_materialise_dentry(dentry, alias);
2441 				__d_drop(alias);
2442 				goto found;
2443 			}
2444 			/* Nope, but we must(!) avoid directory aliasing */
2445 			actual = __d_unalias(inode, dentry, alias);
2446 			if (IS_ERR(actual))
2447 				dput(alias);
2448 			goto out_nolock;
2449 		}
2450 	}
2451 
2452 	/* Add a unique reference */
2453 	actual = __d_instantiate_unique(dentry, inode);
2454 	if (!actual)
2455 		actual = dentry;
2456 	else
2457 		BUG_ON(!d_unhashed(actual));
2458 
2459 	spin_lock(&actual->d_lock);
2460 found:
2461 	_d_rehash(actual);
2462 	spin_unlock(&actual->d_lock);
2463 	spin_unlock(&inode->i_lock);
2464 out_nolock:
2465 	if (actual == dentry) {
2466 		security_d_instantiate(dentry, inode);
2467 		return NULL;
2468 	}
2469 
2470 	iput(inode);
2471 	return actual;
2472 }
2473 EXPORT_SYMBOL_GPL(d_materialise_unique);
2474 
2475 static int prepend(char **buffer, int *buflen, const char *str, int namelen)
2476 {
2477 	*buflen -= namelen;
2478 	if (*buflen < 0)
2479 		return -ENAMETOOLONG;
2480 	*buffer -= namelen;
2481 	memcpy(*buffer, str, namelen);
2482 	return 0;
2483 }
2484 
2485 static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2486 {
2487 	return prepend(buffer, buflen, name->name, name->len);
2488 }
2489 
2490 /**
2491  * prepend_path - Prepend path string to a buffer
2492  * @path: the dentry/vfsmount to report
2493  * @root: root vfsmnt/dentry (may be modified by this function)
2494  * @buffer: pointer to the end of the buffer
2495  * @buflen: pointer to buffer length
2496  *
2497  * Caller holds the rename_lock.
2498  *
2499  * If path is not reachable from the supplied root, then the value of
2500  * root is changed (without modifying refcounts).
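 *
 * Worked example of the backwards-building scheme (editor's sketch):
 * with buflen 16, *buffer starts at buf+16. prepend("\0", 1) moves it
 * to buf+15; for the leaf component "bar", prepend_name() and then
 * prepend("/", 1) leave "/bar" starting at buf+11; for its parent
 * "foo" the same two calls leave "/foo/bar" starting at buf+7, which
 * is the pointer the caller must use.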
2501 */ 2502 static int prepend_path(const struct path *path, struct path *root, 2503 char **buffer, int *buflen) 2504 { 2505 struct dentry *dentry = path->dentry; 2506 struct vfsmount *vfsmnt = path->mnt; 2507 bool slash = false; 2508 int error = 0; 2509 2510 br_read_lock(vfsmount_lock); 2511 while (dentry != root->dentry || vfsmnt != root->mnt) { 2512 struct dentry * parent; 2513 2514 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { 2515 /* Global root? */ 2516 if (vfsmnt->mnt_parent == vfsmnt) { 2517 goto global_root; 2518 } 2519 dentry = vfsmnt->mnt_mountpoint; 2520 vfsmnt = vfsmnt->mnt_parent; 2521 continue; 2522 } 2523 parent = dentry->d_parent; 2524 prefetch(parent); 2525 spin_lock(&dentry->d_lock); 2526 error = prepend_name(buffer, buflen, &dentry->d_name); 2527 spin_unlock(&dentry->d_lock); 2528 if (!error) 2529 error = prepend(buffer, buflen, "/", 1); 2530 if (error) 2531 break; 2532 2533 slash = true; 2534 dentry = parent; 2535 } 2536 2537 out: 2538 if (!error && !slash) 2539 error = prepend(buffer, buflen, "/", 1); 2540 2541 br_read_unlock(vfsmount_lock); 2542 return error; 2543 2544 global_root: 2545 /* 2546 * Filesystems needing to implement special "root names" 2547 * should do so with ->d_dname() 2548 */ 2549 if (IS_ROOT(dentry) && 2550 (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) { 2551 WARN(1, "Root dentry has weird name <%.*s>\n", 2552 (int) dentry->d_name.len, dentry->d_name.name); 2553 } 2554 root->mnt = vfsmnt; 2555 root->dentry = dentry; 2556 goto out; 2557 } 2558 2559 /** 2560 * __d_path - return the path of a dentry 2561 * @path: the dentry/vfsmount to report 2562 * @root: root vfsmnt/dentry (may be modified by this function) 2563 * @buf: buffer to return value in 2564 * @buflen: buffer length 2565 * 2566 * Convert a dentry into an ASCII path name. 2567 * 2568 * Returns a pointer into the buffer or an error code if the 2569 * path was too long. 2570 * 2571 * "buflen" should be positive. 2572 * 2573 * If path is not reachable from the supplied root, then the value of 2574 * root is changed (without modifying refcounts). 2575 */ 2576 char *__d_path(const struct path *path, struct path *root, 2577 char *buf, int buflen) 2578 { 2579 char *res = buf + buflen; 2580 int error; 2581 2582 prepend(&res, &buflen, "\0", 1); 2583 write_seqlock(&rename_lock); 2584 error = prepend_path(path, root, &res, &buflen); 2585 write_sequnlock(&rename_lock); 2586 2587 if (error) 2588 return ERR_PTR(error); 2589 return res; 2590 } 2591 2592 /* 2593 * same as __d_path but appends "(deleted)" for unlinked files. 2594 */ 2595 static int path_with_deleted(const struct path *path, struct path *root, 2596 char **buf, int *buflen) 2597 { 2598 prepend(buf, buflen, "\0", 1); 2599 if (d_unlinked(path->dentry)) { 2600 int error = prepend(buf, buflen, " (deleted)", 10); 2601 if (error) 2602 return error; 2603 } 2604 2605 return prepend_path(path, root, buf, buflen); 2606 } 2607 2608 static int prepend_unreachable(char **buffer, int *buflen) 2609 { 2610 return prepend(buffer, buflen, "(unreachable)", 13); 2611 } 2612 2613 /** 2614 * d_path - return the path of a dentry 2615 * @path: path to report 2616 * @buf: buffer to return value in 2617 * @buflen: buffer length 2618 * 2619 * Convert a dentry into an ASCII path name. If the entry has been deleted 2620 * the string " (deleted)" is appended. Note that this is ambiguous. 2621 * 2622 * Returns a pointer into the buffer or an error code if the path was 2623 * too long. 
Note: Callers should use the returned pointer, not the passed 2624 * in buffer, to use the name! The implementation often starts at an offset 2625 * into the buffer, and may leave 0 bytes at the start. 2626 * 2627 * "buflen" should be positive. 2628 */ 2629 char *d_path(const struct path *path, char *buf, int buflen) 2630 { 2631 char *res = buf + buflen; 2632 struct path root; 2633 struct path tmp; 2634 int error; 2635 2636 /* 2637 * We have various synthetic filesystems that never get mounted. On 2638 * these filesystems dentries are never used for lookup purposes, and 2639 * thus don't need to be hashed. They also don't need a name until a 2640 * user wants to identify the object in /proc/pid/fd/. The little hack 2641 * below allows us to generate a name for these objects on demand: 2642 */ 2643 if (path->dentry->d_op && path->dentry->d_op->d_dname) 2644 return path->dentry->d_op->d_dname(path->dentry, buf, buflen); 2645 2646 get_fs_root(current->fs, &root); 2647 write_seqlock(&rename_lock); 2648 tmp = root; 2649 error = path_with_deleted(path, &tmp, &res, &buflen); 2650 if (error) 2651 res = ERR_PTR(error); 2652 write_sequnlock(&rename_lock); 2653 path_put(&root); 2654 return res; 2655 } 2656 EXPORT_SYMBOL(d_path); 2657 2658 /** 2659 * d_path_with_unreachable - return the path of a dentry 2660 * @path: path to report 2661 * @buf: buffer to return value in 2662 * @buflen: buffer length 2663 * 2664 * The difference from d_path() is that this prepends "(unreachable)" 2665 * to paths which are unreachable from the current process' root. 2666 */ 2667 char *d_path_with_unreachable(const struct path *path, char *buf, int buflen) 2668 { 2669 char *res = buf + buflen; 2670 struct path root; 2671 struct path tmp; 2672 int error; 2673 2674 if (path->dentry->d_op && path->dentry->d_op->d_dname) 2675 return path->dentry->d_op->d_dname(path->dentry, buf, buflen); 2676 2677 get_fs_root(current->fs, &root); 2678 write_seqlock(&rename_lock); 2679 tmp = root; 2680 error = path_with_deleted(path, &tmp, &res, &buflen); 2681 if (!error && !path_equal(&tmp, &root)) 2682 error = prepend_unreachable(&res, &buflen); 2683 write_sequnlock(&rename_lock); 2684 path_put(&root); 2685 if (error) 2686 res = ERR_PTR(error); 2687 2688 return res; 2689 } 2690 2691 /* 2692 * Helper function for dentry_operations.d_dname() members 2693 */ 2694 char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, 2695 const char *fmt, ...) 2696 { 2697 va_list args; 2698 char temp[64]; 2699 int sz; 2700 2701 va_start(args, fmt); 2702 sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1; 2703 va_end(args); 2704 2705 if (sz > sizeof(temp) || sz > buflen) 2706 return ERR_PTR(-ENAMETOOLONG); 2707 2708 buffer += buflen - sz; 2709 return memcpy(buffer, temp, sz); 2710 } 2711 2712 /* 2713 * Write full pathname from the root of the filesystem into the buffer. 
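 *
 * Unlike d_path(), the result ignores mounts and the caller's root; it
 * is relative to the root of the dentry's own filesystem. A minimal
 * caller sketch (editor's illustration):
 *
 *	char buf[256];
 *	char *p = dentry_path_raw(dentry, buf, sizeof(buf));
 *	if (!IS_ERR(p))
 *		printk(KERN_DEBUG "%s\n", p);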
2714  */
2715 static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2716 {
2717 	char *end = buf + buflen;
2718 	char *retval;
2719 
2720 	prepend(&end, &buflen, "\0", 1);
2721 	if (buflen < 1)
2722 		goto Elong;
2723 	/* Get '/' right */
2724 	retval = end-1;
2725 	*retval = '/';
2726 
2727 	while (!IS_ROOT(dentry)) {
2728 		struct dentry *parent = dentry->d_parent;
2729 		int error;
2730 
2731 		prefetch(parent);
2732 		spin_lock(&dentry->d_lock);
2733 		error = prepend_name(&end, &buflen, &dentry->d_name);
2734 		spin_unlock(&dentry->d_lock);
2735 		if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
2736 			goto Elong;
2737 
2738 		retval = end;
2739 		dentry = parent;
2740 	}
2741 	return retval;
2742 Elong:
2743 	return ERR_PTR(-ENAMETOOLONG);
2744 }
2745 
2746 char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2747 {
2748 	char *retval;
2749 
2750 	write_seqlock(&rename_lock);
2751 	retval = __dentry_path(dentry, buf, buflen);
2752 	write_sequnlock(&rename_lock);
2753 
2754 	return retval;
2755 }
2756 EXPORT_SYMBOL(dentry_path_raw);
2757 
2758 char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2759 {
2760 	char *p = NULL;
2761 	char *retval;
2762 
2763 	write_seqlock(&rename_lock);
2764 	if (d_unlinked(dentry)) {
2765 		p = buf + buflen;
2766 		if (prepend(&p, &buflen, "//deleted", 10) != 0)
2767 			goto Elong;
2768 		buflen++;
2769 	}
2770 	retval = __dentry_path(dentry, buf, buflen);
2771 	write_sequnlock(&rename_lock);
2772 	if (!IS_ERR(retval) && p)
2773 		*p = '/';	/* restore '/' overridden with '\0' */
2774 	return retval;
2775 Elong:
2776 	write_sequnlock(&rename_lock);
2777 	return ERR_PTR(-ENAMETOOLONG);
2778 }

2779 /*
2780  * NOTE! The user-level library version returns a
2781  * character pointer. The kernel system call just
2782  * returns the length of the buffer filled (which
2783  * includes the ending '\0' character), or a negative
2784  * error value. So libc would do something like
2785  *
2786  *	char *getcwd(char * buf, size_t size)
2787  *	{
2788  *		int retval;
2789  *
2790  *		retval = sys_getcwd(buf, size);
2791  *		if (retval >= 0)
2792  *			return buf;
2793  *		errno = -retval;
2794  *		return NULL;
2795  *	}
2796  */
2797 SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2798 {
2799 	int error;
2800 	struct path pwd, root;
2801 	char *page = (char *) __get_free_page(GFP_USER);
2802 
2803 	if (!page)
2804 		return -ENOMEM;
2805 
2806 	get_fs_root_and_pwd(current->fs, &root, &pwd);
2807 
2808 	error = -ENOENT;
2809 	write_seqlock(&rename_lock);
2810 	if (!d_unlinked(pwd.dentry)) {
2811 		unsigned long len;
2812 		struct path tmp = root;
2813 		char *cwd = page + PAGE_SIZE;
2814 		int buflen = PAGE_SIZE;
2815 
2816 		prepend(&cwd, &buflen, "\0", 1);
2817 		error = prepend_path(&pwd, &tmp, &cwd, &buflen);
2818 		write_sequnlock(&rename_lock);
2819 
2820 		if (error)
2821 			goto out;
2822 
2823 		/* Unreachable from current root */
2824 		if (!path_equal(&tmp, &root)) {
2825 			error = prepend_unreachable(&cwd, &buflen);
2826 			if (error)
2827 				goto out;
2828 		}
2829 
2830 		error = -ERANGE;
2831 		len = PAGE_SIZE + page - cwd;
2832 		if (len <= size) {
2833 			error = len;
2834 			if (copy_to_user(buf, cwd, len))
2835 				error = -EFAULT;
2836 		}
2837 	} else {
2838 		write_sequnlock(&rename_lock);
2839 	}
2840 
2841 out:
2842 	path_put(&pwd);
2843 	path_put(&root);
2844 	free_page((unsigned long) page);
2845 	return error;
2846 }
2847 
2848 /*
2849  * Test whether new_dentry is a subdirectory of old_dentry.
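 *
 * Editor's note: like d_lookup() and d_genocide(), the implementation
 * relies on the lockless rename_lock retry pattern:
 *
 *	do {
 *		seq = read_seqbegin(&rename_lock);
 *		... walk d_parent pointers under rcu_read_lock() ...
 *	} while (read_seqretry(&rename_lock, seq));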
2850  *
2851  * Trivially implemented using the dcache structure
2852  */
2853 
2854 /**
2855  * is_subdir - is new dentry a subdirectory of old_dentry
2856  * @new_dentry: new dentry
2857  * @old_dentry: old dentry
2858  *
2859  * Returns 1 if new_dentry is a subdirectory of old_dentry (at any depth).
2860  * Returns 0 otherwise.
2861  * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2862  */
2863 
2864 int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2865 {
2866 	int result;
2867 	unsigned seq;
2868 
2869 	if (new_dentry == old_dentry)
2870 		return 1;
2871 
2872 	do {
2873 		/* for restarting inner loop in case of seq retry */
2874 		seq = read_seqbegin(&rename_lock);
2875 		/*
2876 		 * Need rcu_read_lock() to protect against d_parent being
2877 		 * changed under us by d_move().
2878 		 */
2879 		rcu_read_lock();
2880 		if (d_ancestor(old_dentry, new_dentry))
2881 			result = 1;
2882 		else
2883 			result = 0;
2884 		rcu_read_unlock();
2885 	} while (read_seqretry(&rename_lock, seq));
2886 
2887 	return result;
2888 }
2889 
2890 int path_is_under(struct path *path1, struct path *path2)
2891 {
2892 	struct vfsmount *mnt = path1->mnt;
2893 	struct dentry *dentry = path1->dentry;
2894 	int res;
2895 
2896 	br_read_lock(vfsmount_lock);
2897 	if (mnt != path2->mnt) {
2898 		for (;;) {
2899 			if (mnt->mnt_parent == mnt) {
2900 				br_read_unlock(vfsmount_lock);
2901 				return 0;
2902 			}
2903 			if (mnt->mnt_parent == path2->mnt)
2904 				break;
2905 			mnt = mnt->mnt_parent;
2906 		}
2907 		dentry = mnt->mnt_mountpoint;
2908 	}
2909 	res = is_subdir(dentry, path2->dentry);
2910 	br_read_unlock(vfsmount_lock);
2911 	return res;
2912 }
2913 EXPORT_SYMBOL(path_is_under);
2914 
2915 void d_genocide(struct dentry *root)
2916 {
2917 	struct dentry *this_parent;
2918 	struct list_head *next;
2919 	unsigned seq;
2920 	int locked = 0;
2921 
2922 	seq = read_seqbegin(&rename_lock);
2923 again:
2924 	this_parent = root;
2925 	spin_lock(&this_parent->d_lock);
2926 repeat:
2927 	next = this_parent->d_subdirs.next;
2928 resume:
2929 	while (next != &this_parent->d_subdirs) {
2930 		struct list_head *tmp = next;
2931 		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2932 		next = tmp->next;
2933 
2934 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2935 		if (d_unhashed(dentry) || !dentry->d_inode) {
2936 			spin_unlock(&dentry->d_lock);
2937 			continue;
2938 		}
2939 		if (!list_empty(&dentry->d_subdirs)) {
2940 			spin_unlock(&this_parent->d_lock);
2941 			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
2942 			this_parent = dentry;
2943 			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2944 			goto repeat;
2945 		}
2946 		if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2947 			dentry->d_flags |= DCACHE_GENOCIDE;
2948 			dentry->d_count--;
2949 		}
2950 		spin_unlock(&dentry->d_lock);
2951 	}
2952 	if (this_parent != root) {
2953 		struct dentry *child = this_parent;
2954 		if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2955 			this_parent->d_flags |= DCACHE_GENOCIDE;
2956 			this_parent->d_count--;
2957 		}
2958 		this_parent = try_to_ascend(this_parent, locked, seq);
2959 		if (!this_parent)
2960 			goto rename_retry;
2961 		next = child->d_u.d_child.next;
2962 		goto resume;
2963 	}
2964 	spin_unlock(&this_parent->d_lock);
2965 	if (!locked && read_seqretry(&rename_lock, seq))
2966 		goto rename_retry;
2967 	if (locked)
2968 		write_sequnlock(&rename_lock);
2969 	return;
2970 
2971 rename_retry:
2972 	locked = 1;
2973 	write_seqlock(&rename_lock);
2974 	goto again;
2975 }
2976 
2977 /**
2978  * find_inode_number - check for dentry with name
2979  * @dir: directory to check
2980  * @name: Name to find.
2981 * 2982 * Check whether a dentry already exists for the given name, 2983 * and return the inode number if it has an inode. Otherwise 2984 * 0 is returned. 2985 * 2986 * This routine is used to post-process directory listings for 2987 * filesystems using synthetic inode numbers, and is necessary 2988 * to keep getcwd() working. 2989 */ 2990 2991 ino_t find_inode_number(struct dentry *dir, struct qstr *name) 2992 { 2993 struct dentry * dentry; 2994 ino_t ino = 0; 2995 2996 dentry = d_hash_and_lookup(dir, name); 2997 if (dentry) { 2998 if (dentry->d_inode) 2999 ino = dentry->d_inode->i_ino; 3000 dput(dentry); 3001 } 3002 return ino; 3003 } 3004 EXPORT_SYMBOL(find_inode_number); 3005 3006 static __initdata unsigned long dhash_entries; 3007 static int __init set_dhash_entries(char *str) 3008 { 3009 if (!str) 3010 return 0; 3011 dhash_entries = simple_strtoul(str, &str, 0); 3012 return 1; 3013 } 3014 __setup("dhash_entries=", set_dhash_entries); 3015 3016 static void __init dcache_init_early(void) 3017 { 3018 int loop; 3019 3020 /* If hashes are distributed across NUMA nodes, defer 3021 * hash allocation until vmalloc space is available. 3022 */ 3023 if (hashdist) 3024 return; 3025 3026 dentry_hashtable = 3027 alloc_large_system_hash("Dentry cache", 3028 sizeof(struct dcache_hash_bucket), 3029 dhash_entries, 3030 13, 3031 HASH_EARLY, 3032 &d_hash_shift, 3033 &d_hash_mask, 3034 0); 3035 3036 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3037 INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head); 3038 } 3039 3040 static void __init dcache_init(void) 3041 { 3042 int loop; 3043 3044 /* 3045 * A constructor could be added for stable state like the lists, 3046 * but it is probably not worth it because of the cache nature 3047 * of the dcache. 3048 */ 3049 dentry_cache = KMEM_CACHE(dentry, 3050 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); 3051 3052 register_shrinker(&dcache_shrinker); 3053 3054 /* Hash may have been set up in dcache_init_early */ 3055 if (!hashdist) 3056 return; 3057 3058 dentry_hashtable = 3059 alloc_large_system_hash("Dentry cache", 3060 sizeof(struct dcache_hash_bucket), 3061 dhash_entries, 3062 13, 3063 0, 3064 &d_hash_shift, 3065 &d_hash_mask, 3066 0); 3067 3068 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3069 INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head); 3070 } 3071 3072 /* SLAB cache for __getname() consumers */ 3073 struct kmem_cache *names_cachep __read_mostly; 3074 EXPORT_SYMBOL(names_cachep); 3075 3076 EXPORT_SYMBOL(d_genocide); 3077 3078 void __init vfs_caches_init_early(void) 3079 { 3080 dcache_init_early(); 3081 inode_init_early(); 3082 } 3083 3084 void __init vfs_caches_init(unsigned long mempages) 3085 { 3086 unsigned long reserve; 3087 3088 /* Base hash sizes on available memory, with a reserve equal to 3089 150% of current kernel size */ 3090 3091 reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1); 3092 mempages -= reserve; 3093 3094 names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, 3095 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 3096 3097 dcache_init(); 3098 inode_init(); 3099 files_init(mempages); 3100 mnt_init(); 3101 bdev_cache_init(); 3102 chrdev_init(); 3103 } 3104
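
/*
 * Editor's note: the dentry hash table sized in dcache_init() and
 * dcache_init_early() can be overridden at boot with the
 * "dhash_entries=" parameter parsed above, e.g.:
 *
 *	linux ... dhash_entries=1048576
 *
 * alloc_large_system_hash() rounds the requested count to a power of
 * two and reports the result via d_hash_shift and d_hash_mask.
 */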