/*
 * fs/dcache.c
 *
 * Complete reimplementation
 * (C) 1997 Thomas Schoebel-Theuer,
 * with heavy changes by Linus Torvalds
 */

/*
 * Notes on the allocation strategy:
 *
 * The dcache is a master of the icache - whenever a dcache entry
 * exists, the inode will always exist. "iput()" is done either when
 * the dcache entry is deleted or garbage collected.
 */

#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/cache.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
#include <linux/fs_struct.h>
#include <linux/hardirq.h>
#include <linux/bit_spinlock.h>
#include <linux/rculist_bl.h>
#include <linux/prefetch.h>
#include <linux/ratelimit.h>
#include "internal.h"
#include "mount.h"

/*
 * Usage:
 * dcache->d_inode->i_lock protects:
 *   - i_dentry, d_alias, d_inode of aliases
 * dcache_hash_bucket lock protects:
 *   - the dcache hash table
 * s_anon bl list spinlock protects:
 *   - the s_anon list (see __d_drop)
 * dcache_lru_lock protects:
 *   - the dcache lru lists and counters
 * d_lock protects:
 *   - d_flags
 *   - d_name
 *   - d_lru
 *   - d_count
 *   - d_unhashed()
 *   - d_parent and d_subdirs
 *   - children's d_child and d_parent
 *   - d_alias, d_inode
 *
 * Ordering:
 * dentry->d_inode->i_lock
 *   dentry->d_lock
 *     dcache_lru_lock
 *     dcache_hash_bucket lock
 *     s_anon lock
 *
 * If there is an ancestor relationship:
 * dentry->d_parent->...->d_parent->d_lock
 *   ...
 *     dentry->d_parent->d_lock
 *       dentry->d_lock
 *
 * If no ancestor relationship:
 * if (dentry1 < dentry2)
 *   dentry1->d_lock
 *     dentry2->d_lock
 */
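/*
 * Illustrative sketch (not part of the original file): one way a
 * hypothetical caller could take d_lock on two dentries with no
 * ancestor relationship, following the address-ordering rule above.
 * The name is made up; shown for clarity only.
 */
static inline void lock_two_unrelated_dentries(struct dentry *d1,
					       struct dentry *d2)
{
	if (d2 < d1)
		swap(d1, d2);
	/* lower address first, nested class for the second lock */
	spin_lock(&d1->d_lock);
	spin_lock_nested(&d2->d_lock, DENTRY_D_LOCK_NESTED);
}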
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);

EXPORT_SYMBOL(rename_lock);

static struct kmem_cache *dentry_cache __read_mostly;

/**
 * read_seqbegin_or_lock - begin a sequence number check or locking block
 * @lock: sequence lock
 * @seq: sequence number to be checked
 *
 * First try it once optimistically without taking the lock. If that fails,
 * take the lock. The sequence number is also used as a marker for deciding
 * whether to be a reader (even) or writer (odd).
 * N.B. seq must be initialized to an even number to begin with.
 */
static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
{
	if (!(*seq & 1))	/* Even */
		*seq = read_seqbegin(lock);
	else			/* Odd */
		write_seqlock(lock);
}

static inline int need_seqretry(seqlock_t *lock, int seq)
{
	return !(seq & 1) && read_seqretry(lock, seq);
}

static inline void done_seqretry(seqlock_t *lock, int seq)
{
	if (seq & 1)
		write_sequnlock(lock);
}
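/*
 * Illustrative sketch (hypothetical caller, not part of the original
 * file): the retry pattern the three helpers above are meant for.
 * The first pass runs locklessly with an even sequence count; if it
 * raced with a writer, seq is forced odd so the next pass takes the
 * write lock.  Compare d_walk() below for the real user in this file.
 */
static inline void seqretry_pattern_sketch(void)
{
	int seq = 0;

	for (;;) {
		read_seqbegin_or_lock(&rename_lock, &seq);
		/* ... read state protected by rename_lock here ... */
		if (!need_seqretry(&rename_lock, seq))
			break;
		seq = 1;	/* odd => take the lock on the next pass */
	}
	done_seqretry(&rename_lock, seq);	/* unlocks only if odd */
}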
/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */
#define D_HASHBITS	d_hash_shift
#define D_HASHMASK	d_hash_mask

static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;

static struct hlist_bl_head *dentry_hashtable __read_mostly;

static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
					unsigned int hash)
{
	hash += (unsigned long) parent / L1_CACHE_BYTES;
	hash = hash + (hash >> D_HASHBITS);
	return dentry_hashtable + (hash & D_HASHMASK);
}

/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
	.age_limit = 45,
};

static DEFINE_PER_CPU(unsigned int, nr_dentry);

#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
static int get_nr_dentry(void)
{
	int i;
	int sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_dentry, i);
	return sum < 0 ? 0 : sum;
}

int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
		   size_t *lenp, loff_t *ppos)
{
	dentry_stat.nr_dentry = get_nr_dentry();
	return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif

/*
 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
 * The strings are both count bytes long, and count is non-zero.
 */
#ifdef CONFIG_DCACHE_WORD_ACCESS

#include <asm/word-at-a-time.h>
/*
 * NOTE! 'cs' and 'scount' come from a dentry, so it has an
 * aligned allocation for this particular component. We don't
 * strictly need the load_unaligned_zeropad() safety, but it
 * doesn't hurt either.
 *
 * In contrast, 'ct' and 'tcount' can be from a pathname, and do
 * need the careful unaligned handling.
 */
static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
	unsigned long a, b, mask;

	for (;;) {
		a = *(unsigned long *)cs;
		b = load_unaligned_zeropad(ct);
		if (tcount < sizeof(unsigned long))
			break;
		if (unlikely(a != b))
			return 1;
		cs += sizeof(unsigned long);
		ct += sizeof(unsigned long);
		tcount -= sizeof(unsigned long);
		if (!tcount)
			return 0;
	}
	mask = ~(~0ul << tcount*8);
	return unlikely(!!((a ^ b) & mask));
}

#else

static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
	do {
		if (*cs != *ct)
			return 1;
		cs++;
		ct++;
		tcount--;
	} while (tcount);
	return 0;
}

#endif

static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
{
	const unsigned char *cs;
	/*
	 * Be careful about RCU walk racing with rename:
	 * use ACCESS_ONCE to fetch the name pointer.
	 *
	 * NOTE! Even if a rename will mean that the length
	 * was not loaded atomically, we don't care. The
	 * RCU walk will check the sequence count eventually,
	 * and catch it. And we won't overrun the buffer,
	 * because we're reading the name pointer atomically,
	 * and a dentry name is guaranteed to be properly
	 * terminated with a NUL byte.
	 *
	 * End result: even if 'len' is wrong, we'll exit
	 * early because the data cannot match (there can
	 * be no NUL in the ct/tcount data)
	 */
	cs = ACCESS_ONCE(dentry->d_name.name);
	smp_read_barrier_depends();
	return dentry_string_cmp(cs, ct, tcount);
}

static void __d_free(struct rcu_head *head)
{
	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);

	WARN_ON(!hlist_unhashed(&dentry->d_alias));
	if (dname_external(dentry))
		kfree(dentry->d_name.name);
	kmem_cache_free(dentry_cache, dentry);
}

/*
 * no locks, please.
 */
static void d_free(struct dentry *dentry)
{
	BUG_ON((int)dentry->d_lockref.count > 0);
	this_cpu_dec(nr_dentry);
	if (dentry->d_op && dentry->d_op->d_release)
		dentry->d_op->d_release(dentry);

	/* if dentry was never visible to RCU, immediate free is OK */
	if (!(dentry->d_flags & DCACHE_RCUACCESS))
		__d_free(&dentry->d_u.d_rcu);
	else
		call_rcu(&dentry->d_u.d_rcu, __d_free);
}
/**
 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 * @dentry: the target dentry
 *
 * After this call, in-progress rcu-walk path lookup will fail. This
 * should be called after unhashing, and after changing d_inode (if
 * the dentry has not already been unhashed).
 */
static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
{
	assert_spin_locked(&dentry->d_lock);
	/* Go through a barrier */
	write_seqcount_barrier(&dentry->d_seq);
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. Dentry has no refcount
 * and is unhashed.
 */
static void dentry_iput(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	if (inode) {
		dentry->d_inode = NULL;
		hlist_del_init(&dentry->d_alias);
		spin_unlock(&dentry->d_lock);
		spin_unlock(&inode->i_lock);
		if (!inode->i_nlink)
			fsnotify_inoderemove(inode);
		if (dentry->d_op && dentry->d_op->d_iput)
			dentry->d_op->d_iput(dentry, inode);
		else
			iput(inode);
	} else {
		spin_unlock(&dentry->d_lock);
	}
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. dentry remains in-use.
 */
static void dentry_unlink_inode(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	dentry->d_inode = NULL;
	hlist_del_init(&dentry->d_alias);
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&inode->i_lock);
	if (!inode->i_nlink)
		fsnotify_inoderemove(inode);
	if (dentry->d_op && dentry->d_op->d_iput)
		dentry->d_op->d_iput(dentry, inode);
	else
		iput(inode);
}

/*
 * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held.
 */
static void dentry_lru_add(struct dentry *dentry)
{
	if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) {
		spin_lock(&dcache_lru_lock);
		dentry->d_flags |= DCACHE_LRU_LIST;
		list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
		dentry->d_sb->s_nr_dentry_unused++;
		dentry_stat.nr_unused++;
		spin_unlock(&dcache_lru_lock);
	}
}

static void __dentry_lru_del(struct dentry *dentry)
{
	list_del_init(&dentry->d_lru);
	dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
	dentry->d_sb->s_nr_dentry_unused--;
	dentry_stat.nr_unused--;
}

/*
 * Remove a dentry with references from the LRU.
 */
static void dentry_lru_del(struct dentry *dentry)
{
	if (!list_empty(&dentry->d_lru)) {
		spin_lock(&dcache_lru_lock);
		__dentry_lru_del(dentry);
		spin_unlock(&dcache_lru_lock);
	}
}

static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list)
{
	spin_lock(&dcache_lru_lock);
	if (list_empty(&dentry->d_lru)) {
		dentry->d_flags |= DCACHE_LRU_LIST;
		list_add_tail(&dentry->d_lru, list);
		dentry->d_sb->s_nr_dentry_unused++;
		dentry_stat.nr_unused++;
	} else {
		list_move_tail(&dentry->d_lru, list);
	}
	spin_unlock(&dcache_lru_lock);
}

/**
 * d_kill - kill dentry and return parent
 * @dentry: dentry to kill
 * @parent: parent dentry
 *
 * The dentry must already be unhashed and removed from the LRU.
 *
 * If this is the root of the dentry tree, return NULL.
 *
 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
 * d_kill.
 */
static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
	__releases(dentry->d_lock)
	__releases(parent->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	list_del(&dentry->d_u.d_child);
	/*
	 * Inform try_to_ascend() that we are no longer attached to the
	 * dentry tree
	 */
	dentry->d_flags |= DCACHE_DENTRY_KILLED;
	if (parent)
		spin_unlock(&parent->d_lock);
	dentry_iput(dentry);
	/*
	 * dentry_iput drops the locks, at which point nobody (except
	 * transient RCU lookups) can reach this dentry.
	 */
	d_free(dentry);
	return parent;
}
/*
 * Unhash a dentry without inserting an RCU walk barrier or checking that
 * dentry->d_lock is locked. The caller must take care of that, if
 * appropriate.
 */
static void __d_shrink(struct dentry *dentry)
{
	if (!d_unhashed(dentry)) {
		struct hlist_bl_head *b;
		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
			b = &dentry->d_sb->s_anon;
		else
			b = d_hash(dentry->d_parent, dentry->d_name.hash);

		hlist_bl_lock(b);
		__hlist_bl_del(&dentry->d_hash);
		dentry->d_hash.pprev = NULL;
		hlist_bl_unlock(b);
	}
}

/**
 * d_drop - drop a dentry
 * @dentry: dentry to drop
 *
 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 * be found through a VFS lookup any more. Note that this is different from
 * deleting the dentry - d_delete will try to mark the dentry negative if
 * possible, giving a successful _negative_ lookup, while d_drop will
 * just make the cache lookup fail.
 *
 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 * reason (NFS timeouts or autofs deletes).
 *
 * __d_drop requires dentry->d_lock.
 */
void __d_drop(struct dentry *dentry)
{
	if (!d_unhashed(dentry)) {
		__d_shrink(dentry);
		dentry_rcuwalk_barrier(dentry);
	}
}
EXPORT_SYMBOL(__d_drop);

void d_drop(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(d_drop);

/*
 * Finish off a dentry we've decided to kill.
 * dentry->d_lock must be held, returns with it unlocked.
 * Returns dentry requiring refcount drop, or NULL if we're done.
 */
static inline struct dentry *dentry_kill(struct dentry *dentry)
	__releases(dentry->d_lock)
{
	struct inode *inode;
	struct dentry *parent;

	inode = dentry->d_inode;
	if (inode && !spin_trylock(&inode->i_lock)) {
relock:
		spin_unlock(&dentry->d_lock);
		cpu_relax();
		return dentry; /* try again with same dentry */
	}
	if (IS_ROOT(dentry))
		parent = NULL;
	else
		parent = dentry->d_parent;
	if (parent && !spin_trylock(&parent->d_lock)) {
		if (inode)
			spin_unlock(&inode->i_lock);
		goto relock;
	}

	/*
	 * The dentry is now unrecoverably dead to the world.
	 */
	lockref_mark_dead(&dentry->d_lockref);

	/*
	 * inform the fs via d_prune that this dentry is about to be
	 * unhashed and destroyed.
	 */
	if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry))
		dentry->d_op->d_prune(dentry);

	dentry_lru_del(dentry);
	/* if it was on the hash then remove it */
	__d_drop(dentry);
	return d_kill(dentry, parent);
}

/*
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
 */

/*
 * dput - release a dentry
 * @dentry: dentry to release
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 */
void dput(struct dentry *dentry)
{
	if (unlikely(!dentry))
		return;

repeat:
	if (lockref_put_or_lock(&dentry->d_lockref))
		return;

	/* Unreachable? Get rid of it */
	if (unlikely(d_unhashed(dentry)))
		goto kill_it;

	if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
		if (dentry->d_op->d_delete(dentry))
			goto kill_it;
	}

	dentry->d_flags |= DCACHE_REFERENCED;
	dentry_lru_add(dentry);

	dentry->d_lockref.count--;
	spin_unlock(&dentry->d_lock);
	return;

kill_it:
	dentry = dentry_kill(dentry);
	if (dentry)
		goto repeat;
}
EXPORT_SYMBOL(dput);
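/*
 * Illustrative sketch (hypothetical caller, not part of the original
 * file): the usual reference pairing.  dget() pins the dentry; the
 * matching dput() either leaves it on the LRU as an unused cache entry
 * or tears it down through dentry_kill() as described above.
 */
static inline void dput_pairing_sketch(struct dentry *dentry)
{
	struct dentry *d = dget(dentry);	/* take a reference */

	/* ... d->d_name and d->d_inode are stable while pinned ... */
	dput(d);				/* drop it again */
}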
/**
 * d_invalidate - invalidate a dentry
 * @dentry: dentry to invalidate
 *
 * Try to invalidate the dentry if it turns out to be
 * possible. If there are other dentries that can be
 * reached through this one we can't delete it and we
 * return -EBUSY. On success we return 0.
 *
 * no dcache lock.
 */

int d_invalidate(struct dentry * dentry)
{
	/*
	 * If it's already been dropped, return OK.
	 */
	spin_lock(&dentry->d_lock);
	if (d_unhashed(dentry)) {
		spin_unlock(&dentry->d_lock);
		return 0;
	}
	/*
	 * Check whether to do a partial shrink_dcache
	 * to get rid of unused child entries.
	 */
	if (!list_empty(&dentry->d_subdirs)) {
		spin_unlock(&dentry->d_lock);
		shrink_dcache_parent(dentry);
		spin_lock(&dentry->d_lock);
	}

	/*
	 * Somebody else still using it?
	 *
	 * If it's a directory, we can't drop it
	 * for fear of somebody re-populating it
	 * with children (even though dropping it
	 * would make it unreachable from the root,
	 * we might still populate it if it was a
	 * working directory or similar).
	 * We also need to leave mountpoints alone,
	 * directory or not.
	 */
	if (dentry->d_lockref.count > 1 && dentry->d_inode) {
		if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) {
			spin_unlock(&dentry->d_lock);
			return -EBUSY;
		}
	}

	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
	return 0;
}
EXPORT_SYMBOL(d_invalidate);

/* This must be called with d_lock held */
static inline void __dget_dlock(struct dentry *dentry)
{
	dentry->d_lockref.count++;
}

static inline void __dget(struct dentry *dentry)
{
	lockref_get(&dentry->d_lockref);
}

struct dentry *dget_parent(struct dentry *dentry)
{
	int gotref;
	struct dentry *ret;

	/*
	 * Do optimistic parent lookup without any
	 * locking.
	 */
	rcu_read_lock();
	ret = ACCESS_ONCE(dentry->d_parent);
	gotref = lockref_get_not_zero(&ret->d_lockref);
	rcu_read_unlock();
	if (likely(gotref)) {
		if (likely(ret == ACCESS_ONCE(dentry->d_parent)))
			return ret;
		dput(ret);
	}

repeat:
	/*
	 * Don't need rcu_dereference because we re-check it was correct under
	 * the lock.
	 */
	rcu_read_lock();
	ret = dentry->d_parent;
	spin_lock(&ret->d_lock);
	if (unlikely(ret != dentry->d_parent)) {
		spin_unlock(&ret->d_lock);
		rcu_read_unlock();
		goto repeat;
	}
	rcu_read_unlock();
	BUG_ON(!ret->d_lockref.count);
	ret->d_lockref.count++;
	spin_unlock(&ret->d_lock);
	return ret;
}
EXPORT_SYMBOL(dget_parent);
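/*
 * Illustrative sketch (hypothetical caller, not part of the original
 * file): walking one level up.  A raw dentry->d_parent dereference can
 * race with rename and dput; dget_parent() returns a pinned parent
 * that must later be released with dput().
 */
static inline void parent_walk_sketch(struct dentry *dentry)
{
	struct dentry *parent = dget_parent(dentry);

	/* ... parent cannot be freed or renamed away here ... */
	dput(parent);
}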
/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 * @want_discon: flag, used by d_splice_alias, to request
 *               that only a DISCONNECTED alias be returned.
 *
 * If inode has a hashed alias, or is a directory and has any alias,
 * acquire the reference to alias and return it. Otherwise return NULL.
 * Notice that if inode is a directory there can be only one alias and
 * it can be unhashed only if it has no children, or if it is the root
 * of a filesystem.
 *
 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 * any other hashed alias over that one unless @want_discon is set,
 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 */
static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
{
	struct dentry *alias, *discon_alias;

again:
	discon_alias = NULL;
	hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				discon_alias = alias;
			} else if (!want_discon) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
	}
	if (discon_alias) {
		alias = discon_alias;
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
		goto again;
	}
	return NULL;
}

struct dentry *d_find_alias(struct inode *inode)
{
	struct dentry *de = NULL;

	if (!hlist_empty(&inode->i_dentry)) {
		spin_lock(&inode->i_lock);
		de = __d_find_alias(inode, 0);
		spin_unlock(&inode->i_lock);
	}
	return de;
}
EXPORT_SYMBOL(d_find_alias);

/*
 * Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
	struct dentry *dentry;
restart:
	spin_lock(&inode->i_lock);
	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
		spin_lock(&dentry->d_lock);
		if (!dentry->d_lockref.count) {
			/*
			 * inform the fs via d_prune that this dentry
			 * is about to be unhashed and destroyed.
			 */
			if ((dentry->d_flags & DCACHE_OP_PRUNE) &&
			    !d_unhashed(dentry))
				dentry->d_op->d_prune(dentry);

			__dget_dlock(dentry);
			__d_drop(dentry);
			spin_unlock(&dentry->d_lock);
			spin_unlock(&inode->i_lock);
			dput(dentry);
			goto restart;
		}
		spin_unlock(&dentry->d_lock);
	}
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_prune_aliases);
/*
 * Try to throw away a dentry - free the inode, dput the parent.
 * Requires dentry->d_lock is held, and dentry->d_count == 0.
 * Releases dentry->d_lock.
 *
 * This may fail if locks cannot be acquired; no problem, just try again.
 */
static void try_prune_one_dentry(struct dentry *dentry)
	__releases(dentry->d_lock)
{
	struct dentry *parent;

	parent = dentry_kill(dentry);
	/*
	 * If dentry_kill returns NULL, we have nothing more to do.
	 * if it returns the same dentry, trylocks failed. In either
	 * case, just loop again.
	 *
	 * Otherwise, we need to prune ancestors too. This is necessary
	 * to prevent quadratic behavior of shrink_dcache_parent(), but
	 * is also expected to be beneficial in reducing dentry cache
	 * fragmentation.
	 */
	if (!parent)
		return;
	if (parent == dentry)
		return;

	/* Prune ancestors. */
	dentry = parent;
	while (dentry) {
		if (lockref_put_or_lock(&dentry->d_lockref))
			return;
		dentry = dentry_kill(dentry);
	}
}

static void shrink_dentry_list(struct list_head *list)
{
	struct dentry *dentry;

	rcu_read_lock();
	for (;;) {
		dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
		if (&dentry->d_lru == list)
			break; /* empty */
		spin_lock(&dentry->d_lock);
		if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
			spin_unlock(&dentry->d_lock);
			continue;
		}

		/*
		 * We found an inuse dentry which was not removed from
		 * the LRU because of laziness during lookup. Do not free
		 * it - just keep it off the LRU list.
		 */
		if (dentry->d_lockref.count) {
			dentry_lru_del(dentry);
			spin_unlock(&dentry->d_lock);
			continue;
		}

		rcu_read_unlock();

		try_prune_one_dentry(dentry);

		rcu_read_lock();
	}
	rcu_read_unlock();
}

/**
 * prune_dcache_sb - shrink the dcache
 * @sb: superblock
 * @count: number of entries to try to free
 *
 * Attempt to shrink the superblock dcache LRU by @count entries. This is
 * done when we need more memory and is called from the superblock shrinker
 * function.
 *
 * This function may fail to free any resources if all the dentries are in
 * use.
 */
void prune_dcache_sb(struct super_block *sb, int count)
{
	struct dentry *dentry;
	LIST_HEAD(referenced);
	LIST_HEAD(tmp);

relock:
	spin_lock(&dcache_lru_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		dentry = list_entry(sb->s_dentry_lru.prev,
				struct dentry, d_lru);
		BUG_ON(dentry->d_sb != sb);

		if (!spin_trylock(&dentry->d_lock)) {
			spin_unlock(&dcache_lru_lock);
			cpu_relax();
			goto relock;
		}

		if (dentry->d_flags & DCACHE_REFERENCED) {
			dentry->d_flags &= ~DCACHE_REFERENCED;
			list_move(&dentry->d_lru, &referenced);
			spin_unlock(&dentry->d_lock);
		} else {
			list_move_tail(&dentry->d_lru, &tmp);
			dentry->d_flags |= DCACHE_SHRINK_LIST;
			spin_unlock(&dentry->d_lock);
			if (!--count)
				break;
		}
		cond_resched_lock(&dcache_lru_lock);
	}
	if (!list_empty(&referenced))
		list_splice(&referenced, &sb->s_dentry_lru);
	spin_unlock(&dcache_lru_lock);

	shrink_dentry_list(&tmp);
}
/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This is used to free
 * the dcache before unmounting a file system.
 */
void shrink_dcache_sb(struct super_block *sb)
{
	LIST_HEAD(tmp);

	spin_lock(&dcache_lru_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		list_splice_init(&sb->s_dentry_lru, &tmp);
		spin_unlock(&dcache_lru_lock);
		shrink_dentry_list(&tmp);
		spin_lock(&dcache_lru_lock);
	}
	spin_unlock(&dcache_lru_lock);
}
EXPORT_SYMBOL(shrink_dcache_sb);

/*
 * destroy a single subtree of dentries for unmount
 * - see the comments on shrink_dcache_for_umount() for a description of the
 *   locking
 */
static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
{
	struct dentry *parent;

	BUG_ON(!IS_ROOT(dentry));

	for (;;) {
		/* descend to the first leaf in the current subtree */
		while (!list_empty(&dentry->d_subdirs))
			dentry = list_entry(dentry->d_subdirs.next,
					    struct dentry, d_u.d_child);

		/* consume the dentries from this leaf up through its parents
		 * until we find one with children or run out altogether */
		do {
			struct inode *inode;

			/*
			 * inform the fs that this dentry is about to be
			 * unhashed and destroyed.
			 */
			if ((dentry->d_flags & DCACHE_OP_PRUNE) &&
			    !d_unhashed(dentry))
				dentry->d_op->d_prune(dentry);

			dentry_lru_del(dentry);
			__d_shrink(dentry);

			if (dentry->d_lockref.count != 0) {
				printk(KERN_ERR
				       "BUG: Dentry %p{i=%lx,n=%s}"
				       " still in use (%d)"
				       " [unmount of %s %s]\n",
				       dentry,
				       dentry->d_inode ?
				       dentry->d_inode->i_ino : 0UL,
				       dentry->d_name.name,
				       dentry->d_lockref.count,
				       dentry->d_sb->s_type->name,
				       dentry->d_sb->s_id);
				BUG();
			}

			if (IS_ROOT(dentry)) {
				parent = NULL;
				list_del(&dentry->d_u.d_child);
			} else {
				parent = dentry->d_parent;
				parent->d_lockref.count--;
				list_del(&dentry->d_u.d_child);
			}

			inode = dentry->d_inode;
			if (inode) {
				dentry->d_inode = NULL;
				hlist_del_init(&dentry->d_alias);
				if (dentry->d_op && dentry->d_op->d_iput)
					dentry->d_op->d_iput(dentry, inode);
				else
					iput(inode);
			}

			d_free(dentry);

			/* finished when we fall off the top of the tree,
			 * otherwise we ascend to the parent and move to the
			 * next sibling if there is one */
			if (!parent)
				return;
			dentry = parent;
		} while (list_empty(&dentry->d_subdirs));

		dentry = list_entry(dentry->d_subdirs.next,
				    struct dentry, d_u.d_child);
	}
}

/*
 * destroy the dentries attached to a superblock on unmounting
 * - we don't need to use dentry->d_lock because:
 *   - the superblock is detached from all mountings and open files, so the
 *     dentry trees will not be rearranged by the VFS
 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
 *     any dentries belonging to this superblock that it comes across
 *   - the filesystem itself is no longer permitted to rearrange the dentries
 *     in this superblock
 */
void shrink_dcache_for_umount(struct super_block *sb)
{
	struct dentry *dentry;

	if (down_read_trylock(&sb->s_umount))
		BUG();

	dentry = sb->s_root;
	sb->s_root = NULL;
	dentry->d_lockref.count--;
	shrink_dcache_for_umount_subtree(dentry);

	while (!hlist_bl_empty(&sb->s_anon)) {
		dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
		shrink_dcache_for_umount_subtree(dentry);
	}
}
/*
 * This tries to ascend one level of parenthood, but
 * we can race with renaming, so we need to re-check
 * the parenthood after dropping the lock and check
 * that the sequence number still matches.
 */
static struct dentry *try_to_ascend(struct dentry *old, unsigned seq)
{
	struct dentry *new = old->d_parent;

	rcu_read_lock();
	spin_unlock(&old->d_lock);
	spin_lock(&new->d_lock);

	/*
	 * might go back up the wrong parent if we have had a rename
	 * or deletion
	 */
	if (new != old->d_parent ||
	    (old->d_flags & DCACHE_DENTRY_KILLED) ||
	    need_seqretry(&rename_lock, seq)) {
		spin_unlock(&new->d_lock);
		new = NULL;
	}
	rcu_read_unlock();
	return new;
}

/**
 * enum d_walk_ret - action to take during tree walk
 * @D_WALK_CONTINUE:	continue walk
 * @D_WALK_QUIT:	quit walk
 * @D_WALK_NORETRY:	quit when retry is needed
 * @D_WALK_SKIP:	skip this dentry and its children
 */
enum d_walk_ret {
	D_WALK_CONTINUE,
	D_WALK_QUIT,
	D_WALK_NORETRY,
	D_WALK_SKIP,
};

/**
 * d_walk - walk the dentry tree
 * @parent:	start of walk
 * @data:	data passed to @enter() and @finish()
 * @enter:	callback when first entering the dentry
 * @finish:	callback when successfully finished the walk
 *
 * The @enter() and @finish() callbacks are called with d_lock held.
 */
static void d_walk(struct dentry *parent, void *data,
		   enum d_walk_ret (*enter)(void *, struct dentry *),
		   void (*finish)(void *))
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq = 0;
	enum d_walk_ret ret;
	bool retry = true;

again:
	read_seqbegin_or_lock(&rename_lock, &seq);
	this_parent = parent;
	spin_lock(&this_parent->d_lock);

	ret = enter(data, this_parent);
	switch (ret) {
	case D_WALK_CONTINUE:
		break;
	case D_WALK_QUIT:
	case D_WALK_SKIP:
		goto out_unlock;
	case D_WALK_NORETRY:
		retry = false;
		break;
	}
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);

		ret = enter(data, dentry);
		switch (ret) {
		case D_WALK_CONTINUE:
			break;
		case D_WALK_QUIT:
			spin_unlock(&dentry->d_lock);
			goto out_unlock;
		case D_WALK_NORETRY:
			retry = false;
			break;
		case D_WALK_SKIP:
			spin_unlock(&dentry->d_lock);
			continue;
		}

		if (!list_empty(&dentry->d_subdirs)) {
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}
		spin_unlock(&dentry->d_lock);
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		struct dentry *child = this_parent;
		this_parent = try_to_ascend(this_parent, seq);
		if (!this_parent)
			goto rename_retry;
		next = child->d_u.d_child.next;
		goto resume;
	}
	if (need_seqretry(&rename_lock, seq)) {
		spin_unlock(&this_parent->d_lock);
		goto rename_retry;
	}
	if (finish)
		finish(data);

out_unlock:
	spin_unlock(&this_parent->d_lock);
	done_seqretry(&rename_lock, seq);
	return;

rename_retry:
	if (!retry)
		return;
	seq = 1;
	goto again;
}
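/*
 * Illustrative sketch (hypothetical walker, not part of the original
 * file): count the in-use dentries under a subtree, demonstrating the
 * enum d_walk_ret protocol above.  Compare check_mount() and
 * select_collect() below for the real callbacks in this file.
 *
 * usage: int n = 0; d_walk(parent, &n, count_in_use_sketch, NULL);
 */
static inline enum d_walk_ret count_in_use_sketch(void *data,
						  struct dentry *dentry)
{
	int *count = data;

	if (d_unhashed(dentry))
		return D_WALK_SKIP;	/* ignore this whole subtree */
	if (dentry->d_lockref.count)
		(*count)++;
	return D_WALK_CONTINUE;
}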
/*
 * Search for at least 1 mount point in the dentry's subdirs.
 * We descend to the next level whenever the d_subdirs
 * list is non-empty and continue searching.
 */

/**
 * have_submounts - check for mounts over a dentry
 * @parent: dentry to check.
 *
 * Return true if the parent or its subdirectories contain
 * a mount point
 */

static enum d_walk_ret check_mount(void *data, struct dentry *dentry)
{
	int *ret = data;
	if (d_mountpoint(dentry)) {
		*ret = 1;
		return D_WALK_QUIT;
	}
	return D_WALK_CONTINUE;
}

int have_submounts(struct dentry *parent)
{
	int ret = 0;

	d_walk(parent, &ret, check_mount, NULL);

	return ret;
}
EXPORT_SYMBOL(have_submounts);

/*
 * Called by mount code to set a mountpoint and check if the mountpoint is
 * reachable (e.g. NFS can unhash a directory dentry and then the complete
 * subtree can become unreachable).
 *
 * Only one of check_submounts_and_drop() and d_set_mounted() must succeed. For
 * this reason take rename_lock and d_lock on dentry and ancestors.
 */
int d_set_mounted(struct dentry *dentry)
{
	struct dentry *p;
	int ret = -ENOENT;
	write_seqlock(&rename_lock);
	for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) {
		/* Need exclusion wrt. check_submounts_and_drop() */
		spin_lock(&p->d_lock);
		if (unlikely(d_unhashed(p))) {
			spin_unlock(&p->d_lock);
			goto out;
		}
		spin_unlock(&p->d_lock);
	}
	spin_lock(&dentry->d_lock);
	if (!d_unlinked(dentry)) {
		dentry->d_flags |= DCACHE_MOUNTED;
		ret = 0;
	}
	spin_unlock(&dentry->d_lock);
out:
	write_sequnlock(&rename_lock);
	return ret;
}
/*
 * Search the dentry child list of the specified parent,
 * and move any unused dentries to the end of the unused
 * list for prune_dcache(). We descend to the next level
 * whenever the d_subdirs list is non-empty and continue
 * searching.
 *
 * It returns zero iff there are no unused children,
 * otherwise it returns the number of children moved to
 * the end of the unused list. This may not be the total
 * number of unused children, because select_parent can
 * drop the lock and return early due to latency
 * constraints.
 */

struct select_data {
	struct dentry *start;
	struct list_head dispose;
	int found;
};

static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
{
	struct select_data *data = _data;
	enum d_walk_ret ret = D_WALK_CONTINUE;

	if (data->start == dentry)
		goto out;

	/*
	 * move only zero ref count dentries to the dispose list.
	 *
	 * Those which are presently on the shrink list, being processed
	 * by shrink_dentry_list(), shouldn't be moved. Otherwise the
	 * loop in shrink_dcache_parent() might not make any progress
	 * and loop forever.
	 */
	if (dentry->d_lockref.count) {
		dentry_lru_del(dentry);
	} else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
		dentry_lru_move_list(dentry, &data->dispose);
		dentry->d_flags |= DCACHE_SHRINK_LIST;
		data->found++;
		ret = D_WALK_NORETRY;
	}
	/*
	 * We can return to the caller if we have found some (this
	 * ensures forward progress). We'll be coming back to find
	 * the rest.
	 */
	if (data->found && need_resched())
		ret = D_WALK_QUIT;
out:
	return ret;
}

/**
 * shrink_dcache_parent - prune dcache
 * @parent: parent of entries to prune
 *
 * Prune the dcache to remove unused children of the parent dentry.
 */
void shrink_dcache_parent(struct dentry *parent)
{
	for (;;) {
		struct select_data data;

		INIT_LIST_HEAD(&data.dispose);
		data.start = parent;
		data.found = 0;

		d_walk(parent, &data, select_collect, NULL);
		if (!data.found)
			break;

		shrink_dentry_list(&data.dispose);
		cond_resched();
	}
}
EXPORT_SYMBOL(shrink_dcache_parent);
static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry)
{
	struct select_data *data = _data;

	if (d_mountpoint(dentry)) {
		data->found = -EBUSY;
		return D_WALK_QUIT;
	}

	return select_collect(_data, dentry);
}

static void check_and_drop(void *_data)
{
	struct select_data *data = _data;

	if (d_mountpoint(data->start))
		data->found = -EBUSY;
	if (!data->found)
		__d_drop(data->start);
}

/**
 * check_submounts_and_drop - prune dcache, check for submounts and drop
 * @dentry: dentry to prune and drop
 *
 * All done as a single atomic operation relative to has_unlinked_ancestor().
 * Returns 0 if successfully unhashed @dentry. If there were submounts then
 * return -EBUSY.
 */
int check_submounts_and_drop(struct dentry *dentry)
{
	int ret = 0;

	/* Negative dentries can be dropped without further checks */
	if (!dentry->d_inode) {
		d_drop(dentry);
		goto out;
	}

	for (;;) {
		struct select_data data;

		INIT_LIST_HEAD(&data.dispose);
		data.start = dentry;
		data.found = 0;

		d_walk(dentry, &data, check_and_collect, check_and_drop);
		ret = data.found;

		if (!list_empty(&data.dispose))
			shrink_dentry_list(&data.dispose);

		if (ret <= 0)
			break;

		cond_resched();
	}

out:
	return ret;
}
EXPORT_SYMBOL(check_submounts_and_drop);

/**
 * __d_alloc - allocate a dcache entry
 * @sb: filesystem it will belong to
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
 * available. On a success the dentry is returned. The name passed in is
 * copied and the copy passed in may be reused after this call.
 */

struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
{
	struct dentry *dentry;
	char *dname;

	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
	if (!dentry)
		return NULL;

	/*
	 * We guarantee that the inline name is always NUL-terminated.
	 * This way the memcpy() done by the name switching in rename
	 * will still always have a NUL at the end, even if we might
	 * be overwriting an internal NUL character
	 */
	dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
	if (name->len > DNAME_INLINE_LEN-1) {
		dname = kmalloc(name->len + 1, GFP_KERNEL);
		if (!dname) {
			kmem_cache_free(dentry_cache, dentry);
			return NULL;
		}
	} else {
		dname = dentry->d_iname;
	}

	dentry->d_name.len = name->len;
	dentry->d_name.hash = name->hash;
	memcpy(dname, name->name, name->len);
	dname[name->len] = 0;

	/* Make sure we always see the terminating NUL character */
	smp_wmb();
	dentry->d_name.name = dname;

	dentry->d_lockref.count = 1;
	dentry->d_flags = 0;
	spin_lock_init(&dentry->d_lock);
	seqcount_init(&dentry->d_seq);
	dentry->d_inode = NULL;
	dentry->d_parent = dentry;
	dentry->d_sb = sb;
	dentry->d_op = NULL;
	dentry->d_fsdata = NULL;
	INIT_HLIST_BL_NODE(&dentry->d_hash);
	INIT_LIST_HEAD(&dentry->d_lru);
	INIT_LIST_HEAD(&dentry->d_subdirs);
	INIT_HLIST_NODE(&dentry->d_alias);
	INIT_LIST_HEAD(&dentry->d_u.d_child);
	d_set_d_op(dentry, dentry->d_sb->s_d_op);

	this_cpu_inc(nr_dentry);

	return dentry;
}
/**
 * d_alloc - allocate a dcache entry
 * @parent: parent of entry to allocate
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
 * available. On a success the dentry is returned. The name passed in is
 * copied and the copy passed in may be reused after this call.
 */
struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
{
	struct dentry *dentry = __d_alloc(parent->d_sb, name);
	if (!dentry)
		return NULL;

	spin_lock(&parent->d_lock);
	/*
	 * don't need child lock because it is not subject
	 * to concurrency here
	 */
	__dget_dlock(parent);
	dentry->d_parent = parent;
	list_add(&dentry->d_u.d_child, &parent->d_subdirs);
	spin_unlock(&parent->d_lock);

	return dentry;
}
EXPORT_SYMBOL(d_alloc);

struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
{
	struct dentry *dentry = __d_alloc(sb, name);
	if (dentry)
		dentry->d_flags |= DCACHE_DISCONNECTED;
	return dentry;
}
EXPORT_SYMBOL(d_alloc_pseudo);

struct dentry *d_alloc_name(struct dentry *parent, const char *name)
{
	struct qstr q;

	q.name = name;
	q.len = strlen(name);
	q.hash = full_name_hash(q.name, q.len);
	return d_alloc(parent, &q);
}
EXPORT_SYMBOL(d_alloc_name);

void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
{
	WARN_ON_ONCE(dentry->d_op);
	WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH	|
				DCACHE_OP_COMPARE	|
				DCACHE_OP_REVALIDATE	|
				DCACHE_OP_WEAK_REVALIDATE	|
				DCACHE_OP_DELETE));
	dentry->d_op = op;
	if (!op)
		return;
	if (op->d_hash)
		dentry->d_flags |= DCACHE_OP_HASH;
	if (op->d_compare)
		dentry->d_flags |= DCACHE_OP_COMPARE;
	if (op->d_revalidate)
		dentry->d_flags |= DCACHE_OP_REVALIDATE;
	if (op->d_weak_revalidate)
		dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE;
	if (op->d_delete)
		dentry->d_flags |= DCACHE_OP_DELETE;
	if (op->d_prune)
		dentry->d_flags |= DCACHE_OP_PRUNE;
}
EXPORT_SYMBOL(d_set_d_op);
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
	spin_lock(&dentry->d_lock);
	if (inode) {
		if (unlikely(IS_AUTOMOUNT(inode)))
			dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
		hlist_add_head(&dentry->d_alias, &inode->i_dentry);
	}
	dentry->d_inode = inode;
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	fsnotify_d_instantiate(dentry, inode);
}

/**
 * d_instantiate - fill in inode information for a dentry
 * @entry: dentry to complete
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry.
 *
 * This turns negative dentries into productive full members
 * of society.
 *
 * NOTE! This assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */

void d_instantiate(struct dentry *entry, struct inode * inode)
{
	BUG_ON(!hlist_unhashed(&entry->d_alias));
	if (inode)
		spin_lock(&inode->i_lock);
	__d_instantiate(entry, inode);
	if (inode)
		spin_unlock(&inode->i_lock);
	security_d_instantiate(entry, inode);
}
EXPORT_SYMBOL(d_instantiate);
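/*
 * Illustrative sketch (hypothetical ->create path, not part of the
 * original file): the caller obtains an inode with an elevated
 * reference count and binds it to the negative dentry it was handed;
 * that reference is taken over by the dcache, per the NOTE above.
 */
static inline int instantiate_usage_sketch(struct inode *dir,
					   struct dentry *dentry)
{
	struct inode *inode = new_inode(dir->i_sb);	/* i_count == 1 */

	if (!inode)
		return -ENOMEM;
	/* ... fill in i_mode, i_op, i_fop, etc. ... */
	d_instantiate(dentry, inode);	/* consumes that reference */
	return 0;
}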
/**
 * d_instantiate_unique - instantiate a non-aliased dentry
 * @entry: dentry to instantiate
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry. On success, it returns NULL.
 * If an unhashed alias of "entry" already exists, then we return the
 * aliased dentry instead and drop one reference to inode.
 *
 * Note that in order to avoid conflicts with rename() etc, the caller
 * had better be holding the parent directory semaphore.
 *
 * This also assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */
static struct dentry *__d_instantiate_unique(struct dentry *entry,
					     struct inode *inode)
{
	struct dentry *alias;
	int len = entry->d_name.len;
	const char *name = entry->d_name.name;
	unsigned int hash = entry->d_name.hash;

	if (!inode) {
		__d_instantiate(entry, NULL);
		return NULL;
	}

	hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
		/*
		 * Don't need alias->d_lock here, because aliases with
		 * d_parent == entry->d_parent are not subject to name or
		 * parent changes, because the parent inode i_mutex is held.
		 */
		if (alias->d_name.hash != hash)
			continue;
		if (alias->d_parent != entry->d_parent)
			continue;
		if (alias->d_name.len != len)
			continue;
		if (dentry_cmp(alias, name, len))
			continue;
		__dget(alias);
		return alias;
	}

	__d_instantiate(entry, inode);
	return NULL;
}

struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
	struct dentry *result;

	BUG_ON(!hlist_unhashed(&entry->d_alias));

	if (inode)
		spin_lock(&inode->i_lock);
	result = __d_instantiate_unique(entry, inode);
	if (inode)
		spin_unlock(&inode->i_lock);

	if (!result) {
		security_d_instantiate(entry, inode);
		return NULL;
	}

	BUG_ON(!d_unhashed(result));
	iput(inode);
	return result;
}

EXPORT_SYMBOL(d_instantiate_unique);

struct dentry *d_make_root(struct inode *root_inode)
{
	struct dentry *res = NULL;

	if (root_inode) {
		static const struct qstr name = QSTR_INIT("/", 1);

		res = __d_alloc(root_inode->i_sb, &name);
		if (res)
			d_instantiate(res, root_inode);
		else
			iput(root_inode);
	}
	return res;
}
EXPORT_SYMBOL(d_make_root);
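/*
 * Illustrative sketch (hypothetical ->fill_super tail, not part of the
 * original file): d_make_root() consumes the inode reference on both
 * success and failure, so the error path needs no iput().
 */
static inline int make_root_usage_sketch(struct super_block *sb,
					 struct inode *root_inode)
{
	sb->s_root = d_make_root(root_inode);
	if (!sb->s_root)
		return -ENOMEM;	/* root_inode was already released */
	return 0;
}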
static struct dentry * __d_find_any_alias(struct inode *inode)
{
	struct dentry *alias;

	if (hlist_empty(&inode->i_dentry))
		return NULL;
	alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
	__dget(alias);
	return alias;
}

/**
 * d_find_any_alias - find any alias for a given inode
 * @inode: inode to find an alias for
 *
 * If any aliases exist for the given inode, take and return a
 * reference for one of them.  If no aliases exist, return %NULL.
 */
struct dentry *d_find_any_alias(struct inode *inode)
{
	struct dentry *de;

	spin_lock(&inode->i_lock);
	de = __d_find_any_alias(inode);
	spin_unlock(&inode->i_lock);
	return de;
}
EXPORT_SYMBOL(d_find_any_alias);

/**
 * d_obtain_alias - find or allocate a dentry for a given inode
 * @inode: inode to allocate the dentry for
 *
 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
 * similar open by handle operations. The returned dentry may be anonymous,
 * or may have a full name (if the inode was already in the cache).
 *
 * When called on a directory inode, we must ensure that the inode only ever
 * has one dentry. If a dentry is found, that is returned instead of
 * allocating a new one.
 *
 * On successful return, the reference to the inode has been transferred
 * to the dentry. In case of an error the reference on the inode is released.
 * To make it easier to use in export operations a %NULL or IS_ERR inode may
 * be passed in and the error will be propagated to the return value,
 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
 */
struct dentry *d_obtain_alias(struct inode *inode)
{
	static const struct qstr anonstring = QSTR_INIT("/", 1);
	struct dentry *tmp;
	struct dentry *res;

	if (!inode)
		return ERR_PTR(-ESTALE);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	res = d_find_any_alias(inode);
	if (res)
		goto out_iput;

	tmp = __d_alloc(inode->i_sb, &anonstring);
	if (!tmp) {
		res = ERR_PTR(-ENOMEM);
		goto out_iput;
	}

	spin_lock(&inode->i_lock);
	res = __d_find_any_alias(inode);
	if (res) {
		spin_unlock(&inode->i_lock);
		dput(tmp);
		goto out_iput;
	}

	/* attach a disconnected dentry */
	spin_lock(&tmp->d_lock);
	tmp->d_inode = inode;
	tmp->d_flags |= DCACHE_DISCONNECTED;
	hlist_add_head(&tmp->d_alias, &inode->i_dentry);
	hlist_bl_lock(&tmp->d_sb->s_anon);
	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
	hlist_bl_unlock(&tmp->d_sb->s_anon);
	spin_unlock(&tmp->d_lock);
	spin_unlock(&inode->i_lock);
	security_d_instantiate(tmp, inode);

	return tmp;

out_iput:
	if (res && !IS_ERR(res))
		security_d_instantiate(res, inode);
	iput(inode);
	return res;
}
EXPORT_SYMBOL(d_obtain_alias);
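/*
 * Illustrative sketch (hypothetical ->fh_to_dentry, not part of the
 * original file): the export_operations pattern d_obtain_alias() is
 * built for.  example_iget() is a made-up filesystem helper; NULL and
 * ERR_PTR inodes are handled by d_obtain_alias() itself.
 */
static inline struct dentry *fh_to_dentry_sketch(struct super_block *sb,
						 u64 ino)
{
	struct inode *inode = example_iget(sb, ino);	/* hypothetical */

	return d_obtain_alias(inode);	/* inode ref handed over */
}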
/**
 * d_splice_alias - splice a disconnected dentry into the tree if one exists
 * @inode:  the inode which may have a disconnected dentry
 * @dentry: a negative dentry which we want to point to the inode.
 *
 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
 * and return it, else simply d_add the inode to the dentry and return NULL.
 *
 * This is needed in the lookup routine of any filesystem that is exportable
 * (via knfsd) so that we can build dcache paths to directories effectively.
 *
 * If a dentry was found and moved, then it is returned.  Otherwise NULL
 * is returned.  This matches the expected return value of ->lookup.
 *
 * Cluster filesystems may call this function with a negative, hashed dentry.
 * In that case, we know that the inode will be a regular file, and also this
 * will only occur during atomic_open. So we need to check for the dentry
 * being already hashed only in the final case.
 */
struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
{
	struct dentry *new = NULL;

	if (IS_ERR(inode))
		return ERR_CAST(inode);

	if (inode && S_ISDIR(inode->i_mode)) {
		spin_lock(&inode->i_lock);
		new = __d_find_alias(inode, 1);
		if (new) {
			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
			spin_unlock(&inode->i_lock);
			security_d_instantiate(new, inode);
			d_move(new, dentry);
			iput(inode);
		} else {
			/* already taking inode->i_lock, so d_add() by hand */
			__d_instantiate(dentry, inode);
			spin_unlock(&inode->i_lock);
			security_d_instantiate(dentry, inode);
			d_rehash(dentry);
		}
	} else {
		d_instantiate(dentry, inode);
		if (d_unhashed(dentry))
			d_rehash(dentry);
	}
	return new;
}
EXPORT_SYMBOL(d_splice_alias);
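/*
 * Illustrative sketch (hypothetical ->lookup, not part of the original
 * file): the return convention described above - whatever
 * d_splice_alias() returns is exactly what ->lookup should return.
 * example_inode_by_name() is a made-up filesystem helper.
 */
static inline struct dentry *lookup_usage_sketch(struct inode *dir,
						 struct dentry *dentry)
{
	struct inode *inode = example_inode_by_name(dir, &dentry->d_name);

	return d_splice_alias(inode, dentry);	/* NULL, alias or ERR_PTR */
}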
/**
 * __d_lookup_rcu - search for a dentry (racy, store-free)
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 * @seqp: returns d_seq value at the point where the dentry was found
 * Returns: dentry, or NULL
 *
 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
 * resolution (store-free path walking) design described in
 * Documentation/filesystems/path-lookup.txt.
 *
 * This is not to be used outside core vfs.
 *
 * __d_lookup_rcu must only be used in rcu-walk mode, i.e. with vfsmount
 * lock held, and rcu_read_lock held. The returned dentry must not be
 * stored anywhere without taking d_lock and checking the d_seq sequence
 * count against @seq returned here.
 *
 * A refcount may be taken on the found dentry with the d_rcu_to_refcount
 * function.
 *
 * Alternatively, __d_lookup_rcu may be called again to look up the child of
 * the returned dentry, so long as its parent's seqlock is checked after the
 * child is looked up. Thus, an interlocking stepping of sequence lock checks
 * is formed, giving integrity down the path walk.
 *
 * NOTE! The caller *has* to check the resulting dentry against the sequence
 * number we've returned before using any of the resulting dentry state!
 */
struct dentry *__d_lookup_rcu(const struct dentry *parent,
				const struct qstr *name,
				unsigned *seqp)
{
	u64 hashlen = name->hash_len;
	const unsigned char *str = name->name;
	struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
	struct hlist_bl_node *node;
	struct dentry *dentry;

	/*
	 * Note: There is significant duplication with __d_lookup which is
	 * required to prevent single threaded performance regressions
	 * especially on architectures where smp_rmb (in seqcounts) are costly.
	 * Keep the two functions in sync.
	 */

	/*
	 * The hash list is protected using RCU.
	 *
	 * Carefully use d_seq when comparing a candidate dentry, to avoid
	 * races with d_move().
	 *
	 * It is possible that concurrent renames can mess up our list
	 * walk here and result in missing our dentry, resulting in the
	 * false-negative result. d_lookup() protects against concurrent
	 * renames using rename_lock seqlock.
	 *
	 * See Documentation/filesystems/path-lookup.txt for more details.
	 */
	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
		unsigned seq;

seqretry:
		/*
		 * The dentry sequence count protects us from concurrent
		 * renames, and thus protects parent and name fields.
		 *
		 * The caller must perform a seqcount check in order
		 * to do anything useful with the returned dentry.
		 *
		 * NOTE! We do a "raw" seqcount_begin here. That means that
		 * we don't wait for the sequence count to stabilize if it
		 * is in the middle of a sequence change. If we do the slow
		 * dentry compare, we will do seqretries until it is stable,
		 * and if we end up with a successful lookup, we actually
		 * want to exit RCU lookup anyway.
		 */
		seq = raw_seqcount_begin(&dentry->d_seq);
		if (dentry->d_parent != parent)
			continue;
		if (d_unhashed(dentry))
			continue;

		if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
			if (dentry->d_name.hash != hashlen_hash(hashlen))
				continue;
			*seqp = seq;
			switch (slow_dentry_cmp(parent, dentry, seq, name)) {
			case D_COMP_OK:
				return dentry;
			case D_COMP_NOMATCH:
				continue;
			default:
				goto seqretry;
			}
		}

		if (dentry->d_name.hash_len != hashlen)
			continue;
		*seqp = seq;
		if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
			return dentry;
	}
	return NULL;
}

/**
 * d_lookup - search for a dentry
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 * Returns: dentry, or NULL
 *
 * d_lookup searches the children of the parent dentry for the name in
 * question. If the dentry is found its reference count is incremented and the
 * dentry is returned. The caller must use dput to free the entry when it has
 * finished using it. %NULL is returned if the dentry does not exist.
 */
struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name)
{
	struct dentry *dentry;
	unsigned seq;

	do {
		seq = read_seqbegin(&rename_lock);
		dentry = __d_lookup(parent, name);
		if (dentry)
			break;
	} while (read_seqretry(&rename_lock, seq));
	return dentry;
}
EXPORT_SYMBOL(d_lookup);
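/*
 * Illustrative sketch, not part of the kernel proper: a typical caller
 * pre-computes the hash (as d_hash_and_lookup() below does for you),
 * then drops the reference d_lookup() took once it is done with the
 * child:
 *
 *	struct qstr q = QSTR_INIT("child", 5);
 *	struct dentry *child;
 *
 *	q.hash = full_name_hash(q.name, q.len);
 *	child = d_lookup(parent, &q);
 *	if (child) {
 *		... use child ...
 *		dput(child);
 *	}
 */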
/**
 * __d_lookup - search for a dentry (racy)
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 * Returns: dentry, or NULL
 *
 * __d_lookup is like d_lookup, however it may (rarely) return a
 * false-negative result due to unrelated rename activity.
 *
 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
 * however it must be used carefully, eg. with a following d_lookup in
 * the case of failure.
 *
 * __d_lookup callers must be commented.
 */
struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct hlist_bl_head *b = d_hash(parent, hash);
	struct hlist_bl_node *node;
	struct dentry *found = NULL;
	struct dentry *dentry;

	/*
	 * Note: There is significant duplication with __d_lookup_rcu which is
	 * required to prevent single threaded performance regressions
	 * especially on architectures where smp_rmb (in seqcounts) are costly.
	 * Keep the two functions in sync.
	 */

	/*
	 * The hash list is protected using RCU.
	 *
	 * Take d_lock when comparing a candidate dentry, to avoid races
	 * with d_move().
	 *
	 * It is possible that concurrent renames can mess up our list
	 * walk here and result in missing our dentry, resulting in the
	 * false-negative result. d_lookup() protects against concurrent
	 * renames using rename_lock seqlock.
	 *
	 * See Documentation/filesystems/path-lookup.txt for more details.
	 */
	rcu_read_lock();

	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {

		if (dentry->d_name.hash != hash)
			continue;

		spin_lock(&dentry->d_lock);
		if (dentry->d_parent != parent)
			goto next;
		if (d_unhashed(dentry))
			goto next;

		/*
		 * It is safe to compare names since d_move() cannot
		 * change the qstr (protected by d_lock).
		 */
		if (parent->d_flags & DCACHE_OP_COMPARE) {
			int tlen = dentry->d_name.len;
			const char *tname = dentry->d_name.name;
			if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
				goto next;
		} else {
			if (dentry->d_name.len != len)
				goto next;
			if (dentry_cmp(dentry, str, len))
				goto next;
		}

		dentry->d_lockref.count++;
		found = dentry;
		spin_unlock(&dentry->d_lock);
		break;
next:
		spin_unlock(&dentry->d_lock);
	}
	rcu_read_unlock();

	return found;
}

/**
 * d_hash_and_lookup - hash the qstr then search for a dentry
 * @dir: Directory to search in
 * @name: qstr of name we wish to find
 *
 * On lookup failure NULL is returned; on bad name ERR_PTR(-error) is
 * returned.
 */
struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
{
	/*
	 * Check for a fs-specific hash function. Note that we must
	 * calculate the standard hash first, as the d_op->d_hash()
	 * routine may choose to leave the hash value unchanged.
	 */
	name->hash = full_name_hash(name->name, name->len);
	if (dir->d_flags & DCACHE_OP_HASH) {
		int err = dir->d_op->d_hash(dir, name);
		if (unlikely(err < 0))
			return ERR_PTR(err);
	}
	return d_lookup(dir, name);
}
EXPORT_SYMBOL(d_hash_and_lookup);

/**
 * d_validate - verify dentry provided from insecure source (deprecated)
 * @dentry: The dentry alleged to be valid child of @dparent
 * @dparent: The parent dentry (known to be valid)
 *
 * An insecure source has sent us a dentry; here we verify it and dget() it.
 * This is used by ncpfs in its readdir implementation.
 * Zero is returned if the dentry is invalid.
 *
 * This function is slow for big directories, and deprecated; do not use it.
 */
int d_validate(struct dentry *dentry, struct dentry *dparent)
{
	struct dentry *child;

	spin_lock(&dparent->d_lock);
	list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
		if (dentry == child) {
			spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
			__dget_dlock(dentry);
			spin_unlock(&dentry->d_lock);
			spin_unlock(&dparent->d_lock);
			return 1;
		}
	}
	spin_unlock(&dparent->d_lock);

	return 0;
}
EXPORT_SYMBOL(d_validate);
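/*
 * Illustrative sketch, not part of the kernel proper: the ->d_hash()
 * hook consulted by d_hash_and_lookup() above may fold the name before
 * hashing, so that all case variants of a name land in the same hash
 * chain. A hypothetical case-insensitive example:
 *
 *	static int ci_d_hash(const struct dentry *dentry, struct qstr *q)
 *	{
 *		unsigned long hash = init_name_hash();
 *		unsigned int i;
 *
 *		for (i = 0; i < q->len; i++)
 *			hash = partial_name_hash(tolower(q->name[i]), hash);
 *		q->hash = end_name_hash(hash);
 *		return 0;
 *	}
 */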
/*
 * When a file is deleted, we have two options:
 * - turn this dentry into a negative dentry
 * - unhash this dentry and free it.
 *
 * Usually, we want to just turn this into
 * a negative dentry, but if anybody else is
 * currently using the dentry or the inode,
 * we can't do that; we fall back on removing
 * it from the hash queues and waiting for
 * it to be deleted later when it has no users.
 */

/**
 * d_delete - delete a dentry
 * @dentry: The dentry to delete
 *
 * Turn the dentry into a negative dentry if possible, otherwise
 * remove it from the hash queues so it can be deleted later.
 */
void d_delete(struct dentry *dentry)
{
	struct inode *inode;
	int isdir = 0;
	/*
	 * Are we the only user?
	 */
again:
	spin_lock(&dentry->d_lock);
	inode = dentry->d_inode;
	isdir = S_ISDIR(inode->i_mode);
	if (dentry->d_lockref.count == 1) {
		if (!spin_trylock(&inode->i_lock)) {
			spin_unlock(&dentry->d_lock);
			cpu_relax();
			goto again;
		}
		dentry->d_flags &= ~DCACHE_CANT_MOUNT;
		dentry_unlink_inode(dentry);
		fsnotify_nameremove(dentry, isdir);
		return;
	}

	if (!d_unhashed(dentry))
		__d_drop(dentry);

	spin_unlock(&dentry->d_lock);

	fsnotify_nameremove(dentry, isdir);
}
EXPORT_SYMBOL(d_delete);

static void __d_rehash(struct dentry *entry, struct hlist_bl_head *b)
{
	BUG_ON(!d_unhashed(entry));
	hlist_bl_lock(b);
	entry->d_flags |= DCACHE_RCUACCESS;
	hlist_bl_add_head_rcu(&entry->d_hash, b);
	hlist_bl_unlock(b);
}

static void _d_rehash(struct dentry *entry)
{
	__d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
}

/**
 * d_rehash - add an entry back to the hash
 * @entry: dentry to add to the hash
 *
 * Adds a dentry to the hash according to its name.
 */
void d_rehash(struct dentry *entry)
{
	spin_lock(&entry->d_lock);
	_d_rehash(entry);
	spin_unlock(&entry->d_lock);
}
EXPORT_SYMBOL(d_rehash);

/**
 * dentry_update_name_case - update case insensitive dentry with a new name
 * @dentry: dentry to be updated
 * @name: new name
 *
 * Update a case insensitive dentry with new case of name.
 *
 * dentry must have been returned by d_lookup with name @name. Old and new
 * name lengths must match (i.e. no d_compare that allows mismatched name
 * lengths).
 *
 * Parent inode i_mutex must be held over d_lookup and into this call (to
 * keep renames and concurrent inserts, and readdir(2) away).
 */
void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
{
	BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
	BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */

	spin_lock(&dentry->d_lock);
	write_seqcount_begin(&dentry->d_seq);
	memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
	write_seqcount_end(&dentry->d_seq);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(dentry_update_name_case);
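/*
 * Illustrative sketch, not part of the kernel proper: d_delete() is
 * what the VFS unlink path uses once the filesystem has removed the
 * name, so the dentry either turns negative (and stays cached) or is
 * unhashed. Roughly, vfs_unlink() ends with:
 *
 *	error = dir->i_op->unlink(dir, dentry);
 *	if (!error)
 *		d_delete(dentry);
 */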
static void switch_names(struct dentry *dentry, struct dentry *target)
{
	if (dname_external(target)) {
		if (dname_external(dentry)) {
			/*
			 * Both external: swap the pointers
			 */
			swap(target->d_name.name, dentry->d_name.name);
		} else {
			/*
			 * dentry:internal, target:external. Steal target's
			 * storage and make target internal.
			 */
			memcpy(target->d_iname, dentry->d_name.name,
					dentry->d_name.len + 1);
			dentry->d_name.name = target->d_name.name;
			target->d_name.name = target->d_iname;
		}
	} else {
		if (dname_external(dentry)) {
			/*
			 * dentry:external, target:internal. Give dentry's
			 * storage to target and make dentry internal
			 */
			memcpy(dentry->d_iname, target->d_name.name,
					target->d_name.len + 1);
			target->d_name.name = dentry->d_name.name;
			dentry->d_name.name = dentry->d_iname;
		} else {
			/*
			 * Both are internal. Just copy target to dentry
			 */
			memcpy(dentry->d_iname, target->d_name.name,
					target->d_name.len + 1);
			dentry->d_name.len = target->d_name.len;
			return;
		}
	}
	swap(dentry->d_name.len, target->d_name.len);
}

static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
{
	/*
	 * XXXX: do we really need to take target->d_lock?
	 */
	if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
		spin_lock(&target->d_parent->d_lock);
	else {
		if (d_ancestor(dentry->d_parent, target->d_parent)) {
			spin_lock(&dentry->d_parent->d_lock);
			spin_lock_nested(&target->d_parent->d_lock,
						DENTRY_D_LOCK_NESTED);
		} else {
			spin_lock(&target->d_parent->d_lock);
			spin_lock_nested(&dentry->d_parent->d_lock,
						DENTRY_D_LOCK_NESTED);
		}
	}
	if (target < dentry) {
		spin_lock_nested(&target->d_lock, 2);
		spin_lock_nested(&dentry->d_lock, 3);
	} else {
		spin_lock_nested(&dentry->d_lock, 2);
		spin_lock_nested(&target->d_lock, 3);
	}
}

static void dentry_unlock_parents_for_move(struct dentry *dentry,
					struct dentry *target)
{
	if (target->d_parent != dentry->d_parent)
		spin_unlock(&dentry->d_parent->d_lock);
	if (target->d_parent != target)
		spin_unlock(&target->d_parent->d_lock);
}
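/*
 * Illustrative note, not part of the kernel proper: switch_names()
 * above needs four cases because a dentry's name lives either in the
 * fixed inline array d_iname or in an external allocation. The
 * dname_external() predicate it relies on is defined earlier in this
 * file, along these lines:
 *
 *	static inline int dname_external(const struct dentry *dentry)
 *	{
 *		return dentry->d_name.name != dentry->d_iname;
 *	}
 *
 * Only two external names can have their pointers swapped; an inline
 * name has to be memcpy'd, because it lives inside the dentry itself.
 */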
/*
 * When switching names, the actual string doesn't strictly have to
 * be preserved in the target - because we're dropping the target
 * anyway. As such, we can just do a simple memcpy() to copy over
 * the new name before we switch.
 *
 * Note that we have to be a lot more careful about getting the hash
 * switched - we have to switch the hash value properly even if it
 * then no longer matches the actual (corrupted) string of the target.
 * The hash value has to match the hash queue that the dentry is on.
 */
/*
 * __d_move - move a dentry
 * @dentry: entry to move
 * @target: new dentry
 *
 * Update the dcache to reflect the move of a file name. Negative
 * dcache entries should not be moved in this way. Caller must hold
 * rename_lock, the i_mutex of the source and target directories,
 * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
 */
static void __d_move(struct dentry *dentry, struct dentry *target)
{
	if (!dentry->d_inode)
		printk(KERN_WARNING "VFS: moving negative dcache entry\n");

	BUG_ON(d_ancestor(dentry, target));
	BUG_ON(d_ancestor(target, dentry));

	dentry_lock_for_move(dentry, target);

	write_seqcount_begin(&dentry->d_seq);
	write_seqcount_begin(&target->d_seq);

	/* __d_drop does write_seqcount_barrier, but they're OK to nest. */

	/*
	 * Move the dentry to the target hash queue. Don't bother checking
	 * for the same hash queue because of how unlikely it is.
	 */
	__d_drop(dentry);
	__d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));

	/* Unhash the target: dput() will then get rid of it */
	__d_drop(target);

	list_del(&dentry->d_u.d_child);
	list_del(&target->d_u.d_child);

	/* Switch the names.. */
	switch_names(dentry, target);
	swap(dentry->d_name.hash, target->d_name.hash);

	/* ... and switch the parents */
	if (IS_ROOT(dentry)) {
		dentry->d_parent = target->d_parent;
		target->d_parent = target;
		INIT_LIST_HEAD(&target->d_u.d_child);
	} else {
		swap(dentry->d_parent, target->d_parent);

		/* And add them back to the (new) parent lists */
		list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
	}

	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);

	write_seqcount_end(&target->d_seq);
	write_seqcount_end(&dentry->d_seq);

	dentry_unlock_parents_for_move(dentry, target);
	spin_unlock(&target->d_lock);
	fsnotify_d_move(dentry);
	spin_unlock(&dentry->d_lock);
}

/**
 * d_move - move a dentry
 * @dentry: entry to move
 * @target: new dentry
 *
 * Update the dcache to reflect the move of a file name. Negative
 * dcache entries should not be moved in this way. See the locking
 * requirements for __d_move.
 */
void d_move(struct dentry *dentry, struct dentry *target)
{
	write_seqlock(&rename_lock);
	__d_move(dentry, target);
	write_sequnlock(&rename_lock);
}
EXPORT_SYMBOL(d_move);

/**
 * d_ancestor - search for an ancestor
 * @p1: ancestor dentry
 * @p2: child dentry
 *
 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
 * an ancestor of p2, else NULL.
 */
struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
{
	struct dentry *p;

	for (p = p2; !IS_ROOT(p); p = p->d_parent) {
		if (p->d_parent == p1)
			return p;
	}
	return NULL;
}

/*
 * This helper attempts to cope with remotely renamed directories.
 *
 * It assumes that the caller is already holding
 * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock.
 *
 * Note: If ever the locking in lock_rename() changes, then please
 * remember to update this too...
 */
static struct dentry *__d_unalias(struct inode *inode,
		struct dentry *dentry, struct dentry *alias)
{
	struct mutex *m1 = NULL, *m2 = NULL;
	struct dentry *ret = ERR_PTR(-EBUSY);

	/* If alias and dentry share a parent, then no extra locks required */
	if (alias->d_parent == dentry->d_parent)
		goto out_unalias;

	/* See lock_rename() */
	if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
		goto out_err;
	m1 = &dentry->d_sb->s_vfs_rename_mutex;
	if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
		goto out_err;
	m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
	if (likely(!d_mountpoint(alias))) {
		__d_move(alias, dentry);
		ret = alias;
	}
out_err:
	spin_unlock(&inode->i_lock);
	if (m2)
		mutex_unlock(m2);
	if (m1)
		mutex_unlock(m1);
	return ret;
}
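/*
 * Illustrative sketch, not part of the kernel proper: the VFS rename
 * path is the canonical d_move() caller. Once the filesystem's
 * ->rename() has succeeded, vfs_rename() roughly does:
 *
 *	error = old_dir->i_op->rename(old_dir, old_dentry,
 *				      new_dir, new_dentry);
 *	if (!error)
 *		d_move(old_dentry, new_dentry);
 *
 * (unless the filesystem sets FS_RENAME_DOES_D_MOVE and takes care of
 * the dcache update itself).
 */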
/*
 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
 * named dentry in place of the dentry to be replaced.
 * Returns with anon->d_lock held!
 */
static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
{
	struct dentry *dparent;

	dentry_lock_for_move(anon, dentry);

	write_seqcount_begin(&dentry->d_seq);
	write_seqcount_begin(&anon->d_seq);

	dparent = dentry->d_parent;

	switch_names(dentry, anon);
	swap(dentry->d_name.hash, anon->d_name.hash);

	dentry->d_parent = dentry;
	list_del_init(&dentry->d_u.d_child);
	anon->d_parent = dparent;
	list_move(&anon->d_u.d_child, &dparent->d_subdirs);

	write_seqcount_end(&dentry->d_seq);
	write_seqcount_end(&anon->d_seq);

	dentry_unlock_parents_for_move(anon, dentry);
	spin_unlock(&dentry->d_lock);

	/* anon->d_lock still locked, returns locked */
	anon->d_flags &= ~DCACHE_DISCONNECTED;
}
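/*
 * Illustrative sketch, not part of the kernel proper: the splicing
 * helper below is aimed at filesystems that can encounter disconnected
 * directory aliases; e.g. a network filesystem's ->lookup() might end
 * with (remote_iget() being a hypothetical stand-in for obtaining the
 * inode from the server):
 *
 *	inode = remote_iget(dir->i_sb, fh);
 *	return d_materialise_unique(dentry, inode);
 *
 * A NULL return means @dentry itself was used; otherwise the caller
 * returns the dentry it got back, the inode reference having been
 * consumed either way.
 */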
/**
 * d_materialise_unique - introduce an inode into the tree
 * @dentry: candidate dentry
 * @inode: inode to bind to the dentry, to which aliases may be attached
 *
 * Introduces a dentry into the tree, substituting an extant disconnected
 * root directory alias in its place if there is one. Caller must hold the
 * i_mutex of the parent directory.
 */
struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
{
	struct dentry *actual;

	BUG_ON(!d_unhashed(dentry));

	if (!inode) {
		actual = dentry;
		__d_instantiate(dentry, NULL);
		d_rehash(actual);
		goto out_nolock;
	}

	spin_lock(&inode->i_lock);

	if (S_ISDIR(inode->i_mode)) {
		struct dentry *alias;

		/* Does an aliased dentry already exist? */
		alias = __d_find_alias(inode, 0);
		if (alias) {
			actual = alias;
			write_seqlock(&rename_lock);

			if (d_ancestor(alias, dentry)) {
				/* Check for loops */
				actual = ERR_PTR(-ELOOP);
				spin_unlock(&inode->i_lock);
			} else if (IS_ROOT(alias)) {
				/* Is this an anonymous mountpoint that we
				 * could splice into our tree? */
				__d_materialise_dentry(dentry, alias);
				write_sequnlock(&rename_lock);
				__d_drop(alias);
				goto found;
			} else {
				/* Nope, but we must(!) avoid directory
				 * aliasing. This drops inode->i_lock */
				actual = __d_unalias(inode, dentry, alias);
			}
			write_sequnlock(&rename_lock);
			if (IS_ERR(actual)) {
				if (PTR_ERR(actual) == -ELOOP)
					pr_warn_ratelimited(
						"VFS: Lookup of '%s' in %s %s"
						" would have caused loop\n",
						dentry->d_name.name,
						inode->i_sb->s_type->name,
						inode->i_sb->s_id);
				dput(alias);
			}
			goto out_nolock;
		}
	}

	/* Add a unique reference */
	actual = __d_instantiate_unique(dentry, inode);
	if (!actual)
		actual = dentry;
	else
		BUG_ON(!d_unhashed(actual));

	spin_lock(&actual->d_lock);
found:
	_d_rehash(actual);
	spin_unlock(&actual->d_lock);
	spin_unlock(&inode->i_lock);
out_nolock:
	if (actual == dentry) {
		security_d_instantiate(dentry, inode);
		return NULL;
	}

	iput(inode);
	return actual;
}
EXPORT_SYMBOL_GPL(d_materialise_unique);

static int prepend(char **buffer, int *buflen, const char *str, int namelen)
{
	*buflen -= namelen;
	if (*buflen < 0)
		return -ENAMETOOLONG;
	*buffer -= namelen;
	memcpy(*buffer, str, namelen);
	return 0;
}

/**
 * prepend_name - prepend a pathname in front of current buffer pointer
 * @buffer: buffer pointer
 * @buflen: allocated length of the buffer
 * @name: name string and length qstr structure
 *
 * With RCU path tracing, it may race with d_move(). Use ACCESS_ONCE() to
 * make sure that either the old or the new name pointer and length are
 * fetched. However, there may be a mismatch between length and pointer.
 * The length cannot be trusted; we copy the name byte by byte until the
 * length is reached or a NUL byte is found. The name is prepended with a
 * leading "/". The sequence number check at the caller will retry when a
 * d_move() does happen, so any garbage in the buffer due to a mismatched
 * pointer and length will be discarded.
 */
static int prepend_name(char **buffer, int *buflen, struct qstr *name)
{
	const char *dname = ACCESS_ONCE(name->name);
	u32 dlen = ACCESS_ONCE(name->len);
	char *p;

	if (*buflen < dlen + 1)
		return -ENAMETOOLONG;
	*buflen -= dlen + 1;
	p = *buffer -= dlen + 1;
	*p++ = '/';
	while (dlen--) {
		char c = *dname++;
		if (!c)
			break;
		*p++ = c;
	}
	return 0;
}
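/*
 * Illustrative example, not part of the kernel proper: these helpers
 * fill the buffer from the end. Starting with p at buf + buflen,
 * prepending the NUL and then the components "bar" and "foo" (as
 * qstrs) leaves "/foo/bar\0" in the buffer's tail, with p pointing at
 * the leading '/':
 *
 *	char buf[16], *p = buf + sizeof(buf);
 *	int len = sizeof(buf);
 *
 *	prepend(&p, &len, "\0", 1);
 *	prepend_name(&p, &len, &bar);	now p -> "/bar"
 *	prepend_name(&p, &len, &foo);	now p -> "/foo/bar"
 */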
/**
 * prepend_path - Prepend path string to a buffer
 * @path: the dentry/vfsmount to report
 * @root: root vfsmnt/dentry
 * @buffer: pointer to the end of the buffer
 * @buflen: pointer to buffer length
 *
 * The function tries to write out the pathname without taking any lock other
 * than the RCU read lock to make sure that dentries won't go away. It only
 * checks the sequence number of the global rename_lock, as any change in a
 * dentry's d_seq will be preceded by changes in the rename_lock sequence
 * number. If the sequence number has changed, it restarts the whole pathname
 * back-tracing sequence. It performs one lockless back-tracing pass and
 * falls back to taking the rename_lock for writing if a rename is detected.
 */
static int prepend_path(const struct path *path,
			const struct path *root,
			char **buffer, int *buflen)
{
	struct dentry *dentry = path->dentry;
	struct vfsmount *vfsmnt = path->mnt;
	struct mount *mnt = real_mount(vfsmnt);
	int error = 0;
	unsigned seq = 0;
	char *bptr;
	int blen;

	rcu_read_lock();
restart:
	bptr = *buffer;
	blen = *buflen;
	read_seqbegin_or_lock(&rename_lock, &seq);
	while (dentry != root->dentry || vfsmnt != root->mnt) {
		struct dentry *parent;

		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
			/* Global root? */
			if (mnt_has_parent(mnt)) {
				dentry = mnt->mnt_mountpoint;
				mnt = mnt->mnt_parent;
				vfsmnt = &mnt->mnt;
				continue;
			}
			/*
			 * Filesystems needing to implement special "root names"
			 * should do so with ->d_dname()
			 */
			if (IS_ROOT(dentry) &&
			    (dentry->d_name.len != 1 ||
			     dentry->d_name.name[0] != '/')) {
				WARN(1, "Root dentry has weird name <%.*s>\n",
				     (int) dentry->d_name.len,
				     dentry->d_name.name);
			}
			if (!error)
				error = is_mounted(vfsmnt) ? 1 : 2;
			break;
		}
		parent = dentry->d_parent;
		prefetch(parent);
		error = prepend_name(&bptr, &blen, &dentry->d_name);
		if (error)
			break;

		dentry = parent;
	}
	if (!(seq & 1))
		rcu_read_unlock();
	if (need_seqretry(&rename_lock, seq)) {
		seq = 1;
		goto restart;
	}
	done_seqretry(&rename_lock, seq);

	if (error >= 0 && bptr == *buffer) {
		if (--blen < 0)
			error = -ENAMETOOLONG;
		else
			*--bptr = '/';
	}
	*buffer = bptr;
	*buflen = blen;
	return error;
}

/**
 * __d_path - return the path of a dentry
 * @path: the dentry/vfsmount to report
 * @root: root vfsmnt/dentry
 * @buf: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name.
 *
 * Returns a pointer into the buffer or an error code if the
 * path was too long.
 *
 * "buflen" should be positive.
 *
 * If the path is not reachable from the supplied root, return %NULL.
 */
char *__d_path(const struct path *path,
	       const struct path *root,
	       char *buf, int buflen)
{
	char *res = buf + buflen;
	int error;

	prepend(&res, &buflen, "\0", 1);
	br_read_lock(&vfsmount_lock);
	error = prepend_path(path, root, &res, &buflen);
	br_read_unlock(&vfsmount_lock);

	if (error < 0)
		return ERR_PTR(error);
	if (error > 0)
		return NULL;
	return res;
}

char *d_absolute_path(const struct path *path,
		      char *buf, int buflen)
{
	struct path root = {};
	char *res = buf + buflen;
	int error;

	prepend(&res, &buflen, "\0", 1);
	br_read_lock(&vfsmount_lock);
	error = prepend_path(path, &root, &res, &buflen);
	br_read_unlock(&vfsmount_lock);

	if (error > 1)
		error = -EINVAL;
	if (error < 0)
		return ERR_PTR(error);
	return res;
}
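/*
 * Illustrative sketch, not part of the kernel proper: callers hand
 * these helpers a scratch buffer and must use the returned pointer,
 * not the buffer start, since the path is built at the buffer's end:
 *
 *	char *page = (char *)__get_free_page(GFP_KERNEL);
 *	char *p;
 *
 *	if (page) {
 *		p = d_absolute_path(&file->f_path, page, PAGE_SIZE);
 *		if (!IS_ERR(p))
 *			pr_info("opened %s\n", p);
 *		free_page((unsigned long)page);
 *	}
 */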
/*
 * Same as __d_path, but appends " (deleted)" for unlinked files.
 */
static int path_with_deleted(const struct path *path,
			     const struct path *root,
			     char **buf, int *buflen)
{
	prepend(buf, buflen, "\0", 1);
	if (d_unlinked(path->dentry)) {
		int error = prepend(buf, buflen, " (deleted)", 10);
		if (error)
			return error;
	}

	return prepend_path(path, root, buf, buflen);
}

static int prepend_unreachable(char **buffer, int *buflen)
{
	return prepend(buffer, buflen, "(unreachable)", 13);
}

/**
 * d_path - return the path of a dentry
 * @path: path to report
 * @buf: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name. If the entry has been deleted
 * the string " (deleted)" is appended. Note that this is ambiguous.
 *
 * Returns a pointer into the buffer or an error code if the path was
 * too long. Note: Callers should use the returned pointer, not the passed
 * in buffer, to use the name! The implementation often starts at an offset
 * into the buffer, and may leave unused bytes at the start.
 *
 * "buflen" should be positive.
 */
char *d_path(const struct path *path, char *buf, int buflen)
{
	char *res = buf + buflen;
	struct path root;
	int error;

	/*
	 * We have various synthetic filesystems that never get mounted. On
	 * these filesystems dentries are never used for lookup purposes, and
	 * thus don't need to be hashed. They also don't need a name until a
	 * user wants to identify the object in /proc/pid/fd/. The little hack
	 * below allows us to generate a name for these objects on demand:
	 */
	if (path->dentry->d_op && path->dentry->d_op->d_dname)
		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);

	get_fs_root(current->fs, &root);
	br_read_lock(&vfsmount_lock);
	error = path_with_deleted(path, &root, &res, &buflen);
	br_read_unlock(&vfsmount_lock);
	if (error < 0)
		res = ERR_PTR(error);
	path_put(&root);
	return res;
}
EXPORT_SYMBOL(d_path);

/*
 * Helper function for dentry_operations.d_dname() members
 */
char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
			const char *fmt, ...)
{
	va_list args;
	char temp[64];
	int sz;

	va_start(args, fmt);
	sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
	va_end(args);

	if (sz > sizeof(temp) || sz > buflen)
		return ERR_PTR(-ENAMETOOLONG);

	buffer += buflen - sz;
	return memcpy(buffer, temp, sz);
}

char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
{
	char *end = buffer + buflen;
	/* these dentries are never renamed, so d_lock is not needed */
	if (prepend(&end, &buflen, " (deleted)", 11) ||
	    prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
	    prepend(&end, &buflen, "/", 1))
		end = ERR_PTR(-ENAMETOOLONG);
	return end;
}
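/*
 * Illustrative example: ->d_dname() methods of nameless filesystems
 * are thin wrappers around dynamic_dname(). pipefs, for instance,
 * generates "pipe:[<ino>]" names along these lines:
 *
 *	static char *pipefs_dname(struct dentry *dentry, char *buffer,
 *				  int buflen)
 *	{
 *		return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
 *				dentry->d_inode->i_ino);
 *	}
 */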
/*
 * Write full pathname from the root of the filesystem into the buffer.
 */
static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
{
	char *end, *retval;
	int len, seq = 0;
	int error = 0;

	rcu_read_lock();
restart:
	end = buf + buflen;
	len = buflen;
	prepend(&end, &len, "\0", 1);
	if (buflen < 1)
		goto Elong;
	/* Get '/' right */
	retval = end-1;
	*retval = '/';
	read_seqbegin_or_lock(&rename_lock, &seq);
	while (!IS_ROOT(dentry)) {
		struct dentry *parent = dentry->d_parent;

		prefetch(parent);
		/* error propagates to the check after the retry loop */
		error = prepend_name(&end, &len, &dentry->d_name);
		if (error)
			break;

		retval = end;
		dentry = parent;
	}
	if (!(seq & 1))
		rcu_read_unlock();
	if (need_seqretry(&rename_lock, seq)) {
		seq = 1;
		goto restart;
	}
	done_seqretry(&rename_lock, seq);
	if (error)
		goto Elong;
	return retval;
Elong:
	return ERR_PTR(-ENAMETOOLONG);
}

char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
{
	return __dentry_path(dentry, buf, buflen);
}
EXPORT_SYMBOL(dentry_path_raw);

char *dentry_path(struct dentry *dentry, char *buf, int buflen)
{
	char *p = NULL;
	char *retval;

	if (d_unlinked(dentry)) {
		p = buf + buflen;
		if (prepend(&p, &buflen, "//deleted", 10) != 0)
			goto Elong;
		buflen++;
	}
	retval = __dentry_path(dentry, buf, buflen);
	if (!IS_ERR(retval) && p)
		*p = '/';	/* restore '/' overridden with '\0' */
	return retval;
Elong:
	return ERR_PTR(-ENAMETOOLONG);
}

/*
 * NOTE! The user-level library version returns a
 * character pointer. The kernel system call just
 * returns the length of the buffer filled (which
 * includes the ending '\0' character), or a negative
 * error value. So libc would do something like
 *
 *	char *getcwd(char * buf, size_t size)
 *	{
 *		int retval;
 *
 *		retval = sys_getcwd(buf, size);
 *		if (retval >= 0)
 *			return buf;
 *		errno = -retval;
 *		return NULL;
 *	}
 */
SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
{
	int error;
	struct path pwd, root;
	char *page = (char *) __get_free_page(GFP_USER);

	if (!page)
		return -ENOMEM;

	get_fs_root_and_pwd(current->fs, &root, &pwd);

	error = -ENOENT;
	br_read_lock(&vfsmount_lock);
	if (!d_unlinked(pwd.dentry)) {
		unsigned long len;
		char *cwd = page + PAGE_SIZE;
		int buflen = PAGE_SIZE;

		prepend(&cwd, &buflen, "\0", 1);
		error = prepend_path(&pwd, &root, &cwd, &buflen);
		br_read_unlock(&vfsmount_lock);

		if (error < 0)
			goto out;

		/* Unreachable from current root */
		if (error > 0) {
			error = prepend_unreachable(&cwd, &buflen);
			if (error)
				goto out;
		}

		error = -ERANGE;
		len = PAGE_SIZE + page - cwd;
		if (len <= size) {
			error = len;
			if (copy_to_user(buf, cwd, len))
				error = -EFAULT;
		}
	} else {
		br_read_unlock(&vfsmount_lock);
	}

out:
	path_put(&pwd);
	path_put(&root);
	free_page((unsigned long) page);
	return error;
}
/*
 * Test whether new_dentry is a subdirectory of old_dentry.
 *
 * Trivially implemented using the dcache structure.
 */

/**
 * is_subdir - is new dentry a subdirectory of old_dentry
 * @new_dentry: new dentry
 * @old_dentry: old dentry
 *
 * Returns 1 if new_dentry is a subdirectory of old_dentry (at any depth),
 * 0 otherwise.
 * Caller must ensure that "new_dentry" is pinned before calling is_subdir().
 */
int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
{
	int result;
	unsigned seq;

	if (new_dentry == old_dentry)
		return 1;

	do {
		/* for restarting inner loop in case of seq retry */
		seq = read_seqbegin(&rename_lock);
		/*
		 * Need rcu_read_lock() to protect against d_parent being
		 * changed under us by d_move().
		 */
		rcu_read_lock();
		if (d_ancestor(old_dentry, new_dentry))
			result = 1;
		else
			result = 0;
		rcu_read_unlock();
	} while (read_seqretry(&rename_lock, seq));

	return result;
}

static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry)
{
	struct dentry *root = data;
	if (dentry != root) {
		if (d_unhashed(dentry) || !dentry->d_inode)
			return D_WALK_SKIP;

		if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
			dentry->d_flags |= DCACHE_GENOCIDE;
			dentry->d_lockref.count--;
		}
	}
	return D_WALK_CONTINUE;
}

void d_genocide(struct dentry *parent)
{
	d_walk(parent, parent, d_genocide_kill, NULL);
}

void d_tmpfile(struct dentry *dentry, struct inode *inode)
{
	inode_dec_link_count(inode);
	BUG_ON(dentry->d_name.name != dentry->d_iname ||
		!hlist_unhashed(&dentry->d_alias) ||
		!d_unlinked(dentry));
	spin_lock(&dentry->d_parent->d_lock);
	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
	dentry->d_name.len = sprintf(dentry->d_iname, "#%llu",
				(unsigned long long)inode->i_ino);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&dentry->d_parent->d_lock);
	d_instantiate(dentry, inode);
}
EXPORT_SYMBOL(d_tmpfile);

static __initdata unsigned long dhash_entries;
static int __init set_dhash_entries(char *str)
{
	if (!str)
		return 0;
	dhash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("dhash_entries=", set_dhash_entries);

static void __init dcache_init_early(void)
{
	unsigned int loop;

	/*
	 * If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_bl_head),
					dhash_entries,
					13,
					HASH_EARLY,
					&d_hash_shift,
					&d_hash_mask,
					0,
					0);

	for (loop = 0; loop < (1U << d_hash_shift); loop++)
		INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
}
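/*
 * Illustrative note: "dhash_entries=" above is a boot-time parameter;
 * booting with, say,
 *
 *	dhash_entries=1048576
 *
 * asks alloc_large_system_hash() for at least that many hash buckets
 * instead of the memory-scaled default computed in dcache_init_early()
 * above and dcache_init() below.
 */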
static void __init dcache_init(void)
{
	unsigned int loop;

	/*
	 * A constructor could be added for stable state like the lists,
	 * but it is probably not worth it because of the cache nature
	 * of the dcache.
	 */
	dentry_cache = KMEM_CACHE(dentry,
		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);

	/* Hash may have been set up in dcache_init_early */
	if (!hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct hlist_bl_head),
					dhash_entries,
					13,
					0,
					&d_hash_shift,
					&d_hash_mask,
					0,
					0);

	for (loop = 0; loop < (1U << d_hash_shift); loop++)
		INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
}

/* SLAB cache for __getname() consumers */
struct kmem_cache *names_cachep __read_mostly;
EXPORT_SYMBOL(names_cachep);

EXPORT_SYMBOL(d_genocide);

void __init vfs_caches_init_early(void)
{
	dcache_init_early();
	inode_init_early();
}

void __init vfs_caches_init(unsigned long mempages)
{
	unsigned long reserve;

	/*
	 * Base hash sizes on available memory, with a reserve equal to
	 * 150% of current kernel size.
	 */
	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
	mempages -= reserve;

	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	dcache_init();
	inode_init();
	files_init(mempages);
	mnt_init();
	bdev_cache_init();
	chrdev_init();
}