1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * fs/dcache.c 4 * 5 * Complete reimplementation 6 * (C) 1997 Thomas Schoebel-Theuer, 7 * with heavy changes by Linus Torvalds 8 */ 9 10 /* 11 * Notes on the allocation strategy: 12 * 13 * The dcache is a master of the icache - whenever a dcache entry 14 * exists, the inode will always exist. "iput()" is done either when 15 * the dcache entry is deleted or garbage collected. 16 */ 17 18 #include <linux/ratelimit.h> 19 #include <linux/string.h> 20 #include <linux/mm.h> 21 #include <linux/fs.h> 22 #include <linux/fscrypt.h> 23 #include <linux/fsnotify.h> 24 #include <linux/slab.h> 25 #include <linux/init.h> 26 #include <linux/hash.h> 27 #include <linux/cache.h> 28 #include <linux/export.h> 29 #include <linux/security.h> 30 #include <linux/seqlock.h> 31 #include <linux/memblock.h> 32 #include <linux/bit_spinlock.h> 33 #include <linux/rculist_bl.h> 34 #include <linux/list_lru.h> 35 #include "internal.h" 36 #include "mount.h" 37 38 #include <asm/runtime-const.h> 39 40 /* 41 * Usage: 42 * dcache->d_inode->i_lock protects: 43 * - i_dentry, d_alias, d_inode of aliases 44 * dcache_hash_bucket lock protects: 45 * - the dcache hash table 46 * s_roots_lock protects: 47 * - the s_roots list (see __d_move()/dentry_unlist()/d_obtain_root()) 48 * dentry->d_sb->s_dentry_lru_lock protects: 49 * - the dcache lru lists and counters 50 * d_lock protects: 51 * - d_flags 52 * - d_name 53 * - d_lru 54 * - d_count 55 * - d_unhashed() 56 * - d_parent and d_chilren 57 * - childrens' d_sib and d_parent 58 * - d_alias, d_inode 59 * 60 * Ordering: 61 * dentry->d_inode->i_lock 62 * dentry->d_lock 63 * dentry->d_sb->s_dentry_lru_lock 64 * dcache_hash_bucket lock 65 * s_roots lock 66 * 67 * If there is an ancestor relationship: 68 * dentry->d_parent->...->d_parent->d_lock 69 * ... 70 * dentry->d_parent->d_lock 71 * dentry->d_lock 72 * 73 * If no ancestor relationship: 74 * arbitrary, since it's serialized on rename_lock 75 */ 76 static int sysctl_vfs_cache_pressure __read_mostly = 100; 77 static int sysctl_vfs_cache_pressure_denom __read_mostly = 100; 78 79 unsigned long vfs_pressure_ratio(unsigned long val) 80 { 81 return mult_frac(val, sysctl_vfs_cache_pressure, sysctl_vfs_cache_pressure_denom); 82 } 83 EXPORT_SYMBOL_GPL(vfs_pressure_ratio); 84 85 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); 86 87 EXPORT_SYMBOL(rename_lock); 88 89 static struct kmem_cache *__dentry_cache __ro_after_init; 90 #define dentry_cache runtime_const_ptr(__dentry_cache) 91 92 const struct qstr empty_name = QSTR_INIT("", 0); 93 EXPORT_SYMBOL(empty_name); 94 const struct qstr slash_name = QSTR_INIT("/", 1); 95 EXPORT_SYMBOL(slash_name); 96 const struct qstr dotdot_name = QSTR_INIT("..", 2); 97 EXPORT_SYMBOL(dotdot_name); 98 99 /* 100 * This is the single most critical data structure when it comes 101 * to the dcache: the hashtable for lookups. Somebody should try 102 * to make this good - I've just made it work. 103 * 104 * This hash-function tries to avoid losing too many bits of hash 105 * information, yet avoid using a prime hash-size or similar. 106 * 107 * Marking the variables "used" ensures that the compiler doesn't 108 * optimize them away completely on architectures with runtime 109 * constant infrastructure, this allows debuggers to see their 110 * values. But updating these values has no effect on those arches. 111 */ 112 113 static unsigned int d_hash_shift __ro_after_init __used; 114 115 static struct hlist_bl_head *dentry_hashtable __ro_after_init __used; 116 117 static inline struct hlist_bl_head *d_hash(unsigned long hashlen) 118 { 119 return runtime_const_ptr(dentry_hashtable) + 120 runtime_const_shift_right_32(hashlen, d_hash_shift); 121 } 122 123 #define IN_LOOKUP_SHIFT 10 124 static struct hlist_bl_head in_lookup_hashtable[1 << IN_LOOKUP_SHIFT]; 125 126 static inline struct hlist_bl_head *in_lookup_hash(const struct dentry *parent, 127 unsigned int hash) 128 { 129 hash += (unsigned long) parent / L1_CACHE_BYTES; 130 return in_lookup_hashtable + hash_32(hash, IN_LOOKUP_SHIFT); 131 } 132 133 struct dentry_stat_t { 134 long nr_dentry; 135 long nr_unused; 136 long age_limit; /* age in seconds */ 137 long want_pages; /* pages requested by system */ 138 long nr_negative; /* # of unused negative dentries */ 139 long dummy; /* Reserved for future use */ 140 }; 141 142 static DEFINE_PER_CPU(long, nr_dentry); 143 static DEFINE_PER_CPU(long, nr_dentry_unused); 144 static DEFINE_PER_CPU(long, nr_dentry_negative); 145 static int dentry_negative_policy; 146 147 #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) 148 /* Statistics gathering. */ 149 static struct dentry_stat_t dentry_stat = { 150 .age_limit = 45, 151 }; 152 153 /* 154 * Here we resort to our own counters instead of using generic per-cpu counters 155 * for consistency with what the vfs inode code does. We are expected to harvest 156 * better code and performance by having our own specialized counters. 157 * 158 * Please note that the loop is done over all possible CPUs, not over all online 159 * CPUs. The reason for this is that we don't want to play games with CPUs going 160 * on and off. If one of them goes off, we will just keep their counters. 161 * 162 * glommer: See cffbc8a for details, and if you ever intend to change this, 163 * please update all vfs counters to match. 164 */ 165 static long get_nr_dentry(void) 166 { 167 int i; 168 long sum = 0; 169 for_each_possible_cpu(i) 170 sum += per_cpu(nr_dentry, i); 171 return sum < 0 ? 0 : sum; 172 } 173 174 static long get_nr_dentry_unused(void) 175 { 176 int i; 177 long sum = 0; 178 for_each_possible_cpu(i) 179 sum += per_cpu(nr_dentry_unused, i); 180 return sum < 0 ? 0 : sum; 181 } 182 183 static long get_nr_dentry_negative(void) 184 { 185 int i; 186 long sum = 0; 187 188 for_each_possible_cpu(i) 189 sum += per_cpu(nr_dentry_negative, i); 190 return sum < 0 ? 0 : sum; 191 } 192 193 static int proc_nr_dentry(const struct ctl_table *table, int write, void *buffer, 194 size_t *lenp, loff_t *ppos) 195 { 196 dentry_stat.nr_dentry = get_nr_dentry(); 197 dentry_stat.nr_unused = get_nr_dentry_unused(); 198 dentry_stat.nr_negative = get_nr_dentry_negative(); 199 return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); 200 } 201 202 static const struct ctl_table fs_dcache_sysctls[] = { 203 { 204 .procname = "dentry-state", 205 .data = &dentry_stat, 206 .maxlen = 6*sizeof(long), 207 .mode = 0444, 208 .proc_handler = proc_nr_dentry, 209 }, 210 { 211 .procname = "dentry-negative", 212 .data = &dentry_negative_policy, 213 .maxlen = sizeof(dentry_negative_policy), 214 .mode = 0644, 215 .proc_handler = proc_dointvec_minmax, 216 .extra1 = SYSCTL_ZERO, 217 .extra2 = SYSCTL_ONE, 218 }, 219 }; 220 221 static const struct ctl_table vm_dcache_sysctls[] = { 222 { 223 .procname = "vfs_cache_pressure", 224 .data = &sysctl_vfs_cache_pressure, 225 .maxlen = sizeof(sysctl_vfs_cache_pressure), 226 .mode = 0644, 227 .proc_handler = proc_dointvec_minmax, 228 .extra1 = SYSCTL_ZERO, 229 }, 230 { 231 .procname = "vfs_cache_pressure_denom", 232 .data = &sysctl_vfs_cache_pressure_denom, 233 .maxlen = sizeof(sysctl_vfs_cache_pressure_denom), 234 .mode = 0644, 235 .proc_handler = proc_dointvec_minmax, 236 .extra1 = SYSCTL_ONE_HUNDRED, 237 }, 238 }; 239 240 static int __init init_fs_dcache_sysctls(void) 241 { 242 register_sysctl_init("vm", vm_dcache_sysctls); 243 register_sysctl_init("fs", fs_dcache_sysctls); 244 return 0; 245 } 246 fs_initcall(init_fs_dcache_sysctls); 247 #endif 248 249 /* 250 * Compare 2 name strings, return 0 if they match, otherwise non-zero. 251 * The strings are both count bytes long, and count is non-zero. 252 */ 253 #ifdef CONFIG_DCACHE_WORD_ACCESS 254 255 #include <asm/word-at-a-time.h> 256 /* 257 * NOTE! 'cs' and 'scount' come from a dentry, so it has a 258 * aligned allocation for this particular component. We don't 259 * strictly need the load_unaligned_zeropad() safety, but it 260 * doesn't hurt either. 261 * 262 * In contrast, 'ct' and 'tcount' can be from a pathname, and do 263 * need the careful unaligned handling. 264 */ 265 static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount) 266 { 267 unsigned long a,b,mask; 268 269 for (;;) { 270 a = read_word_at_a_time(cs); 271 b = load_unaligned_zeropad(ct); 272 if (tcount < sizeof(unsigned long)) 273 break; 274 if (unlikely(a != b)) 275 return 1; 276 cs += sizeof(unsigned long); 277 ct += sizeof(unsigned long); 278 tcount -= sizeof(unsigned long); 279 if (!tcount) 280 return 0; 281 } 282 mask = bytemask_from_count(tcount); 283 return unlikely(!!((a ^ b) & mask)); 284 } 285 286 #else 287 288 static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount) 289 { 290 do { 291 if (*cs != *ct) 292 return 1; 293 cs++; 294 ct++; 295 tcount--; 296 } while (tcount); 297 return 0; 298 } 299 300 #endif 301 302 static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount) 303 { 304 /* 305 * Be careful about RCU walk racing with rename: 306 * use 'READ_ONCE' to fetch the name pointer. 307 * 308 * NOTE! Even if a rename will mean that the length 309 * was not loaded atomically, we don't care. The 310 * RCU walk will check the sequence count eventually, 311 * and catch it. And we won't overrun the buffer, 312 * because we're reading the name pointer atomically, 313 * and a dentry name is guaranteed to be properly 314 * terminated with a NUL byte. 315 * 316 * End result: even if 'len' is wrong, we'll exit 317 * early because the data cannot match (there can 318 * be no NUL in the ct/tcount data) 319 */ 320 const unsigned char *cs = READ_ONCE(dentry->d_name.name); 321 322 return dentry_string_cmp(cs, ct, tcount); 323 } 324 325 /* 326 * long names are allocated separately from dentry and never modified. 327 * Refcounted, freeing is RCU-delayed. See take_dentry_name_snapshot() 328 * for the reason why ->count and ->head can't be combined into a union. 329 * dentry_string_cmp() relies upon ->name[] being word-aligned. 330 */ 331 struct external_name { 332 atomic_t count; 333 struct rcu_head head; 334 unsigned char name[] __aligned(sizeof(unsigned long)); 335 }; 336 337 static inline struct external_name *external_name(struct dentry *dentry) 338 { 339 return container_of(dentry->d_name.name, struct external_name, name[0]); 340 } 341 342 static void __d_free(struct rcu_head *head) 343 { 344 struct dentry *dentry = container_of(head, struct dentry, d_rcu); 345 346 kmem_cache_free(dentry_cache, dentry); 347 } 348 349 static void __d_free_external(struct rcu_head *head) 350 { 351 struct dentry *dentry = container_of(head, struct dentry, d_rcu); 352 kfree(external_name(dentry)); 353 kmem_cache_free(dentry_cache, dentry); 354 } 355 356 static inline int dname_external(const struct dentry *dentry) 357 { 358 return dentry->d_name.name != dentry->d_shortname.string; 359 } 360 361 void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry) 362 { 363 unsigned seq; 364 const unsigned char *s; 365 366 rcu_read_lock(); 367 retry: 368 seq = read_seqcount_begin(&dentry->d_seq); 369 s = READ_ONCE(dentry->d_name.name); 370 name->name.hash_len = dentry->d_name.hash_len; 371 name->name.name = name->inline_name.string; 372 if (likely(s == dentry->d_shortname.string)) { 373 name->inline_name = dentry->d_shortname; 374 } else { 375 struct external_name *p; 376 p = container_of(s, struct external_name, name[0]); 377 // get a valid reference 378 if (unlikely(!atomic_inc_not_zero(&p->count))) 379 goto retry; 380 name->name.name = s; 381 } 382 if (read_seqcount_retry(&dentry->d_seq, seq)) { 383 release_dentry_name_snapshot(name); 384 goto retry; 385 } 386 rcu_read_unlock(); 387 } 388 EXPORT_SYMBOL(take_dentry_name_snapshot); 389 390 void release_dentry_name_snapshot(struct name_snapshot *name) 391 { 392 if (unlikely(name->name.name != name->inline_name.string)) { 393 struct external_name *p; 394 p = container_of(name->name.name, struct external_name, name[0]); 395 if (unlikely(atomic_dec_and_test(&p->count))) 396 kfree_rcu(p, head); 397 } 398 } 399 EXPORT_SYMBOL(release_dentry_name_snapshot); 400 401 static inline void __d_set_inode_and_type(struct dentry *dentry, 402 struct inode *inode, 403 unsigned type_flags) 404 { 405 unsigned flags; 406 407 dentry->d_inode = inode; 408 flags = READ_ONCE(dentry->d_flags); 409 flags &= ~DCACHE_ENTRY_TYPE; 410 flags |= type_flags; 411 smp_store_release(&dentry->d_flags, flags); 412 } 413 414 static inline void __d_clear_type_and_inode(struct dentry *dentry) 415 { 416 unsigned flags = READ_ONCE(dentry->d_flags); 417 418 flags &= ~DCACHE_ENTRY_TYPE; 419 WRITE_ONCE(dentry->d_flags, flags); 420 dentry->d_inode = NULL; 421 /* 422 * The negative counter only tracks dentries on the LRU. Don't inc if 423 * d_lru is on another list. 424 */ 425 if ((flags & (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST) 426 this_cpu_inc(nr_dentry_negative); 427 } 428 429 #define DENTRY_WARN_ONCE(condition, dentry) \ 430 WARN_ONCE((condition), "dentry=%p d_flags=0x%x\n", (dentry), (dentry)->d_flags) 431 #define D_FLAG_VERIFY(dentry, x) \ 432 DENTRY_WARN_ONCE(((dentry)->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) != (x), (dentry)) 433 434 static void dentry_free(struct dentry *dentry) 435 { 436 DENTRY_WARN_ONCE(d_really_is_positive(dentry), dentry); 437 DENTRY_WARN_ONCE(dentry->d_lockref.count >= 0, dentry); 438 D_FLAG_VERIFY(dentry, 0); 439 if (unlikely(dname_external(dentry))) { 440 struct external_name *p = external_name(dentry); 441 if (likely(atomic_dec_and_test(&p->count))) { 442 call_rcu(&dentry->d_rcu, __d_free_external); 443 return; 444 } 445 } 446 /* if dentry was never visible to RCU, immediate free is OK */ 447 if (dentry->d_flags & DCACHE_NORCU) 448 __d_free(&dentry->d_rcu); 449 else 450 call_rcu(&dentry->d_rcu, __d_free); 451 } 452 453 /* 454 * Release the dentry's inode, using the filesystem 455 * d_iput() operation if defined. 456 */ 457 static void dentry_unlink_inode(struct dentry * dentry) 458 __releases(dentry->d_lock) 459 __releases(dentry->d_inode->i_lock) 460 { 461 struct inode *inode = dentry->d_inode; 462 463 raw_write_seqcount_begin(&dentry->d_seq); 464 __d_clear_type_and_inode(dentry); 465 __hlist_del(&dentry->d_alias); 466 /* 467 * dentry becomes negative, so the space occupied by ->d_alias 468 * belongs to ->waiters now. 469 */ 470 dentry->waiters = NULL; 471 raw_write_seqcount_end(&dentry->d_seq); 472 spin_unlock(&dentry->d_lock); 473 spin_unlock(&inode->i_lock); 474 if (!inode->i_nlink) 475 fsnotify_inoderemove(inode); 476 if (dentry->d_op && dentry->d_op->d_iput) 477 dentry->d_op->d_iput(dentry, inode); 478 else 479 iput(inode); 480 } 481 482 /* 483 * The DCACHE_LRU_LIST bit is set whenever the 'd_lru' entry 484 * is in use - which includes both the "real" per-superblock 485 * LRU list _and_ the DCACHE_SHRINK_LIST use. 486 * 487 * The DCACHE_SHRINK_LIST bit is set whenever the dentry is 488 * on the shrink list (ie not on the superblock LRU list). 489 * 490 * The per-cpu "nr_dentry_unused" counters are updated with 491 * the DCACHE_LRU_LIST bit. 492 * 493 * The per-cpu "nr_dentry_negative" counters are only updated 494 * when deleted from or added to the per-superblock LRU list, not 495 * from/to the shrink list. That is to avoid an unneeded dec/inc 496 * pair when moving from LRU to shrink list in select_collect(). 497 * 498 * These helper functions make sure we always follow the 499 * rules. d_lock must be held by the caller. 500 */ 501 static void d_lru_add(struct dentry *dentry) 502 { 503 D_FLAG_VERIFY(dentry, 0); 504 dentry->d_flags |= DCACHE_LRU_LIST; 505 this_cpu_inc(nr_dentry_unused); 506 if (d_is_negative(dentry)) 507 this_cpu_inc(nr_dentry_negative); 508 WARN_ON_ONCE(!list_lru_add_obj( 509 &dentry->d_sb->s_dentry_lru, &dentry->d_lru)); 510 } 511 512 static void d_lru_del(struct dentry *dentry) 513 { 514 D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); 515 dentry->d_flags &= ~DCACHE_LRU_LIST; 516 this_cpu_dec(nr_dentry_unused); 517 if (d_is_negative(dentry)) 518 this_cpu_dec(nr_dentry_negative); 519 WARN_ON_ONCE(!list_lru_del_obj( 520 &dentry->d_sb->s_dentry_lru, &dentry->d_lru)); 521 } 522 523 static void d_shrink_del(struct dentry *dentry) 524 { 525 D_FLAG_VERIFY(dentry, DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); 526 list_del_init(&dentry->d_lru); 527 dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); 528 this_cpu_dec(nr_dentry_unused); 529 } 530 531 static void d_shrink_add(struct dentry *dentry, struct list_head *list) 532 { 533 D_FLAG_VERIFY(dentry, 0); 534 list_add(&dentry->d_lru, list); 535 dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST; 536 this_cpu_inc(nr_dentry_unused); 537 } 538 539 /* 540 * These can only be called under the global LRU lock, ie during the 541 * callback for freeing the LRU list. "isolate" removes it from the 542 * LRU lists entirely, while shrink_move moves it to the indicated 543 * private list. 544 */ 545 static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry) 546 { 547 D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); 548 dentry->d_flags &= ~DCACHE_LRU_LIST; 549 this_cpu_dec(nr_dentry_unused); 550 if (d_is_negative(dentry)) 551 this_cpu_dec(nr_dentry_negative); 552 list_lru_isolate(lru, &dentry->d_lru); 553 } 554 555 static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry, 556 struct list_head *list) 557 { 558 D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); 559 dentry->d_flags |= DCACHE_SHRINK_LIST; 560 if (d_is_negative(dentry)) 561 this_cpu_dec(nr_dentry_negative); 562 list_lru_isolate_move(lru, &dentry->d_lru, list); 563 } 564 565 static void ___d_drop(struct dentry *dentry) 566 { 567 struct hlist_bl_head *b = d_hash(dentry->d_name.hash); 568 569 hlist_bl_lock(b); 570 __hlist_bl_del(&dentry->d_hash); 571 hlist_bl_unlock(b); 572 } 573 574 void __d_drop(struct dentry *dentry) 575 { 576 if (!d_unhashed(dentry)) { 577 ___d_drop(dentry); 578 dentry->d_hash.pprev = NULL; 579 write_seqcount_invalidate(&dentry->d_seq); 580 } 581 } 582 EXPORT_SYMBOL(__d_drop); 583 584 /** 585 * d_drop - drop a dentry 586 * @dentry: dentry to drop 587 * 588 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't 589 * be found through a VFS lookup any more. Note that this is different from 590 * deleting the dentry - d_delete will try to mark the dentry negative if 591 * possible, giving a successful _negative_ lookup, while d_drop will 592 * just make the cache lookup fail. 593 * 594 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some 595 * reason (NFS timeouts or autofs deletes). 596 * 597 * __d_drop requires dentry->d_lock 598 * 599 * ___d_drop doesn't mark dentry as "unhashed" 600 * (dentry->d_hash.pprev will be LIST_POISON2, not NULL). 601 */ 602 void d_drop(struct dentry *dentry) 603 { 604 spin_lock(&dentry->d_lock); 605 __d_drop(dentry); 606 spin_unlock(&dentry->d_lock); 607 } 608 EXPORT_SYMBOL(d_drop); 609 610 struct completion_list { 611 struct completion_list *next; 612 struct completion completion; 613 }; 614 615 /* 616 * shrink_dcache_tree() needs to be notified when dentry in process of 617 * being evicted finally gets unlisted. Such dentries are 618 * already with negative ->d_count 619 * already negative 620 * already not in in-lookup hash 621 * reachable only via ->d_sib. 622 * 623 * Use ->waiters for a single-linked list of struct completion_list of 624 * waiters. 625 */ 626 static inline bool d_add_waiter(struct dentry *dentry, struct completion_list *p) 627 { 628 if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) 629 return false; 630 init_completion(&p->completion); 631 p->next = dentry->waiters; 632 dentry->waiters = p; 633 return true; 634 } 635 636 static inline void d_complete_waiters(struct dentry *dentry) 637 { 638 struct completion_list *v = dentry->waiters; 639 if (unlikely(v)) { 640 /* some shrink_dcache_tree() instances are waiting */ 641 dentry->waiters = NULL; 642 while (v) { 643 struct completion *r = &v->completion; 644 v = v->next; 645 complete(r); 646 } 647 } 648 } 649 650 static void unlink_secondary_root(struct dentry *dentry) 651 { 652 spin_lock(&dentry->d_sb->s_roots_lock); 653 hlist_del_init(&dentry->d_sib); 654 spin_unlock(&dentry->d_sb->s_roots_lock); 655 } 656 657 static inline void dentry_unlist(struct dentry *dentry) 658 { 659 struct dentry *next; 660 /* 661 * Inform d_walk() and shrink_dentry_list() that we are no longer 662 * attached to the dentry tree 663 */ 664 dentry->d_flags |= DCACHE_DENTRY_KILLED; 665 d_complete_waiters(dentry); 666 if (unlikely(hlist_unhashed(&dentry->d_sib))) 667 return; 668 if (unlikely(IS_ROOT(dentry))) { 669 unlink_secondary_root(dentry); // secondary root goes away 670 return; 671 } 672 __hlist_del(&dentry->d_sib); 673 /* 674 * Cursors can move around the list of children. While we'd been 675 * a normal list member, it didn't matter - ->d_sib.next would've 676 * been updated. However, from now on it won't be and for the 677 * things like d_walk() it might end up with a nasty surprise. 678 * Normally d_walk() doesn't care about cursors moving around - 679 * ->d_lock on parent prevents that and since a cursor has no children 680 * of its own, we get through it without ever unlocking the parent. 681 * There is one exception, though - if we ascend from a child that 682 * gets killed as soon as we unlock it, the next sibling is found 683 * using the value left in its ->d_sib.next. And if _that_ 684 * pointed to a cursor, and cursor got moved (e.g. by lseek()) 685 * before d_walk() regains parent->d_lock, we'll end up skipping 686 * everything the cursor had been moved past. 687 * 688 * Solution: make sure that the pointer left behind in ->d_sib.next 689 * points to something that won't be moving around. I.e. skip the 690 * cursors. 691 */ 692 while (dentry->d_sib.next) { 693 next = hlist_entry(dentry->d_sib.next, struct dentry, d_sib); 694 if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR))) 695 break; 696 dentry->d_sib.next = next->d_sib.next; 697 } 698 } 699 700 /* 701 * Prepare locking environment for killing a dentry. 702 * Called under dentry->d_lock. To proceed with eviction of a positive dentry 703 * we need to get ->i_lock of the inode of that dentry as well. 704 * However, ->i_lock nests outside of ->d_lock, so if trylock fails we might 705 * have to drop and regain the latter. Dentry state can change while its 706 * ->d_lock is not held - it might end up getting killed, becoming busy, 707 * negative, etc., so we need to be careful. 708 * 709 * For NORCU dentries memory safety relies upon having only one call of 710 * lock_for_kill() in the entire lifetime of dentry and dentry_free() being 711 * called only by the caller of lock_for_kill(). That this is NORCU-specific; 712 * the crucial part is that refcounts of NORCU dentries never grow once having 713 * dropped to zero. 714 * 715 * For normal dentries we can not assume that there won't be concurrent calls 716 * of dentry_free() - dentry might end up being evicted by another thread 717 * while we are dropping/retaking locks on the slow path. Memory safety is 718 * provided by keeping the RCU read-side critical area contiguous with 719 * an explicit rcu_read_lock() scope bridging over the break in spinlock scopes. 720 * 721 * If dentry is busy (or busy dying, or already dead), unlock dentry 722 * and return false. Otherwise, return true and have that dentry's 723 * inode (if any) locked in addition to dentry itself. 724 */ 725 static bool lock_for_kill(struct dentry *dentry) 726 { 727 struct inode *inode = dentry->d_inode; 728 729 if (unlikely(dentry->d_lockref.count)) { 730 spin_unlock(&dentry->d_lock); 731 return false; 732 } 733 734 if (!inode || likely(spin_trylock(&inode->i_lock))) 735 return true; 736 737 // Too bad - we need to drop ->d_lock and take locks in correct order. 738 // To avoid breaking RCU read-side critical area when we drop ->d_lock, 739 // take an explicit rcu_read_lock() while we are switching locks. 740 rcu_read_lock(); 741 do { 742 spin_unlock(&dentry->d_lock); 743 spin_lock(&inode->i_lock); 744 spin_lock(&dentry->d_lock); 745 // make sure we'd locked the right inode - ->d_inode might've 746 // changed while we were not holding ->d_lock 747 if (likely(inode == dentry->d_inode)) 748 break; 749 spin_unlock(&inode->i_lock); 750 inode = dentry->d_inode; 751 } while (inode); 752 rcu_read_unlock(); 753 if (likely(!dentry->d_lockref.count)) 754 return true; 755 if (inode) 756 spin_unlock(&inode->i_lock); 757 spin_unlock(&dentry->d_lock); 758 return false; 759 } 760 761 /** 762 * dentry_kill - evict a dentry 763 * @dentry: dentry to be evicted 764 * 765 * All dentry evictions are done by this function. The reference we are 766 * passed does not contribute to the refcount; the caller had either 767 * already decremented the refcount or it had never held one in the 768 * first place. @dentry->d_lock is held by the caller and dropped 769 * by dentry_kill(@dentry). 770 * 771 * We are guaranteed that nobody had called dentry_free(@dentry) 772 * prior to the beginning of RCU read-side critical area we are in. 773 * 774 * Caller must not access @dentry after the call. 775 * 776 * If eviction of @dentry drops the last reference to its parent, 777 * the reference to parent is returned to caller. In that case 778 * it is guaranteed to satisfy the requirements for dentry_kill() 779 * argument - its ->d_lock is held and we are guaranteed that nobody 780 * had passed it to dentry_free() prior to acquisition of its ->d_lock. 781 * Otherwise %NULL is returned. 782 * 783 * If @dentry is idle and remains such after we assemble the full 784 * locking environment for eviction (see lock_for_kill() for details) 785 * we mark it doomed (->d_lockref.count < 0) and proceed to detaching 786 * it from any filesystem objects. Otherwise we drop ->d_lock and 787 * return %NULL. 788 * 789 * Once @dentry is detached from the filesystem objects, we complete 790 * detaching it from dentry tree. The parent, if any, gets locked 791 * and its refcount is decremented; dentry is carefully removed from 792 * the tree (see dentry_unlist() for details) and marked killed 793 * (%DCACHE_DENTRY_KILLED set in ->d_flags). At that point it's just 794 * an inert chunk of memory, accessible only via RCU references 795 * and possibly via a shrink list. If it is not on any shrink lists, 796 * we call dentry_free(), which schedules actual freeing of memory. 797 * Othewise freeing is left to the owner of the shrink list in question. 798 */ 799 static struct dentry *dentry_kill(struct dentry *dentry) 800 { 801 struct dentry *parent = NULL; 802 bool can_free = true; 803 804 if (unlikely(!lock_for_kill(dentry))) 805 return NULL; 806 807 /* 808 * The dentry is now unrecoverably dead to the world. 809 */ 810 lockref_mark_dead(&dentry->d_lockref); 811 812 /* 813 * inform the fs via d_prune that this dentry is about to be 814 * unhashed and destroyed. 815 */ 816 if (dentry->d_flags & DCACHE_OP_PRUNE) 817 dentry->d_op->d_prune(dentry); 818 819 if (dentry->d_flags & DCACHE_LRU_LIST) { 820 if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) 821 d_lru_del(dentry); 822 } 823 /* if it was on the hash then remove it */ 824 __d_drop(dentry); 825 if (dentry->d_inode) 826 dentry_unlink_inode(dentry); 827 else 828 spin_unlock(&dentry->d_lock); 829 this_cpu_dec(nr_dentry); 830 if (dentry->d_op && dentry->d_op->d_release) 831 dentry->d_op->d_release(dentry); 832 833 cond_resched(); 834 /* now that it's negative, ->d_parent is stable */ 835 if (!IS_ROOT(dentry)) { 836 parent = dentry->d_parent; 837 spin_lock(&parent->d_lock); 838 } 839 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 840 dentry_unlist(dentry); 841 if (dentry->d_flags & DCACHE_SHRINK_LIST) 842 can_free = false; 843 spin_unlock(&dentry->d_lock); 844 if (likely(can_free)) 845 dentry_free(dentry); 846 if (parent && --parent->d_lockref.count) { 847 spin_unlock(&parent->d_lock); 848 return NULL; 849 } 850 return parent; 851 } 852 853 /* 854 * Decide if dentry is worth retaining. Usually this is called with dentry 855 * locked; if not locked, we are more limited and might not be able to tell 856 * without a lock. False in this case means "punt to locked path and recheck". 857 * 858 * In case we aren't locked, these predicates are not "stable". However, it is 859 * sufficient that at some point after we dropped the reference the dentry was 860 * hashed and the flags had the proper value. Other dentry users may have 861 * re-gotten a reference to the dentry and change that, but our work is done - 862 * we can leave the dentry around with a zero refcount. 863 */ 864 static inline bool retain_dentry(struct dentry *dentry, bool locked) 865 { 866 unsigned int d_flags; 867 868 smp_rmb(); 869 d_flags = READ_ONCE(dentry->d_flags); 870 871 // Unreachable? Nobody would be able to look it up, no point retaining 872 if (unlikely(d_unhashed(dentry))) 873 return false; 874 875 // Same if it's disconnected 876 if (unlikely(d_flags & DCACHE_DISCONNECTED)) 877 return false; 878 879 // ->d_delete() might tell us not to bother, but that requires 880 // ->d_lock; can't decide without it 881 if (unlikely(d_flags & DCACHE_OP_DELETE)) { 882 if (!locked || dentry->d_op->d_delete(dentry)) 883 return false; 884 } 885 886 // Explicitly told not to bother 887 if (unlikely(d_flags & DCACHE_DONTCACHE)) 888 return false; 889 890 // At this point it looks like we ought to keep it. We also might 891 // need to do something - put it on LRU if it wasn't there already 892 // and mark it referenced if it was on LRU, but not marked yet. 893 // Unfortunately, both actions require ->d_lock, so in lockless 894 // case we'd have to punt rather than doing those. 895 if (unlikely(!(d_flags & DCACHE_LRU_LIST))) { 896 if (!locked) 897 return false; 898 d_lru_add(dentry); 899 } else if (unlikely(!(d_flags & DCACHE_REFERENCED))) { 900 if (!locked) 901 return false; 902 dentry->d_flags |= DCACHE_REFERENCED; 903 } 904 return true; 905 } 906 907 void d_mark_dontcache(struct inode *inode) 908 { 909 struct dentry *de; 910 911 spin_lock(&inode->i_lock); 912 for_each_alias(de, inode) { 913 spin_lock(&de->d_lock); 914 de->d_flags |= DCACHE_DONTCACHE; 915 spin_unlock(&de->d_lock); 916 } 917 inode_state_set(inode, I_DONTCACHE); 918 spin_unlock(&inode->i_lock); 919 } 920 EXPORT_SYMBOL(d_mark_dontcache); 921 922 /* 923 * Try to do a lockless dput(), and return whether that was successful. 924 * 925 * If unsuccessful, we return false, having already taken the dentry lock. 926 * In that case refcount is guaranteed to be zero and we have already 927 * decided that it's not worth keeping around. 928 */ 929 static inline bool fast_dput(struct dentry *dentry) 930 { 931 int ret; 932 933 /* 934 * Try to decrement the lockref optimistically. 935 * RCU read lock held so that dentry is guaranteed to stay around 936 * even if the refcount goes down to zero. 937 */ 938 rcu_read_lock(); 939 ret = lockref_put_return(&dentry->d_lockref); 940 941 /* 942 * If the lockref_put_return() failed due to the lock being held 943 * by somebody else, the fast path has failed. We will need to 944 * get the lock, and then check the count again. 945 */ 946 if (unlikely(ret < 0)) { 947 spin_lock(&dentry->d_lock); 948 rcu_read_unlock(); 949 if (WARN_ON_ONCE(dentry->d_lockref.count <= 0)) { 950 spin_unlock(&dentry->d_lock); 951 return true; 952 } 953 dentry->d_lockref.count--; 954 goto locked; 955 } 956 957 /* 958 * If we weren't the last ref, we're done. 959 */ 960 if (ret) { 961 rcu_read_unlock(); 962 return true; 963 } 964 965 /* 966 * Can we decide that decrement of refcount is all we needed without 967 * taking the lock? There's a very common case when it's all we need - 968 * dentry looks like it ought to be retained and there's nothing else 969 * to do. 970 */ 971 if (retain_dentry(dentry, false)) { 972 rcu_read_unlock(); 973 return true; 974 } 975 976 /* 977 * Either not worth retaining or we can't tell without the lock. 978 * Get the lock, then. We've already decremented the refcount to 0, 979 * but we'll need to re-check the situation after getting the lock. 980 */ 981 spin_lock(&dentry->d_lock); 982 rcu_read_unlock(); 983 984 /* 985 * Did somebody else grab a reference to it in the meantime, and 986 * we're no longer the last user after all? Alternatively, somebody 987 * else could have killed it and marked it dead. Either way, we 988 * don't need to do anything else. 989 */ 990 locked: 991 if (dentry->d_lockref.count || retain_dentry(dentry, true)) { 992 spin_unlock(&dentry->d_lock); 993 return true; 994 } 995 return false; 996 } 997 998 static void finish_dput(struct dentry *dentry) 999 __releases(dentry->d_lock) 1000 { 1001 while ((dentry = dentry_kill(dentry)) != NULL) { 1002 if (retain_dentry(dentry, true)) { 1003 spin_unlock(&dentry->d_lock); 1004 return; 1005 } 1006 } 1007 } 1008 1009 /* 1010 * This is dput 1011 * 1012 * This is complicated by the fact that we do not want to put 1013 * dentries that are no longer on any hash chain on the unused 1014 * list: we'd much rather just get rid of them immediately. 1015 * 1016 * However, that implies that we have to traverse the dentry 1017 * tree upwards to the parents which might _also_ now be 1018 * scheduled for deletion (it may have been only waiting for 1019 * its last child to go away). 1020 * 1021 * This tail recursion is done by hand as we don't want to depend 1022 * on the compiler to always get this right (gcc generally doesn't). 1023 * Real recursion would eat up our stack space. 1024 */ 1025 1026 /* 1027 * dput - release a dentry 1028 * @dentry: dentry to release 1029 * 1030 * Release a dentry. This will drop the usage count and if appropriate 1031 * call the dentry unlink method as well as removing it from the queues and 1032 * releasing its resources. If the parent dentries were scheduled for release 1033 * they too may now get deleted. 1034 */ 1035 void dput(struct dentry *dentry) 1036 { 1037 if (!dentry) 1038 return; 1039 might_sleep(); 1040 if (likely(fast_dput(dentry))) 1041 return; 1042 finish_dput(dentry); 1043 } 1044 EXPORT_SYMBOL(dput); 1045 1046 void d_make_discardable(struct dentry *dentry) 1047 { 1048 spin_lock(&dentry->d_lock); 1049 WARN_ON(!(dentry->d_flags & DCACHE_PERSISTENT)); 1050 dentry->d_flags &= ~DCACHE_PERSISTENT; 1051 dentry->d_lockref.count--; 1052 finish_dput(dentry); 1053 } 1054 EXPORT_SYMBOL(d_make_discardable); 1055 1056 /** 1057 * __move_to_shrink_list - try to place a dentry into a shrink list 1058 * @dentry: dentry to try putting into shrink list 1059 * @list: the list to put @dentry into. 1060 * Returns: true @dentry had been placed into @list, false otherwise 1061 * 1062 * If @dentry is idle and not already include into a shrink list, move 1063 * it into @list and return %true; otherwise do nothing and return %false. 1064 * 1065 * Caller must be holding @dentry->d_lock. There must have been no calls of 1066 * dentry_free(@dentry) prior to the beginning of the RCU read-side critical 1067 * area in which __move_to_shrink_list(@dentry, @list) is called. 1068 * 1069 * @list should be thread-private and eventually emptied by passing it to 1070 * shrink_dentry_list(). 1071 */ 1072 1073 bool __move_to_shrink_list(struct dentry *dentry, struct list_head *list) 1074 __must_hold(&dentry->d_lock) 1075 { 1076 if (likely(!dentry->d_lockref.count && 1077 !(dentry->d_flags & DCACHE_SHRINK_LIST))) { 1078 if (dentry->d_flags & DCACHE_LRU_LIST) 1079 d_lru_del(dentry); 1080 d_shrink_add(dentry, list); 1081 return true; 1082 } 1083 return false; 1084 } 1085 EXPORT_SYMBOL(__move_to_shrink_list); 1086 1087 void dput_to_list(struct dentry *dentry, struct list_head *list) 1088 { 1089 if (likely(fast_dput(dentry))) 1090 return; 1091 __move_to_shrink_list(dentry, list); 1092 spin_unlock(&dentry->d_lock); 1093 } 1094 1095 struct dentry *dget_parent(struct dentry *dentry) 1096 { 1097 int gotref; 1098 struct dentry *ret; 1099 unsigned seq; 1100 1101 /* 1102 * Do optimistic parent lookup without any 1103 * locking. 1104 */ 1105 rcu_read_lock(); 1106 seq = raw_seqcount_begin(&dentry->d_seq); 1107 ret = READ_ONCE(dentry->d_parent); 1108 gotref = lockref_get_not_zero(&ret->d_lockref); 1109 rcu_read_unlock(); 1110 if (likely(gotref)) { 1111 if (!read_seqcount_retry(&dentry->d_seq, seq)) 1112 return ret; 1113 dput(ret); 1114 } 1115 1116 repeat: 1117 /* 1118 * Don't need rcu_dereference because we re-check it was correct under 1119 * the lock. 1120 */ 1121 rcu_read_lock(); 1122 ret = dentry->d_parent; 1123 spin_lock(&ret->d_lock); 1124 if (unlikely(ret != dentry->d_parent)) { 1125 spin_unlock(&ret->d_lock); 1126 rcu_read_unlock(); 1127 goto repeat; 1128 } 1129 rcu_read_unlock(); 1130 BUG_ON(!ret->d_lockref.count); 1131 ret->d_lockref.count++; 1132 spin_unlock(&ret->d_lock); 1133 return ret; 1134 } 1135 EXPORT_SYMBOL(dget_parent); 1136 1137 /* 1138 * inode is a directory, inode->i_lock is held by the caller 1139 */ 1140 static struct dentry * __d_find_dir_alias(struct inode *inode) 1141 { 1142 struct dentry *alias; 1143 1144 if (hlist_empty(&inode->i_dentry)) 1145 return NULL; 1146 alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); 1147 lockref_get(&alias->d_lockref); 1148 return alias; 1149 } 1150 1151 static struct dentry * __d_find_any_alias(struct inode *inode) 1152 { 1153 struct dentry *alias; 1154 1155 if (hlist_empty(&inode->i_dentry)) 1156 return NULL; 1157 for_each_alias(alias, inode) 1158 if (dget_alias_ilocked(alias)) 1159 return alias; 1160 return NULL; 1161 } 1162 1163 /** 1164 * d_find_any_alias - find any alias for a given inode 1165 * @inode: inode to find an alias for 1166 * 1167 * If any aliases exist for the given inode, take and return a 1168 * reference for one of them. If no aliases exist, return %NULL. 1169 */ 1170 struct dentry *d_find_any_alias(struct inode *inode) 1171 { 1172 struct dentry *de; 1173 1174 spin_lock(&inode->i_lock); 1175 de = __d_find_any_alias(inode); 1176 spin_unlock(&inode->i_lock); 1177 return de; 1178 } 1179 EXPORT_SYMBOL(d_find_any_alias); 1180 1181 static struct dentry *__d_find_alias(struct inode *inode) 1182 { 1183 struct dentry *alias; 1184 1185 if (S_ISDIR(inode->i_mode)) 1186 return __d_find_dir_alias(inode); 1187 1188 for_each_alias(alias, inode) { 1189 spin_lock(&alias->d_lock); 1190 if (!d_unhashed(alias)) { 1191 dget_dlock(alias); 1192 spin_unlock(&alias->d_lock); 1193 return alias; 1194 } 1195 spin_unlock(&alias->d_lock); 1196 } 1197 return NULL; 1198 } 1199 1200 /** 1201 * d_find_alias - grab a hashed alias of inode 1202 * @inode: inode in question 1203 * 1204 * If inode has a hashed alias, or is a directory and has any alias, 1205 * acquire the reference to alias and return it. Otherwise return NULL. 1206 * Notice that if inode is a directory there can be only one alias and 1207 * it can be unhashed only if it has no children, or if it is the root 1208 * of a filesystem, or if the directory was renamed and d_revalidate 1209 * was the first vfs operation to notice. 1210 * 1211 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer 1212 * any other hashed alias over that one. 1213 */ 1214 struct dentry *d_find_alias(struct inode *inode) 1215 { 1216 struct dentry *de = NULL; 1217 1218 if (!hlist_empty(&inode->i_dentry)) { 1219 spin_lock(&inode->i_lock); 1220 de = __d_find_alias(inode); 1221 spin_unlock(&inode->i_lock); 1222 } 1223 return de; 1224 } 1225 EXPORT_SYMBOL(d_find_alias); 1226 1227 /* 1228 * Caller MUST be holding rcu_read_lock() and be guaranteed 1229 * that inode won't get freed until rcu_read_unlock(). 1230 */ 1231 struct dentry *d_find_alias_rcu(struct inode *inode) 1232 { 1233 struct hlist_head *l = &inode->i_dentry; 1234 struct dentry *de = NULL; 1235 1236 spin_lock(&inode->i_lock); 1237 // ->i_dentry and ->i_rcu are colocated, but the latter won't be 1238 // used without having I_FREEING set, which means no aliases left 1239 if (likely(!(inode_state_read(inode) & I_FREEING) && !hlist_empty(l))) { 1240 if (S_ISDIR(inode->i_mode)) { 1241 de = hlist_entry(l->first, struct dentry, d_alias); 1242 } else { 1243 hlist_for_each_entry(de, l, d_alias) 1244 if (!d_unhashed(de)) 1245 break; 1246 } 1247 } 1248 spin_unlock(&inode->i_lock); 1249 return de; 1250 } 1251 1252 /* 1253 * Try to kill dentries associated with this inode. 1254 * WARNING: you must own a reference to inode. 1255 */ 1256 void d_prune_aliases(struct inode *inode) 1257 { 1258 LIST_HEAD(dispose); 1259 struct dentry *dentry; 1260 1261 spin_lock(&inode->i_lock); 1262 for_each_alias(dentry, inode) { 1263 spin_lock(&dentry->d_lock); 1264 if (likely(!(dentry->d_flags & DCACHE_NORCU))) 1265 __move_to_shrink_list(dentry, &dispose); 1266 spin_unlock(&dentry->d_lock); 1267 } 1268 spin_unlock(&inode->i_lock); 1269 shrink_dentry_list(&dispose); 1270 } 1271 EXPORT_SYMBOL(d_prune_aliases); 1272 1273 static inline void shrink_kill(struct dentry *victim) 1274 { 1275 while ((victim = dentry_kill(victim)) != NULL) 1276 ; 1277 } 1278 1279 void shrink_dentry_list(struct list_head *list) 1280 { 1281 while (!list_empty(list)) { 1282 struct dentry *dentry; 1283 1284 dentry = list_entry(list->prev, struct dentry, d_lru); 1285 spin_lock(&dentry->d_lock); 1286 d_shrink_del(dentry); 1287 if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { 1288 spin_unlock(&dentry->d_lock); 1289 dentry_free(dentry); 1290 continue; 1291 } 1292 shrink_kill(dentry); 1293 } 1294 } 1295 EXPORT_SYMBOL(shrink_dentry_list); 1296 1297 static enum lru_status dentry_lru_isolate(struct list_head *item, 1298 struct list_lru_one *lru, void *arg) 1299 { 1300 struct list_head *freeable = arg; 1301 struct dentry *dentry = container_of(item, struct dentry, d_lru); 1302 1303 1304 /* 1305 * we are inverting the lru lock/dentry->d_lock here, 1306 * so use a trylock. If we fail to get the lock, just skip 1307 * it 1308 */ 1309 if (!spin_trylock(&dentry->d_lock)) 1310 return LRU_SKIP; 1311 1312 /* 1313 * Referenced dentries are still in use. If they have active 1314 * counts, just remove them from the LRU. Otherwise give them 1315 * another pass through the LRU. 1316 */ 1317 if (dentry->d_lockref.count) { 1318 d_lru_isolate(lru, dentry); 1319 spin_unlock(&dentry->d_lock); 1320 return LRU_REMOVED; 1321 } 1322 1323 if (dentry->d_flags & DCACHE_REFERENCED) { 1324 dentry->d_flags &= ~DCACHE_REFERENCED; 1325 spin_unlock(&dentry->d_lock); 1326 1327 /* 1328 * The list move itself will be made by the common LRU code. At 1329 * this point, we've dropped the dentry->d_lock but keep the 1330 * lru lock. This is safe to do, since every list movement is 1331 * protected by the lru lock even if both locks are held. 1332 * 1333 * This is guaranteed by the fact that all LRU management 1334 * functions are intermediated by the LRU API calls like 1335 * list_lru_add_obj and list_lru_del_obj. List movement in this file 1336 * only ever occur through this functions or through callbacks 1337 * like this one, that are called from the LRU API. 1338 * 1339 * The only exceptions to this are functions like 1340 * shrink_dentry_list, and code that first checks for the 1341 * DCACHE_SHRINK_LIST flag. Those are guaranteed to be 1342 * operating only with stack provided lists after they are 1343 * properly isolated from the main list. It is thus, always a 1344 * local access. 1345 */ 1346 return LRU_ROTATE; 1347 } 1348 1349 d_lru_shrink_move(lru, dentry, freeable); 1350 spin_unlock(&dentry->d_lock); 1351 1352 return LRU_REMOVED; 1353 } 1354 1355 /** 1356 * prune_dcache_sb - shrink the dcache 1357 * @sb: superblock 1358 * @sc: shrink control, passed to list_lru_shrink_walk() 1359 * 1360 * Attempt to shrink the superblock dcache LRU by @sc->nr_to_scan entries. This 1361 * is done when we need more memory and called from the superblock shrinker 1362 * function. 1363 * 1364 * This function may fail to free any resources if all the dentries are in 1365 * use. 1366 */ 1367 long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc) 1368 { 1369 LIST_HEAD(dispose); 1370 long freed; 1371 1372 freed = list_lru_shrink_walk(&sb->s_dentry_lru, sc, 1373 dentry_lru_isolate, &dispose); 1374 shrink_dentry_list(&dispose); 1375 return freed; 1376 } 1377 1378 static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, 1379 struct list_lru_one *lru, void *arg) 1380 { 1381 struct list_head *freeable = arg; 1382 struct dentry *dentry = container_of(item, struct dentry, d_lru); 1383 1384 /* 1385 * we are inverting the lru lock/dentry->d_lock here, 1386 * so use a trylock. If we fail to get the lock, just skip 1387 * it 1388 */ 1389 if (!spin_trylock(&dentry->d_lock)) 1390 return LRU_SKIP; 1391 1392 d_lru_shrink_move(lru, dentry, freeable); 1393 spin_unlock(&dentry->d_lock); 1394 1395 return LRU_REMOVED; 1396 } 1397 1398 1399 /** 1400 * shrink_dcache_sb - shrink dcache for a superblock 1401 * @sb: superblock 1402 * 1403 * Shrink the dcache for the specified super block. This is used to free 1404 * the dcache before unmounting a file system. 1405 */ 1406 void shrink_dcache_sb(struct super_block *sb) 1407 { 1408 do { 1409 LIST_HEAD(dispose); 1410 1411 list_lru_walk(&sb->s_dentry_lru, 1412 dentry_lru_isolate_shrink, &dispose, 1024); 1413 shrink_dentry_list(&dispose); 1414 } while (list_lru_count(&sb->s_dentry_lru) > 0); 1415 } 1416 EXPORT_SYMBOL(shrink_dcache_sb); 1417 1418 /** 1419 * enum d_walk_ret - action to take during tree walk 1420 * @D_WALK_CONTINUE: continue walk 1421 * @D_WALK_QUIT: quit walk 1422 * @D_WALK_NORETRY: quit when retry is needed 1423 * @D_WALK_SKIP: skip this dentry and its children 1424 */ 1425 enum d_walk_ret { 1426 D_WALK_CONTINUE, 1427 D_WALK_QUIT, 1428 D_WALK_NORETRY, 1429 D_WALK_SKIP, 1430 }; 1431 1432 /** 1433 * d_walk - walk the dentry tree 1434 * @parent: start of walk 1435 * @data: data passed to @enter() and @finish() 1436 * @enter: callback when first entering the dentry 1437 * 1438 * The @enter() callbacks are called with d_lock held. 1439 */ 1440 static void d_walk(struct dentry *parent, void *data, 1441 enum d_walk_ret (*enter)(void *, struct dentry *)) 1442 { 1443 struct dentry *this_parent, *dentry; 1444 unsigned seq = 0; 1445 enum d_walk_ret ret; 1446 bool retry = true; 1447 1448 again: 1449 read_seqbegin_or_lock(&rename_lock, &seq); 1450 this_parent = parent; 1451 spin_lock(&this_parent->d_lock); 1452 if (unlikely(this_parent->d_flags & DCACHE_DENTRY_CURSOR)) 1453 goto out_unlock; 1454 1455 ret = enter(data, this_parent); 1456 switch (ret) { 1457 case D_WALK_CONTINUE: 1458 break; 1459 case D_WALK_QUIT: 1460 case D_WALK_SKIP: 1461 goto out_unlock; 1462 case D_WALK_NORETRY: 1463 retry = false; 1464 break; 1465 } 1466 repeat: 1467 dentry = d_first_child(this_parent); 1468 resume: 1469 hlist_for_each_entry_from(dentry, d_sib) { 1470 if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR)) 1471 continue; 1472 1473 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 1474 1475 ret = enter(data, dentry); 1476 switch (ret) { 1477 case D_WALK_CONTINUE: 1478 break; 1479 case D_WALK_QUIT: 1480 spin_unlock(&dentry->d_lock); 1481 goto out_unlock; 1482 case D_WALK_NORETRY: 1483 retry = false; 1484 break; 1485 case D_WALK_SKIP: 1486 spin_unlock(&dentry->d_lock); 1487 continue; 1488 } 1489 1490 if (!hlist_empty(&dentry->d_children)) { 1491 spin_unlock(&this_parent->d_lock); 1492 spin_release(&dentry->d_lock.dep_map, _RET_IP_); 1493 this_parent = dentry; 1494 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); 1495 goto repeat; 1496 } 1497 spin_unlock(&dentry->d_lock); 1498 } 1499 /* 1500 * All done at this level ... ascend and resume the search. 1501 */ 1502 ascend: 1503 if (this_parent != parent) { 1504 dentry = this_parent; 1505 this_parent = dentry->d_parent; 1506 1507 rcu_read_lock(); 1508 spin_unlock(&dentry->d_lock); 1509 spin_lock(&this_parent->d_lock); 1510 rcu_read_unlock(); 1511 1512 /* might go back up the wrong parent if we have had a rename. */ 1513 if (need_seqretry(&rename_lock, seq)) 1514 goto rename_retry; 1515 /* go into the first sibling still alive */ 1516 hlist_for_each_entry_continue(dentry, d_sib) { 1517 if (likely(!(dentry->d_flags & DCACHE_DENTRY_KILLED))) { 1518 goto resume; 1519 } 1520 } 1521 goto ascend; 1522 } 1523 if (need_seqretry(&rename_lock, seq)) 1524 goto rename_retry; 1525 1526 out_unlock: 1527 spin_unlock(&this_parent->d_lock); 1528 done_seqretry(&rename_lock, seq); 1529 return; 1530 1531 rename_retry: 1532 spin_unlock(&this_parent->d_lock); 1533 BUG_ON(seq & 1); 1534 if (!retry) 1535 return; 1536 seq = 1; 1537 goto again; 1538 } 1539 1540 struct check_mount { 1541 struct vfsmount *mnt; 1542 unsigned int mounted; 1543 }; 1544 1545 /* locks: mount_locked_reader && dentry->d_lock */ 1546 static enum d_walk_ret path_check_mount(void *data, struct dentry *dentry) 1547 { 1548 struct check_mount *info = data; 1549 struct path path = { .mnt = info->mnt, .dentry = dentry }; 1550 1551 if (likely(!d_mountpoint(dentry))) 1552 return D_WALK_CONTINUE; 1553 if (__path_is_mountpoint(&path)) { 1554 info->mounted = 1; 1555 return D_WALK_QUIT; 1556 } 1557 return D_WALK_CONTINUE; 1558 } 1559 1560 /** 1561 * path_has_submounts - check for mounts over a dentry in the 1562 * current namespace. 1563 * @parent: path to check. 1564 * 1565 * Return true if the parent or its subdirectories contain 1566 * a mount point in the current namespace. 1567 */ 1568 int path_has_submounts(const struct path *parent) 1569 { 1570 struct check_mount data = { .mnt = parent->mnt, .mounted = 0 }; 1571 1572 guard(mount_locked_reader)(); 1573 d_walk(parent->dentry, &data, path_check_mount); 1574 1575 return data.mounted; 1576 } 1577 EXPORT_SYMBOL(path_has_submounts); 1578 1579 /* 1580 * Called by mount code to set a mountpoint and check if the mountpoint is 1581 * reachable (e.g. NFS can unhash a directory dentry and then the complete 1582 * subtree can become unreachable). 1583 * 1584 * Only one of d_invalidate() and d_set_mounted() must succeed. For 1585 * this reason take rename_lock and d_lock on dentry and ancestors. 1586 */ 1587 int d_set_mounted(struct dentry *dentry) 1588 { 1589 struct dentry *p; 1590 int ret = -ENOENT; 1591 read_seqlock_excl(&rename_lock); 1592 for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) { 1593 /* Need exclusion wrt. d_invalidate() */ 1594 spin_lock(&p->d_lock); 1595 if (unlikely(d_unhashed(p))) { 1596 spin_unlock(&p->d_lock); 1597 goto out; 1598 } 1599 spin_unlock(&p->d_lock); 1600 } 1601 spin_lock(&dentry->d_lock); 1602 if (!d_unlinked(dentry)) { 1603 ret = -EBUSY; 1604 if (!d_mountpoint(dentry)) { 1605 dentry->d_flags |= DCACHE_MOUNTED; 1606 ret = 0; 1607 } 1608 } 1609 spin_unlock(&dentry->d_lock); 1610 out: 1611 read_sequnlock_excl(&rename_lock); 1612 return ret; 1613 } 1614 1615 /* 1616 * Search the dentry child list of the specified parent, 1617 * and move any unused dentries to the end of the unused 1618 * list for prune_dcache(). We descend to the next level 1619 * whenever the d_children list is non-empty and continue 1620 * searching. 1621 * 1622 * It returns zero iff there are no unused children, 1623 * otherwise it returns the number of children moved to 1624 * the end of the unused list. This may not be the total 1625 * number of unused children, because select_parent can 1626 * drop the lock and return early due to latency 1627 * constraints. 1628 */ 1629 1630 struct select_data { 1631 struct dentry *start; 1632 union { 1633 long found; 1634 struct dentry *victim; 1635 }; 1636 struct list_head dispose; 1637 }; 1638 1639 static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) 1640 { 1641 struct select_data *data = _data; 1642 enum d_walk_ret ret = D_WALK_CONTINUE; 1643 1644 if (data->start == dentry) 1645 goto out; 1646 1647 if (dentry->d_lockref.count <= 0) { 1648 __move_to_shrink_list(dentry, &data->dispose); 1649 data->found++; 1650 } 1651 /* 1652 * We can return to the caller if we have found some (this 1653 * ensures forward progress). We'll be coming back to find 1654 * the rest. 1655 */ 1656 if (!list_empty(&data->dispose)) 1657 ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY; 1658 out: 1659 return ret; 1660 } 1661 1662 static enum d_walk_ret select_collect_umount(void *_data, struct dentry *dentry) 1663 { 1664 if (dentry->d_flags & DCACHE_PERSISTENT) { 1665 dentry->d_flags &= ~DCACHE_PERSISTENT; 1666 dentry->d_lockref.count--; 1667 } 1668 return select_collect(_data, dentry); 1669 } 1670 1671 static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry) 1672 { 1673 struct select_data *data = _data; 1674 enum d_walk_ret ret = D_WALK_CONTINUE; 1675 1676 if (data->start == dentry) 1677 goto out; 1678 1679 if (dentry->d_lockref.count <= 0) { 1680 if (!__move_to_shrink_list(dentry, &data->dispose)) { 1681 /* 1682 * We need an enter RCU read-side critical area that 1683 * would extend past the return from d_walk() and 1684 * we are in the scope of ->d_lock that will terminate 1685 * before that, so we use rcu_read_lock() to bridge 1686 * over to the scope of ->d_lock in d_walk() caller. 1687 * The scope of rcu_read_lock() spans from here to 1688 * paired rcu_read_unlock() in shrink_dcache_tree(). 1689 */ 1690 rcu_read_lock(); 1691 data->victim = dentry; 1692 return D_WALK_QUIT; 1693 } 1694 } 1695 /* 1696 * We can return to the caller if we have found some (this 1697 * ensures forward progress). We'll be coming back to find 1698 * the rest. 1699 */ 1700 if (!list_empty(&data->dispose)) 1701 ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY; 1702 out: 1703 return ret; 1704 } 1705 1706 /** 1707 * shrink_dcache_tree - prune dcache 1708 * @parent: parent of entries to prune 1709 * @for_umount: true if we want to unpin the persistent ones 1710 * 1711 * Prune the dcache to remove unused children of the parent dentry. 1712 */ 1713 static void shrink_dcache_tree(struct dentry *parent, bool for_umount) 1714 { 1715 for (;;) { 1716 struct completion_list wait; 1717 bool need_wait = false; 1718 struct select_data data = { .start = parent }; 1719 1720 INIT_LIST_HEAD(&data.dispose); 1721 d_walk(parent, &data, 1722 for_umount ? select_collect_umount : select_collect); 1723 1724 if (!list_empty(&data.dispose)) { 1725 shrink_dentry_list(&data.dispose); 1726 continue; 1727 } 1728 1729 cond_resched(); 1730 if (!data.found) 1731 break; 1732 data.victim = NULL; 1733 d_walk(parent, &data, select_collect2); 1734 if (data.victim) { 1735 struct dentry *v = data.victim; 1736 /* 1737 * select_collect2() has picked a dentry that was 1738 * either dying or on a shrink list and arranged 1739 * for it to be returned to us. We are still in 1740 * the RCU read-side critical area started there 1741 * (rcu_read_lock() scope opened in select_collect2()), 1742 * so dentry couldn't have been freed yet, but its 1743 * state might've changed since we dropped ->d_lock 1744 * on the way out. Switch over to ->d_lock scope 1745 * and recheck the dentry state. 1746 */ 1747 spin_lock(&v->d_lock); 1748 rcu_read_unlock(); 1749 1750 if (unlikely(v->d_lockref.count < 0)) { 1751 // It's doomed; if it isn't dead yet, notify us 1752 // once it becomes invisible to d_walk(). 1753 need_wait = d_add_waiter(v, &wait); 1754 spin_unlock(&v->d_lock); 1755 } else { 1756 shrink_kill(v); 1757 } 1758 } 1759 shrink_dentry_list(&data.dispose); 1760 if (unlikely(need_wait)) 1761 wait_for_completion(&wait.completion); 1762 } 1763 } 1764 1765 void shrink_dcache_parent(struct dentry *parent) 1766 { 1767 shrink_dcache_tree(parent, false); 1768 } 1769 EXPORT_SYMBOL(shrink_dcache_parent); 1770 1771 static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) 1772 { 1773 /* it has busy descendents; complain about those instead */ 1774 if (!hlist_empty(&dentry->d_children)) 1775 return D_WALK_CONTINUE; 1776 1777 /* root with refcount 1 is fine */ 1778 if (dentry == _data && dentry->d_lockref.count == 1) 1779 return D_WALK_CONTINUE; 1780 1781 WARN(1, "BUG: Dentry %p{i=%llx,n=%pd} " 1782 " still in use (%d) [unmount of %s %s]\n", 1783 dentry, 1784 dentry->d_inode ? 1785 dentry->d_inode->i_ino : (u64)0, 1786 dentry, 1787 dentry->d_lockref.count, 1788 dentry->d_sb->s_type->name, 1789 dentry->d_sb->s_id); 1790 return D_WALK_CONTINUE; 1791 } 1792 1793 static void do_one_tree(struct dentry *dentry) 1794 { 1795 shrink_dcache_tree(dentry, true); 1796 d_walk(dentry, dentry, umount_check); 1797 d_drop(dentry); 1798 dput(dentry); 1799 } 1800 1801 /* 1802 * destroy the dentries attached to a superblock on unmounting 1803 */ 1804 void shrink_dcache_for_umount(struct super_block *sb) 1805 { 1806 struct dentry *dentry; 1807 1808 rwsem_assert_held_write(&sb->s_umount); 1809 1810 dentry = sb->s_root; 1811 sb->s_root = NULL; 1812 do_one_tree(dentry); 1813 1814 for (;;) { 1815 spin_lock(&sb->s_roots_lock); 1816 dentry = hlist_entry_safe(sb->s_roots.first, 1817 struct dentry, d_sib); 1818 if (!dentry) { 1819 spin_unlock(&sb->s_roots_lock); 1820 break; 1821 } 1822 rcu_read_lock(); 1823 spin_unlock(&sb->s_roots_lock); 1824 spin_lock(&dentry->d_lock); 1825 rcu_read_unlock(); 1826 if (unlikely(dentry->d_lockref.count < 0)) { 1827 struct completion_list wait; 1828 bool need_wait = d_add_waiter(dentry, &wait); 1829 1830 spin_unlock(&dentry->d_lock); 1831 if (need_wait) 1832 wait_for_completion(&wait.completion); 1833 } else { 1834 dget_dlock(dentry); 1835 spin_unlock(&dentry->d_lock); 1836 do_one_tree(dentry); 1837 } 1838 } 1839 } 1840 1841 static enum d_walk_ret find_submount(void *_data, struct dentry *dentry) 1842 { 1843 struct dentry **victim = _data; 1844 if (d_mountpoint(dentry)) { 1845 *victim = dget_dlock(dentry); 1846 return D_WALK_QUIT; 1847 } 1848 return D_WALK_CONTINUE; 1849 } 1850 1851 /** 1852 * d_invalidate - detach submounts, prune dcache, and drop 1853 * @dentry: dentry to invalidate (aka detach, prune and drop) 1854 */ 1855 void d_invalidate(struct dentry *dentry) 1856 { 1857 bool had_submounts = false; 1858 spin_lock(&dentry->d_lock); 1859 if (d_unhashed(dentry)) { 1860 spin_unlock(&dentry->d_lock); 1861 return; 1862 } 1863 __d_drop(dentry); 1864 spin_unlock(&dentry->d_lock); 1865 1866 /* Negative dentries can be dropped without further checks */ 1867 if (!dentry->d_inode) 1868 return; 1869 1870 shrink_dcache_parent(dentry); 1871 for (;;) { 1872 struct dentry *victim = NULL; 1873 d_walk(dentry, &victim, find_submount); 1874 if (!victim) { 1875 if (had_submounts) 1876 shrink_dcache_parent(dentry); 1877 return; 1878 } 1879 had_submounts = true; 1880 detach_mounts(victim); 1881 dput(victim); 1882 } 1883 } 1884 EXPORT_SYMBOL(d_invalidate); 1885 1886 /** 1887 * __d_alloc - allocate a dcache entry 1888 * @sb: filesystem it will belong to 1889 * @name: qstr of the name 1890 * 1891 * Allocates a dentry. It returns %NULL if there is insufficient memory 1892 * available. On a success the dentry is returned. The name passed in is 1893 * copied and the copy passed in may be reused after this call. 1894 */ 1895 1896 static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) 1897 { 1898 struct dentry *dentry; 1899 char *dname; 1900 int err; 1901 1902 dentry = kmem_cache_alloc_lru(dentry_cache, &sb->s_dentry_lru, 1903 GFP_KERNEL); 1904 if (!dentry) 1905 return NULL; 1906 1907 /* 1908 * We guarantee that the inline name is always NUL-terminated. 1909 * This way the memcpy() done by the name switching in rename 1910 * will still always have a NUL at the end, even if we might 1911 * be overwriting an internal NUL character 1912 */ 1913 dentry->d_shortname.string[DNAME_INLINE_LEN-1] = 0; 1914 if (unlikely(!name)) { 1915 name = &slash_name; 1916 dname = dentry->d_shortname.string; 1917 } else if (name->len > DNAME_INLINE_LEN-1) { 1918 struct external_name *p; 1919 1920 p = kmalloc_flex(*p, name, name->len + 1, 1921 GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE); 1922 if (!p) { 1923 kmem_cache_free(dentry_cache, dentry); 1924 return NULL; 1925 } 1926 atomic_set(&p->count, 1); 1927 dname = p->name; 1928 } else { 1929 dname = dentry->d_shortname.string; 1930 } 1931 1932 dentry->__d_name.len = name->len; 1933 dentry->__d_name.hash = name->hash; 1934 memcpy(dname, name->name, name->len); 1935 dname[name->len] = 0; 1936 1937 /* Make sure we always see the terminating NUL character */ 1938 smp_store_release(&dentry->__d_name.name, dname); /* ^^^ */ 1939 1940 dentry->d_flags = 0; 1941 lockref_init(&dentry->d_lockref); 1942 seqcount_spinlock_init(&dentry->d_seq, &dentry->d_lock); 1943 dentry->d_inode = NULL; 1944 dentry->d_parent = dentry; 1945 dentry->d_sb = sb; 1946 dentry->d_op = sb->__s_d_op; 1947 dentry->d_flags = sb->s_d_flags; 1948 dentry->d_fsdata = NULL; 1949 INIT_HLIST_BL_NODE(&dentry->d_hash); 1950 INIT_LIST_HEAD(&dentry->d_lru); 1951 INIT_HLIST_HEAD(&dentry->d_children); 1952 dentry->waiters = NULL; 1953 INIT_HLIST_NODE(&dentry->d_sib); 1954 1955 if (dentry->d_op && dentry->d_op->d_init) { 1956 err = dentry->d_op->d_init(dentry); 1957 if (err) { 1958 if (dname_external(dentry)) 1959 kfree(external_name(dentry)); 1960 kmem_cache_free(dentry_cache, dentry); 1961 return NULL; 1962 } 1963 } 1964 1965 this_cpu_inc(nr_dentry); 1966 1967 return dentry; 1968 } 1969 1970 /** 1971 * d_alloc - allocate a dcache entry 1972 * @parent: parent of entry to allocate 1973 * @name: qstr of the name 1974 * 1975 * Allocates a dentry. It returns %NULL if there is insufficient memory 1976 * available. On a success the dentry is returned. The name passed in is 1977 * copied and the copy passed in may be reused after this call. 1978 */ 1979 struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) 1980 { 1981 struct dentry *dentry = __d_alloc(parent->d_sb, name); 1982 if (!dentry) 1983 return NULL; 1984 spin_lock(&parent->d_lock); 1985 /* 1986 * don't need child lock because it is not subject 1987 * to concurrency here 1988 */ 1989 dentry->d_parent = dget_dlock(parent); 1990 hlist_add_head(&dentry->d_sib, &parent->d_children); 1991 spin_unlock(&parent->d_lock); 1992 1993 return dentry; 1994 } 1995 EXPORT_SYMBOL(d_alloc); 1996 1997 struct dentry *d_alloc_anon(struct super_block *sb) 1998 { 1999 return __d_alloc(sb, NULL); 2000 } 2001 EXPORT_SYMBOL(d_alloc_anon); 2002 2003 struct dentry *d_alloc_cursor(struct dentry * parent) 2004 { 2005 struct dentry *dentry = d_alloc_anon(parent->d_sb); 2006 if (dentry) { 2007 dentry->d_flags |= DCACHE_DENTRY_CURSOR | DCACHE_NORCU; 2008 dentry->d_parent = dget(parent); 2009 } 2010 return dentry; 2011 } 2012 2013 /** 2014 * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems) 2015 * @sb: the superblock 2016 * @name: qstr of the name 2017 * 2018 * For a filesystem that just pins its dentries in memory and never 2019 * performs lookups at all, return an unhashed IS_ROOT dentry. 2020 * This is used for pipes, sockets et.al. - the stuff that should 2021 * never be anyone's children or parents. Unlike all other 2022 * dentries, these will not have RCU delay between dropping the 2023 * last reference and freeing them. 2024 * 2025 * The only user is alloc_file_pseudo() and that's what should 2026 * be considered a public interface. Don't use directly. 2027 */ 2028 struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) 2029 { 2030 static const struct dentry_operations anon_ops = { 2031 .d_dname = simple_dname 2032 }; 2033 struct dentry *dentry = __d_alloc(sb, name); 2034 if (likely(dentry)) { 2035 dentry->d_flags |= DCACHE_NORCU; 2036 /* d_op_flags(&anon_ops) is 0 */ 2037 if (!dentry->d_op) 2038 dentry->d_op = &anon_ops; 2039 } 2040 return dentry; 2041 } 2042 2043 struct dentry *d_alloc_name(struct dentry *parent, const char *name) 2044 { 2045 struct qstr q; 2046 2047 q.name = name; 2048 q.hash_len = hashlen_string(parent, name); 2049 return d_alloc(parent, &q); 2050 } 2051 EXPORT_SYMBOL(d_alloc_name); 2052 2053 #define DCACHE_OP_FLAGS \ 2054 (DCACHE_OP_HASH | DCACHE_OP_COMPARE | DCACHE_OP_REVALIDATE | \ 2055 DCACHE_OP_WEAK_REVALIDATE | DCACHE_OP_DELETE | DCACHE_OP_PRUNE | \ 2056 DCACHE_OP_REAL) 2057 2058 static unsigned int d_op_flags(const struct dentry_operations *op) 2059 { 2060 unsigned int flags = 0; 2061 if (op) { 2062 if (op->d_hash) 2063 flags |= DCACHE_OP_HASH; 2064 if (op->d_compare) 2065 flags |= DCACHE_OP_COMPARE; 2066 if (op->d_revalidate) 2067 flags |= DCACHE_OP_REVALIDATE; 2068 if (op->d_weak_revalidate) 2069 flags |= DCACHE_OP_WEAK_REVALIDATE; 2070 if (op->d_delete) 2071 flags |= DCACHE_OP_DELETE; 2072 if (op->d_prune) 2073 flags |= DCACHE_OP_PRUNE; 2074 if (op->d_real) 2075 flags |= DCACHE_OP_REAL; 2076 } 2077 return flags; 2078 } 2079 2080 static void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) 2081 { 2082 unsigned int flags = d_op_flags(op); 2083 WARN_ON_ONCE(dentry->d_op); 2084 WARN_ON_ONCE(dentry->d_flags & DCACHE_OP_FLAGS); 2085 dentry->d_op = op; 2086 if (flags) 2087 dentry->d_flags |= flags; 2088 } 2089 2090 void set_default_d_op(struct super_block *s, const struct dentry_operations *ops) 2091 { 2092 unsigned int flags = d_op_flags(ops); 2093 s->__s_d_op = ops; 2094 s->s_d_flags = (s->s_d_flags & ~DCACHE_OP_FLAGS) | flags; 2095 } 2096 EXPORT_SYMBOL(set_default_d_op); 2097 2098 static unsigned d_flags_for_inode(struct inode *inode) 2099 { 2100 unsigned add_flags = DCACHE_REGULAR_TYPE; 2101 2102 if (!inode) 2103 return DCACHE_MISS_TYPE; 2104 2105 if (S_ISDIR(inode->i_mode)) { 2106 add_flags = DCACHE_DIRECTORY_TYPE; 2107 if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) { 2108 if (unlikely(!inode->i_op->lookup)) 2109 add_flags = DCACHE_AUTODIR_TYPE; 2110 else 2111 inode->i_opflags |= IOP_LOOKUP; 2112 } 2113 goto type_determined; 2114 } 2115 2116 if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { 2117 if (unlikely(inode->i_op->get_link)) { 2118 add_flags = DCACHE_SYMLINK_TYPE; 2119 goto type_determined; 2120 } 2121 inode->i_opflags |= IOP_NOFOLLOW; 2122 } 2123 2124 if (unlikely(!S_ISREG(inode->i_mode))) 2125 add_flags = DCACHE_SPECIAL_TYPE; 2126 2127 type_determined: 2128 if (unlikely(IS_AUTOMOUNT(inode))) 2129 add_flags |= DCACHE_NEED_AUTOMOUNT; 2130 return add_flags; 2131 } 2132 2133 static void __d_instantiate(struct dentry *dentry, struct inode *inode) 2134 { 2135 unsigned add_flags = d_flags_for_inode(inode); 2136 WARN_ON(d_in_lookup(dentry)); 2137 2138 /* 2139 * The negative counter only tracks dentries on the LRU. Don't dec if 2140 * d_lru is on another list. 2141 */ 2142 if ((dentry->d_flags & 2143 (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST) 2144 this_cpu_dec(nr_dentry_negative); 2145 hlist_add_head(&dentry->d_alias, &inode->i_dentry); 2146 raw_write_seqcount_begin(&dentry->d_seq); 2147 __d_set_inode_and_type(dentry, inode, add_flags); 2148 raw_write_seqcount_end(&dentry->d_seq); 2149 fsnotify_update_flags(dentry); 2150 } 2151 2152 /** 2153 * d_instantiate - fill in inode information for a dentry 2154 * @entry: dentry to complete 2155 * @inode: inode to attach to this dentry 2156 * 2157 * Fill in inode information in the entry. 2158 * 2159 * This turns negative dentries into productive full members 2160 * of society. 2161 * 2162 * NOTE! This assumes that the inode count has been incremented 2163 * (or otherwise set) by the caller to indicate that it is now 2164 * in use by the dcache. 2165 */ 2166 2167 void d_instantiate(struct dentry *entry, struct inode * inode) 2168 { 2169 BUG_ON(d_really_is_positive(entry)); 2170 if (inode) { 2171 security_d_instantiate(entry, inode); 2172 spin_lock(&inode->i_lock); 2173 spin_lock(&entry->d_lock); 2174 __d_instantiate(entry, inode); 2175 spin_unlock(&entry->d_lock); 2176 spin_unlock(&inode->i_lock); 2177 } 2178 } 2179 EXPORT_SYMBOL(d_instantiate); 2180 2181 /* 2182 * This should be equivalent to d_instantiate() + unlock_new_inode(), 2183 * with lockdep-related part of unlock_new_inode() done before 2184 * anything else. Use that instead of open-coding d_instantiate()/ 2185 * unlock_new_inode() combinations. 2186 */ 2187 void d_instantiate_new(struct dentry *entry, struct inode *inode) 2188 { 2189 BUG_ON(d_really_is_positive(entry)); 2190 BUG_ON(!inode); 2191 lockdep_annotate_inode_mutex_key(inode); 2192 security_d_instantiate(entry, inode); 2193 spin_lock(&inode->i_lock); 2194 spin_lock(&entry->d_lock); 2195 __d_instantiate(entry, inode); 2196 spin_unlock(&entry->d_lock); 2197 WARN_ON(!(inode_state_read(inode) & I_NEW)); 2198 /* 2199 * Paired with igrab_from_hash() 2200 */ 2201 smp_wmb(); 2202 inode_state_clear(inode, I_NEW | I_CREATING); 2203 inode_wake_up_bit(inode, __I_NEW); 2204 spin_unlock(&inode->i_lock); 2205 } 2206 EXPORT_SYMBOL(d_instantiate_new); 2207 2208 struct dentry *d_make_root(struct inode *root_inode) 2209 { 2210 struct dentry *res = NULL; 2211 2212 if (root_inode) { 2213 res = d_alloc_anon(root_inode->i_sb); 2214 if (res) 2215 d_instantiate(res, root_inode); 2216 else 2217 iput(root_inode); 2218 } 2219 return res; 2220 } 2221 EXPORT_SYMBOL(d_make_root); 2222 2223 static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected) 2224 { 2225 struct super_block *sb; 2226 struct dentry *new, *res; 2227 2228 if (!inode) 2229 return ERR_PTR(-ESTALE); 2230 if (IS_ERR(inode)) 2231 return ERR_CAST(inode); 2232 2233 sb = inode->i_sb; 2234 2235 res = d_find_any_alias(inode); /* existing alias? */ 2236 if (res) 2237 goto out; 2238 2239 new = d_alloc_anon(sb); 2240 if (!new) { 2241 res = ERR_PTR(-ENOMEM); 2242 goto out; 2243 } 2244 2245 security_d_instantiate(new, inode); 2246 spin_lock(&inode->i_lock); 2247 res = __d_find_any_alias(inode); /* recheck under lock */ 2248 if (likely(!res)) { /* still no alias, attach a disconnected dentry */ 2249 unsigned add_flags = d_flags_for_inode(inode); 2250 2251 if (disconnected) 2252 add_flags |= DCACHE_DISCONNECTED; 2253 2254 spin_lock(&new->d_lock); 2255 __d_set_inode_and_type(new, inode, add_flags); 2256 hlist_add_head(&new->d_alias, &inode->i_dentry); 2257 if (!disconnected) { 2258 spin_lock(&sb->s_roots_lock); 2259 hlist_add_head(&new->d_sib, &sb->s_roots); 2260 spin_unlock(&sb->s_roots_lock); 2261 } 2262 spin_unlock(&new->d_lock); 2263 spin_unlock(&inode->i_lock); 2264 inode = NULL; /* consumed by new->d_inode */ 2265 res = new; 2266 } else { 2267 spin_unlock(&inode->i_lock); 2268 dput(new); 2269 } 2270 2271 out: 2272 iput(inode); 2273 return res; 2274 } 2275 2276 /** 2277 * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode 2278 * @inode: inode to allocate the dentry for 2279 * 2280 * Obtain a dentry for an inode resulting from NFS filehandle conversion or 2281 * similar open by handle operations. The returned dentry may be anonymous, 2282 * or may have a full name (if the inode was already in the cache). 2283 * 2284 * When called on a directory inode, we must ensure that the inode only ever 2285 * has one dentry. If a dentry is found, that is returned instead of 2286 * allocating a new one. 2287 * 2288 * On successful return, the reference to the inode has been transferred 2289 * to the dentry. In case of an error the reference on the inode is released. 2290 * To make it easier to use in export operations a %NULL or IS_ERR inode may 2291 * be passed in and the error will be propagated to the return value, 2292 * with a %NULL @inode replaced by ERR_PTR(-ESTALE). 2293 */ 2294 struct dentry *d_obtain_alias(struct inode *inode) 2295 { 2296 return __d_obtain_alias(inode, true); 2297 } 2298 EXPORT_SYMBOL(d_obtain_alias); 2299 2300 /** 2301 * d_obtain_root - find or allocate a dentry for a given inode 2302 * @inode: inode to allocate the dentry for 2303 * 2304 * Obtain an IS_ROOT dentry for the root of a filesystem. 2305 * 2306 * We must ensure that directory inodes only ever have one dentry. If a 2307 * dentry is found, that is returned instead of allocating a new one. 2308 * 2309 * On successful return, the reference to the inode has been transferred 2310 * to the dentry. In case of an error the reference on the inode is 2311 * released. A %NULL or IS_ERR inode may be passed in and will be the 2312 * error will be propagate to the return value, with a %NULL @inode 2313 * replaced by ERR_PTR(-ESTALE). 2314 */ 2315 struct dentry *d_obtain_root(struct inode *inode) 2316 { 2317 return __d_obtain_alias(inode, false); 2318 } 2319 EXPORT_SYMBOL(d_obtain_root); 2320 2321 /** 2322 * d_add_ci - lookup or allocate new dentry with case-exact name 2323 * @dentry: the negative dentry that was passed to the parent's lookup func 2324 * @inode: the inode case-insensitive lookup has found 2325 * @name: the case-exact name to be associated with the returned dentry 2326 * 2327 * This is to avoid filling the dcache with case-insensitive names to the 2328 * same inode, only the actual correct case is stored in the dcache for 2329 * case-insensitive filesystems. 2330 * 2331 * For a case-insensitive lookup match and if the case-exact dentry 2332 * already exists in the dcache, use it and return it. 2333 * 2334 * If no entry exists with the exact case name, allocate new dentry with 2335 * the exact case, and return the spliced entry. 2336 */ 2337 struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, 2338 struct qstr *name) 2339 { 2340 struct dentry *found, *res; 2341 2342 /* 2343 * First check if a dentry matching the name already exists, 2344 * if not go ahead and create it now. 2345 */ 2346 found = d_hash_and_lookup(dentry->d_parent, name); 2347 if (found) { 2348 iput(inode); 2349 return found; 2350 } 2351 if (d_in_lookup(dentry)) { 2352 found = d_alloc_parallel(dentry->d_parent, name); 2353 if (IS_ERR(found) || !d_in_lookup(found)) { 2354 iput(inode); 2355 return found; 2356 } 2357 } else { 2358 found = d_alloc(dentry->d_parent, name); 2359 if (!found) { 2360 iput(inode); 2361 return ERR_PTR(-ENOMEM); 2362 } 2363 } 2364 res = d_splice_alias(inode, found); 2365 if (res) { 2366 d_lookup_done(found); 2367 dput(found); 2368 return res; 2369 } 2370 return found; 2371 } 2372 EXPORT_SYMBOL(d_add_ci); 2373 2374 /** 2375 * d_same_name - compare dentry name with case-exact name 2376 * @dentry: the negative dentry that was passed to the parent's lookup func 2377 * @parent: parent dentry 2378 * @name: the case-exact name to be associated with the returned dentry 2379 * 2380 * Return: true if names are same, or false 2381 */ 2382 bool d_same_name(const struct dentry *dentry, const struct dentry *parent, 2383 const struct qstr *name) 2384 { 2385 if (likely(!(parent->d_flags & DCACHE_OP_COMPARE))) { 2386 if (dentry->d_name.len != name->len) 2387 return false; 2388 return dentry_cmp(dentry, name->name, name->len) == 0; 2389 } 2390 return parent->d_op->d_compare(dentry, 2391 dentry->d_name.len, dentry->d_name.name, 2392 name) == 0; 2393 } 2394 EXPORT_SYMBOL_GPL(d_same_name); 2395 2396 /* 2397 * This is __d_lookup_rcu() when the parent dentry has 2398 * DCACHE_OP_COMPARE, which makes things much nastier. 2399 */ 2400 static noinline struct dentry *__d_lookup_rcu_op_compare( 2401 const struct dentry *parent, 2402 const struct qstr *name, 2403 unsigned *seqp) 2404 { 2405 u64 hashlen = name->hash_len; 2406 struct hlist_bl_head *b = d_hash(hashlen); 2407 struct hlist_bl_node *node; 2408 struct dentry *dentry; 2409 2410 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { 2411 int tlen; 2412 const char *tname; 2413 unsigned seq; 2414 2415 seqretry: 2416 seq = raw_seqcount_begin(&dentry->d_seq); 2417 if (dentry->d_parent != parent) 2418 continue; 2419 if (d_unhashed(dentry)) 2420 continue; 2421 if (dentry->d_name.hash != hashlen_hash(hashlen)) 2422 continue; 2423 tlen = dentry->d_name.len; 2424 tname = dentry->d_name.name; 2425 /* we want a consistent (name,len) pair */ 2426 if (read_seqcount_retry(&dentry->d_seq, seq)) { 2427 cpu_relax(); 2428 goto seqretry; 2429 } 2430 if (parent->d_op->d_compare(dentry, tlen, tname, name) != 0) 2431 continue; 2432 *seqp = seq; 2433 return dentry; 2434 } 2435 return NULL; 2436 } 2437 2438 /** 2439 * __d_lookup_rcu - search for a dentry (racy, store-free) 2440 * @parent: parent dentry 2441 * @name: qstr of name we wish to find 2442 * @seqp: returns d_seq value at the point where the dentry was found 2443 * Returns: dentry, or NULL 2444 * 2445 * __d_lookup_rcu is the dcache lookup function for rcu-walk name 2446 * resolution (store-free path walking) design described in 2447 * Documentation/filesystems/path-lookup.txt. 2448 * 2449 * This is not to be used outside core vfs. 2450 * 2451 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock 2452 * held, and rcu_read_lock held. The returned dentry must not be stored into 2453 * without taking d_lock and checking d_seq sequence count against @seq 2454 * returned here. 2455 * 2456 * Alternatively, __d_lookup_rcu may be called again to look up the child of 2457 * the returned dentry, so long as its parent's seqlock is checked after the 2458 * child is looked up. Thus, an interlocking stepping of sequence lock checks 2459 * is formed, giving integrity down the path walk. 2460 * 2461 * NOTE! The caller *has* to check the resulting dentry against the sequence 2462 * number we've returned before using any of the resulting dentry state! 2463 */ 2464 struct dentry *__d_lookup_rcu(const struct dentry *parent, 2465 const struct qstr *name, 2466 unsigned *seqp) 2467 { 2468 u64 hashlen = name->hash_len; 2469 const unsigned char *str = name->name; 2470 struct hlist_bl_head *b = d_hash(hashlen); 2471 struct hlist_bl_node *node; 2472 struct dentry *dentry; 2473 2474 /* 2475 * Note: There is significant duplication with __d_lookup_rcu which is 2476 * required to prevent single threaded performance regressions 2477 * especially on architectures where smp_rmb (in seqcounts) are costly. 2478 * Keep the two functions in sync. 2479 */ 2480 2481 if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) 2482 return __d_lookup_rcu_op_compare(parent, name, seqp); 2483 2484 /* 2485 * The hash list is protected using RCU. 2486 * 2487 * Carefully use d_seq when comparing a candidate dentry, to avoid 2488 * races with d_move(). 2489 * 2490 * It is possible that concurrent renames can mess up our list 2491 * walk here and result in missing our dentry, resulting in the 2492 * false-negative result. d_lookup() protects against concurrent 2493 * renames using rename_lock seqlock. 2494 * 2495 * See Documentation/filesystems/path-lookup.txt for more details. 2496 */ 2497 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { 2498 unsigned seq; 2499 2500 /* 2501 * The dentry sequence count protects us from concurrent 2502 * renames, and thus protects parent and name fields. 2503 * 2504 * The caller must perform a seqcount check in order 2505 * to do anything useful with the returned dentry. 2506 * 2507 * NOTE! We do a "raw" seqcount_begin here. That means that 2508 * we don't wait for the sequence count to stabilize if it 2509 * is in the middle of a sequence change. If we do the slow 2510 * dentry compare, we will do seqretries until it is stable, 2511 * and if we end up with a successful lookup, we actually 2512 * want to exit RCU lookup anyway. 2513 * 2514 * Note that raw_seqcount_begin still *does* smp_rmb(), so 2515 * we are still guaranteed NUL-termination of ->d_name.name. 2516 */ 2517 seq = raw_seqcount_begin(&dentry->d_seq); 2518 if (dentry->d_parent != parent) 2519 continue; 2520 if (dentry->d_name.hash_len != hashlen) 2521 continue; 2522 if (unlikely(dentry_cmp(dentry, str, hashlen_len(hashlen)) != 0)) 2523 continue; 2524 /* 2525 * Check for the dentry being unhashed. 2526 * 2527 * As tempting as it is, we *can't* skip it because of a race window 2528 * between us finding the dentry before it gets unhashed and loading 2529 * the sequence counter after unhashing is finished. 2530 * 2531 * We can at least predict on it. 2532 */ 2533 if (unlikely(d_unhashed(dentry))) 2534 continue; 2535 *seqp = seq; 2536 return dentry; 2537 } 2538 return NULL; 2539 } 2540 2541 /** 2542 * d_lookup - search for a dentry 2543 * @parent: parent dentry 2544 * @name: qstr of name we wish to find 2545 * Returns: dentry, or NULL 2546 * 2547 * d_lookup searches the children of the parent dentry for the name in 2548 * question. If the dentry is found its reference count is incremented and the 2549 * dentry is returned. The caller must use dput to free the entry when it has 2550 * finished using it. %NULL is returned if the dentry does not exist. 2551 */ 2552 struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name) 2553 { 2554 struct dentry *dentry; 2555 unsigned seq; 2556 2557 do { 2558 seq = read_seqbegin(&rename_lock); 2559 dentry = __d_lookup(parent, name); 2560 if (dentry) 2561 break; 2562 } while (read_seqretry(&rename_lock, seq)); 2563 return dentry; 2564 } 2565 EXPORT_SYMBOL(d_lookup); 2566 2567 /** 2568 * __d_lookup - search for a dentry (racy) 2569 * @parent: parent dentry 2570 * @name: qstr of name we wish to find 2571 * Returns: dentry, or NULL 2572 * 2573 * __d_lookup is like d_lookup, however it may (rarely) return a 2574 * false-negative result due to unrelated rename activity. 2575 * 2576 * __d_lookup is slightly faster by avoiding rename_lock read seqlock, 2577 * however it must be used carefully, eg. with a following d_lookup in 2578 * the case of failure. 2579 * 2580 * __d_lookup callers must be commented. 2581 */ 2582 struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name) 2583 { 2584 unsigned int hash = name->hash; 2585 struct hlist_bl_head *b = d_hash(hash); 2586 struct hlist_bl_node *node; 2587 struct dentry *found = NULL; 2588 struct dentry *dentry; 2589 2590 /* 2591 * Note: There is significant duplication with __d_lookup_rcu which is 2592 * required to prevent single threaded performance regressions 2593 * especially on architectures where smp_rmb (in seqcounts) are costly. 2594 * Keep the two functions in sync. 2595 */ 2596 2597 /* 2598 * The hash list is protected using RCU. 2599 * 2600 * Take d_lock when comparing a candidate dentry, to avoid races 2601 * with d_move(). 2602 * 2603 * It is possible that concurrent renames can mess up our list 2604 * walk here and result in missing our dentry, resulting in the 2605 * false-negative result. d_lookup() protects against concurrent 2606 * renames using rename_lock seqlock. 2607 * 2608 * See Documentation/filesystems/path-lookup.txt for more details. 2609 */ 2610 rcu_read_lock(); 2611 2612 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { 2613 2614 if (dentry->d_name.hash != hash) 2615 continue; 2616 2617 spin_lock(&dentry->d_lock); 2618 if (dentry->d_parent != parent) 2619 goto next; 2620 if (d_unhashed(dentry)) 2621 goto next; 2622 2623 if (!d_same_name(dentry, parent, name)) 2624 goto next; 2625 2626 dentry->d_lockref.count++; 2627 found = dentry; 2628 spin_unlock(&dentry->d_lock); 2629 break; 2630 next: 2631 spin_unlock(&dentry->d_lock); 2632 } 2633 rcu_read_unlock(); 2634 2635 return found; 2636 } 2637 2638 /** 2639 * d_hash_and_lookup - hash the qstr then search for a dentry 2640 * @dir: Directory to search in 2641 * @name: qstr of name we wish to find 2642 * 2643 * On lookup failure NULL is returned; on bad name - ERR_PTR(-error) 2644 */ 2645 struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) 2646 { 2647 /* 2648 * Check for a fs-specific hash function. Note that we must 2649 * calculate the standard hash first, as the d_op->d_hash() 2650 * routine may choose to leave the hash value unchanged. 2651 */ 2652 name->hash = full_name_hash(dir, name->name, name->len); 2653 if (dir->d_flags & DCACHE_OP_HASH) { 2654 int err = dir->d_op->d_hash(dir, name); 2655 if (unlikely(err < 0)) 2656 return ERR_PTR(err); 2657 } 2658 return d_lookup(dir, name); 2659 } 2660 2661 /* 2662 * When a file is deleted, we have two options: 2663 * - turn this dentry into a negative dentry 2664 * - unhash this dentry and free it. 2665 * 2666 * Usually, we want to just turn this into 2667 * a negative dentry, but if anybody else is 2668 * currently using the dentry or the inode 2669 * we can't do that and we fall back on removing 2670 * it from the hash queues and waiting for 2671 * it to be deleted later when it has no users 2672 */ 2673 2674 /** 2675 * d_delete - delete a dentry 2676 * @dentry: The dentry to delete 2677 * 2678 * Turn the dentry into a negative dentry if possible, otherwise 2679 * remove it from the hash queues so it can be deleted later 2680 */ 2681 2682 void d_delete(struct dentry * dentry) 2683 { 2684 struct inode *inode = dentry->d_inode; 2685 2686 spin_lock(&inode->i_lock); 2687 spin_lock(&dentry->d_lock); 2688 /* 2689 * Are we the only user? 2690 */ 2691 if (dentry->d_lockref.count == 1) { 2692 if (dentry_negative_policy) 2693 __d_drop(dentry); 2694 dentry->d_flags &= ~DCACHE_CANT_MOUNT; 2695 dentry_unlink_inode(dentry); 2696 } else { 2697 __d_drop(dentry); 2698 spin_unlock(&dentry->d_lock); 2699 spin_unlock(&inode->i_lock); 2700 } 2701 } 2702 EXPORT_SYMBOL(d_delete); 2703 2704 static void __d_rehash(struct dentry *entry) 2705 { 2706 struct hlist_bl_head *b = d_hash(entry->d_name.hash); 2707 2708 hlist_bl_lock(b); 2709 hlist_bl_add_head_rcu(&entry->d_hash, b); 2710 hlist_bl_unlock(b); 2711 } 2712 2713 /** 2714 * d_rehash - add an entry back to the hash 2715 * @entry: dentry to add to the hash 2716 * 2717 * Adds a dentry to the hash according to its name. 2718 */ 2719 2720 void d_rehash(struct dentry * entry) 2721 { 2722 spin_lock(&entry->d_lock); 2723 __d_rehash(entry); 2724 spin_unlock(&entry->d_lock); 2725 } 2726 EXPORT_SYMBOL(d_rehash); 2727 2728 static inline unsigned start_dir_add(struct inode *dir) 2729 { 2730 preempt_disable_nested(); 2731 for (;;) { 2732 unsigned n = READ_ONCE(dir->i_dir_seq); 2733 if (!(n & 1) && try_cmpxchg(&dir->i_dir_seq, &n, n + 1)) 2734 return n; 2735 cpu_relax(); 2736 } 2737 } 2738 2739 static inline void end_dir_add(struct inode *dir, unsigned int n) 2740 { 2741 smp_store_release(&dir->i_dir_seq, n + 2); 2742 preempt_enable_nested(); 2743 } 2744 2745 static void d_wait_lookup(struct dentry *dentry) 2746 { 2747 if (likely(d_in_lookup(dentry))) { 2748 dentry->d_flags |= DCACHE_LOOKUP_WAITERS; 2749 wait_var_event_spinlock(&dentry->d_flags, 2750 !d_in_lookup(dentry), 2751 &dentry->d_lock); 2752 } 2753 } 2754 2755 struct dentry *d_alloc_parallel(struct dentry *parent, 2756 const struct qstr *name) 2757 { 2758 unsigned int hash = name->hash; 2759 struct hlist_bl_head *b = in_lookup_hash(parent, hash); 2760 struct hlist_bl_node *node; 2761 struct dentry *new = __d_alloc(parent->d_sb, name); 2762 struct dentry *dentry; 2763 unsigned seq, r_seq, d_seq; 2764 2765 if (unlikely(!new)) 2766 return ERR_PTR(-ENOMEM); 2767 2768 new->d_flags |= DCACHE_PAR_LOOKUP; 2769 spin_lock(&parent->d_lock); 2770 new->d_parent = dget_dlock(parent); 2771 hlist_add_head(&new->d_sib, &parent->d_children); 2772 if (parent->d_flags & DCACHE_DISCONNECTED) 2773 new->d_flags |= DCACHE_DISCONNECTED; 2774 spin_unlock(&parent->d_lock); 2775 2776 retry: 2777 seq = smp_load_acquire(&parent->d_inode->i_dir_seq); 2778 r_seq = read_seqbegin(&rename_lock); 2779 rcu_read_lock(); 2780 dentry = __d_lookup_rcu(parent, name, &d_seq); 2781 if (unlikely(dentry)) { 2782 if (!lockref_get_not_dead(&dentry->d_lockref)) { 2783 rcu_read_unlock(); 2784 goto retry; 2785 } 2786 rcu_read_unlock(); 2787 if (read_seqcount_retry(&dentry->d_seq, d_seq)) { 2788 dput(dentry); 2789 goto retry; 2790 } 2791 dput(new); 2792 return dentry; 2793 } 2794 rcu_read_unlock(); 2795 if (unlikely(read_seqretry(&rename_lock, r_seq))) 2796 goto retry; 2797 2798 if (unlikely(seq & 1)) 2799 goto retry; 2800 2801 hlist_bl_lock(b); 2802 if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) { 2803 hlist_bl_unlock(b); 2804 goto retry; 2805 } 2806 /* 2807 * No changes for the parent since the beginning of d_lookup(). 2808 * Since all removals from the chain happen with hlist_bl_lock(), 2809 * any potential in-lookup matches are going to stay here until 2810 * we unlock the chain. All fields are stable in everything 2811 * we encounter. 2812 */ 2813 hlist_bl_for_each_entry(dentry, node, b, d_in_lookup_hash) { 2814 if (dentry->d_name.hash != hash) 2815 continue; 2816 if (dentry->d_parent != parent) 2817 continue; 2818 if (!d_same_name(dentry, parent, name)) 2819 continue; 2820 rcu_read_lock(); 2821 hlist_bl_unlock(b); 2822 spin_lock(&dentry->d_lock); 2823 rcu_read_unlock(); 2824 /* now we can try to grab a reference */ 2825 if (unlikely(dentry->d_lockref.count < 0)) { 2826 spin_unlock(&dentry->d_lock); 2827 goto retry; 2828 } 2829 /* 2830 * somebody is likely to be still doing lookup for it; 2831 * pin it and wait for them to finish 2832 */ 2833 dget_dlock(dentry); 2834 d_wait_lookup(dentry); 2835 /* 2836 * it's not in-lookup anymore; in principle we should repeat 2837 * everything from dcache lookup, but it's likely to be what 2838 * d_lookup() would've found anyway. If it is, just return it; 2839 * otherwise we really have to repeat the whole thing. 2840 */ 2841 if (unlikely(dentry->d_name.hash != hash)) 2842 goto mismatch; 2843 if (unlikely(dentry->d_parent != parent)) 2844 goto mismatch; 2845 if (unlikely(d_unhashed(dentry))) 2846 goto mismatch; 2847 if (unlikely(!d_same_name(dentry, parent, name))) 2848 goto mismatch; 2849 /* OK, it *is* a hashed match; return it */ 2850 spin_unlock(&dentry->d_lock); 2851 dput(new); 2852 return dentry; 2853 } 2854 hlist_bl_add_head(&new->d_in_lookup_hash, b); 2855 hlist_bl_unlock(b); 2856 return new; 2857 mismatch: 2858 spin_unlock(&dentry->d_lock); 2859 dput(dentry); 2860 goto retry; 2861 } 2862 EXPORT_SYMBOL(d_alloc_parallel); 2863 2864 /* 2865 * Move dentry from in-lookup state to busy-negative one. 2866 * 2867 * From now on d_in_lookup(dentry) will return false and dentry is gone from 2868 * in-lookup hash. 2869 * 2870 * Anyone who had been waiting on it in d_alloc_parallel() is free to 2871 * proceed after that. Note that waking such waiters up is left to 2872 * the callers; PREEMPT_RT kernels can't have that wakeup done while 2873 * in write-side critical area for ->i_dir_seq, so it's done by calling 2874 * __d_wake_in_lookup_waiters() once it's safe to do so. 2875 * 2876 * Both __d_lookup_unhash() and __d_wake_in_lookup_waiters() should 2877 * be called within the same ->d_lock scope. PAR_LOOKUP is cleared 2878 * here, while LOOKUP_WAITERS (set by somebody finding dentry in 2879 * the in-lookup hash and setting down to wait) is checked and cleared 2880 * in __d_wake_in_lookup_waiters(). Both are gone by the end of 2881 * ->d_lock scope. 2882 */ 2883 static void __d_lookup_unhash(struct dentry *dentry) 2884 { 2885 struct hlist_bl_head *b; 2886 2887 lockdep_assert_held(&dentry->d_lock); 2888 2889 b = in_lookup_hash(dentry->d_parent, dentry->d_name.hash); 2890 hlist_bl_lock(b); 2891 dentry->d_flags &= ~DCACHE_PAR_LOOKUP; 2892 __hlist_bl_del(&dentry->d_in_lookup_hash); 2893 hlist_bl_unlock(b); 2894 dentry->waiters = NULL; 2895 } 2896 2897 static inline void __d_wake_in_lookup_waiters(struct dentry *dentry) 2898 { 2899 if (dentry->d_flags & DCACHE_LOOKUP_WAITERS) { 2900 wake_up_var_locked(&dentry->d_flags, &dentry->d_lock); 2901 dentry->d_flags &= ~DCACHE_LOOKUP_WAITERS; 2902 } 2903 } 2904 2905 void __d_lookup_unhash_wake(struct dentry *dentry) 2906 { 2907 spin_lock(&dentry->d_lock); 2908 __d_lookup_unhash(dentry); 2909 __d_wake_in_lookup_waiters(dentry); 2910 spin_unlock(&dentry->d_lock); 2911 } 2912 EXPORT_SYMBOL(__d_lookup_unhash_wake); 2913 2914 /* inode->i_lock held if inode is non-NULL */ 2915 2916 static inline void __d_add(struct dentry *dentry, struct inode *inode, 2917 const struct dentry_operations *ops) 2918 { 2919 struct inode *dir = NULL; 2920 unsigned n; 2921 spin_lock(&dentry->d_lock); 2922 if (unlikely(d_in_lookup(dentry))) { 2923 dir = dentry->d_parent->d_inode; 2924 n = start_dir_add(dir); 2925 __d_lookup_unhash(dentry); 2926 } 2927 if (unlikely(ops)) 2928 d_set_d_op(dentry, ops); 2929 if (inode) { 2930 unsigned add_flags = d_flags_for_inode(inode); 2931 hlist_add_head(&dentry->d_alias, &inode->i_dentry); 2932 raw_write_seqcount_begin(&dentry->d_seq); 2933 __d_set_inode_and_type(dentry, inode, add_flags); 2934 raw_write_seqcount_end(&dentry->d_seq); 2935 fsnotify_update_flags(dentry); 2936 } 2937 __d_rehash(dentry); 2938 if (dir) { 2939 end_dir_add(dir, n); 2940 __d_wake_in_lookup_waiters(dentry); 2941 } 2942 spin_unlock(&dentry->d_lock); 2943 if (inode) 2944 spin_unlock(&inode->i_lock); 2945 } 2946 2947 /** 2948 * d_add - add dentry to hash queues 2949 * @entry: dentry to add 2950 * @inode: The inode to attach to this dentry 2951 * 2952 * This adds the entry to the hash queues and initializes @inode. 2953 * The entry was actually filled in earlier during d_alloc(). 2954 */ 2955 2956 void d_add(struct dentry *entry, struct inode *inode) 2957 { 2958 if (inode) { 2959 security_d_instantiate(entry, inode); 2960 spin_lock(&inode->i_lock); 2961 } 2962 __d_add(entry, inode, NULL); 2963 } 2964 EXPORT_SYMBOL(d_add); 2965 2966 struct dentry *d_make_persistent(struct dentry *dentry, struct inode *inode) 2967 { 2968 WARN_ON(d_really_is_positive(dentry)); 2969 WARN_ON(!inode); 2970 security_d_instantiate(dentry, inode); 2971 spin_lock(&inode->i_lock); 2972 spin_lock(&dentry->d_lock); 2973 __d_instantiate(dentry, inode); 2974 dentry->d_flags |= DCACHE_PERSISTENT; 2975 dget_dlock(dentry); 2976 if (d_unhashed(dentry)) 2977 __d_rehash(dentry); 2978 spin_unlock(&dentry->d_lock); 2979 spin_unlock(&inode->i_lock); 2980 return dentry; 2981 } 2982 EXPORT_SYMBOL(d_make_persistent); 2983 2984 static void swap_names(struct dentry *dentry, struct dentry *target) 2985 { 2986 if (unlikely(dname_external(target))) { 2987 if (unlikely(dname_external(dentry))) { 2988 /* 2989 * Both external: swap the pointers 2990 */ 2991 swap(target->__d_name.name, dentry->__d_name.name); 2992 } else { 2993 /* 2994 * dentry:internal, target:external. Steal target's 2995 * storage and make target internal. 2996 */ 2997 dentry->__d_name.name = target->__d_name.name; 2998 target->d_shortname = dentry->d_shortname; 2999 target->__d_name.name = target->d_shortname.string; 3000 } 3001 } else { 3002 if (unlikely(dname_external(dentry))) { 3003 /* 3004 * dentry:external, target:internal. Give dentry's 3005 * storage to target and make dentry internal 3006 */ 3007 target->__d_name.name = dentry->__d_name.name; 3008 dentry->d_shortname = target->d_shortname; 3009 dentry->__d_name.name = dentry->d_shortname.string; 3010 } else { 3011 /* 3012 * Both are internal. 3013 */ 3014 for (int i = 0; i < DNAME_INLINE_WORDS; i++) 3015 swap(dentry->d_shortname.words[i], 3016 target->d_shortname.words[i]); 3017 } 3018 } 3019 swap(dentry->__d_name.hash_len, target->__d_name.hash_len); 3020 } 3021 3022 static void copy_name(struct dentry *dentry, struct dentry *target) 3023 { 3024 struct external_name *old_name = NULL; 3025 if (unlikely(dname_external(dentry))) 3026 old_name = external_name(dentry); 3027 if (unlikely(dname_external(target))) { 3028 atomic_inc(&external_name(target)->count); 3029 dentry->__d_name = target->__d_name; 3030 } else { 3031 dentry->d_shortname = target->d_shortname; 3032 dentry->__d_name.name = dentry->d_shortname.string; 3033 dentry->__d_name.hash_len = target->__d_name.hash_len; 3034 } 3035 if (old_name && likely(atomic_dec_and_test(&old_name->count))) 3036 kfree_rcu(old_name, head); 3037 } 3038 3039 /* 3040 * __d_move - move a dentry 3041 * @dentry: entry to move 3042 * @target: new dentry 3043 * @exchange: exchange the two dentries 3044 * 3045 * Update the dcache to reflect the move of a file name. Negative dcache 3046 * entries should not be moved in this way. Caller must hold rename_lock, the 3047 * i_rwsem of the source and target directories (exclusively), and the sb-> 3048 * s_vfs_rename_mutex if they differ. See lock_rename(). 3049 */ 3050 static void __d_move(struct dentry *dentry, struct dentry *target, 3051 bool exchange) 3052 { 3053 struct dentry *old_parent, *p; 3054 struct inode *dir = NULL; 3055 unsigned n; 3056 3057 WARN_ON(!dentry->d_inode); 3058 if (WARN_ON(dentry == target)) 3059 return; 3060 3061 BUG_ON(d_ancestor(target, dentry)); 3062 old_parent = dentry->d_parent; 3063 p = d_ancestor(old_parent, target); 3064 if (IS_ROOT(dentry)) { 3065 BUG_ON(p); 3066 spin_lock(&target->d_parent->d_lock); 3067 } else if (!p) { 3068 /* target is not a descendent of dentry->d_parent */ 3069 spin_lock(&target->d_parent->d_lock); 3070 spin_lock_nested(&old_parent->d_lock, DENTRY_D_LOCK_NESTED); 3071 } else { 3072 BUG_ON(p == dentry); 3073 spin_lock(&old_parent->d_lock); 3074 if (p != target) 3075 spin_lock_nested(&target->d_parent->d_lock, 3076 DENTRY_D_LOCK_NESTED); 3077 } 3078 spin_lock_nested(&dentry->d_lock, 2); 3079 spin_lock_nested(&target->d_lock, 3); 3080 3081 if (unlikely(d_in_lookup(target))) { 3082 dir = target->d_parent->d_inode; 3083 n = start_dir_add(dir); 3084 __d_lookup_unhash(target); 3085 } 3086 3087 write_seqcount_begin(&dentry->d_seq); 3088 write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); 3089 3090 /* unhash both */ 3091 if (!d_unhashed(dentry)) 3092 ___d_drop(dentry); 3093 if (!d_unhashed(target)) 3094 ___d_drop(target); 3095 3096 /* ... and switch them in the tree */ 3097 dentry->d_parent = target->d_parent; 3098 if (!exchange) { 3099 copy_name(dentry, target); 3100 target->d_hash.pprev = NULL; 3101 dentry->d_parent->d_lockref.count++; 3102 if (dentry != old_parent) /* wasn't IS_ROOT */ 3103 WARN_ON(!--old_parent->d_lockref.count); 3104 } else { 3105 target->d_parent = old_parent; 3106 swap_names(dentry, target); 3107 if (!hlist_unhashed(&target->d_sib)) 3108 __hlist_del(&target->d_sib); 3109 hlist_add_head(&target->d_sib, &target->d_parent->d_children); 3110 __d_rehash(target); 3111 fsnotify_update_flags(target); 3112 } 3113 if (!hlist_unhashed(&dentry->d_sib)) 3114 __hlist_del(&dentry->d_sib); 3115 hlist_add_head(&dentry->d_sib, &dentry->d_parent->d_children); 3116 __d_rehash(dentry); 3117 fsnotify_update_flags(dentry); 3118 fscrypt_handle_d_move(dentry); 3119 3120 write_seqcount_end(&target->d_seq); 3121 write_seqcount_end(&dentry->d_seq); 3122 3123 if (dir) { 3124 end_dir_add(dir, n); 3125 __d_wake_in_lookup_waiters(target); 3126 } 3127 if (dentry->d_parent != old_parent) 3128 spin_unlock(&dentry->d_parent->d_lock); 3129 if (dentry != old_parent) 3130 spin_unlock(&old_parent->d_lock); 3131 spin_unlock(&target->d_lock); 3132 spin_unlock(&dentry->d_lock); 3133 } 3134 3135 /* 3136 * d_move - move a dentry 3137 * @dentry: entry to move 3138 * @target: new dentry 3139 * 3140 * Update the dcache to reflect the move of a file name. Negative 3141 * dcache entries should not be moved in this way. See the locking 3142 * requirements for __d_move. 3143 */ 3144 void d_move(struct dentry *dentry, struct dentry *target) 3145 { 3146 write_seqlock(&rename_lock); 3147 __d_move(dentry, target, false); 3148 write_sequnlock(&rename_lock); 3149 } 3150 EXPORT_SYMBOL(d_move); 3151 3152 /* 3153 * d_exchange - exchange two dentries 3154 * @dentry1: first dentry 3155 * @dentry2: second dentry 3156 */ 3157 void d_exchange(struct dentry *dentry1, struct dentry *dentry2) 3158 { 3159 write_seqlock(&rename_lock); 3160 3161 WARN_ON(!dentry1->d_inode); 3162 WARN_ON(!dentry2->d_inode); 3163 WARN_ON(IS_ROOT(dentry1)); 3164 WARN_ON(IS_ROOT(dentry2)); 3165 3166 __d_move(dentry1, dentry2, true); 3167 3168 write_sequnlock(&rename_lock); 3169 } 3170 EXPORT_SYMBOL(d_exchange); 3171 3172 /** 3173 * d_ancestor - search for an ancestor 3174 * @p1: ancestor dentry 3175 * @p2: child dentry 3176 * 3177 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is 3178 * an ancestor of p2, else NULL. 3179 */ 3180 struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) 3181 { 3182 struct dentry *p; 3183 3184 for (p = p2; !IS_ROOT(p); p = p->d_parent) { 3185 if (p->d_parent == p1) 3186 return p; 3187 } 3188 return NULL; 3189 } 3190 3191 /* 3192 * This helper attempts to cope with remotely renamed directories 3193 * 3194 * It assumes that the caller is already holding 3195 * dentry->d_parent->d_inode->i_rwsem, and rename_lock 3196 * 3197 * Note: If ever the locking in lock_rename() changes, then please 3198 * remember to update this too... 3199 */ 3200 static int __d_unalias(struct dentry *dentry, struct dentry *alias) 3201 { 3202 struct mutex *m1 = NULL; 3203 struct rw_semaphore *m2 = NULL; 3204 int ret = -ESTALE; 3205 3206 /* If alias and dentry share a parent, then no extra locks required */ 3207 if (alias->d_parent == dentry->d_parent) 3208 goto out_unalias; 3209 3210 /* See lock_rename() */ 3211 if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) 3212 goto out_err; 3213 m1 = &dentry->d_sb->s_vfs_rename_mutex; 3214 if (!inode_trylock_shared(alias->d_parent->d_inode)) 3215 goto out_err; 3216 m2 = &alias->d_parent->d_inode->i_rwsem; 3217 out_unalias: 3218 if (alias->d_op && alias->d_op->d_unalias_trylock && 3219 !alias->d_op->d_unalias_trylock(alias)) 3220 goto out_err; 3221 __d_move(alias, dentry, false); 3222 if (alias->d_op && alias->d_op->d_unalias_unlock) 3223 alias->d_op->d_unalias_unlock(alias); 3224 ret = 0; 3225 out_err: 3226 if (m2) 3227 up_read(m2); 3228 if (m1) 3229 mutex_unlock(m1); 3230 return ret; 3231 } 3232 3233 struct dentry *d_splice_alias_ops(struct inode *inode, struct dentry *dentry, 3234 const struct dentry_operations *ops) 3235 { 3236 if (IS_ERR(inode)) 3237 return ERR_CAST(inode); 3238 3239 BUG_ON(!d_unhashed(dentry)); 3240 3241 if (!inode) 3242 goto out; 3243 3244 security_d_instantiate(dentry, inode); 3245 spin_lock(&inode->i_lock); 3246 if (S_ISDIR(inode->i_mode)) { 3247 struct dentry *new = __d_find_dir_alias(inode); 3248 if (unlikely(new)) { 3249 /* The reference to new ensures it remains an alias */ 3250 spin_unlock(&inode->i_lock); 3251 write_seqlock(&rename_lock); 3252 if (unlikely(d_ancestor(new, dentry))) { 3253 write_sequnlock(&rename_lock); 3254 dput(new); 3255 new = ERR_PTR(-ELOOP); 3256 pr_warn_ratelimited( 3257 "VFS: Lookup of '%s' in %s %s" 3258 " would have caused loop\n", 3259 dentry->d_name.name, 3260 inode->i_sb->s_type->name, 3261 inode->i_sb->s_id); 3262 } else if (!IS_ROOT(new)) { 3263 struct dentry *old_parent = dget(new->d_parent); 3264 int err = __d_unalias(dentry, new); 3265 write_sequnlock(&rename_lock); 3266 if (err) { 3267 dput(new); 3268 new = ERR_PTR(err); 3269 } 3270 dput(old_parent); 3271 } else { 3272 if (unlikely(!hlist_unhashed(&new->d_sib))) { 3273 // secondary root getting spliced 3274 spin_lock(&new->d_lock); 3275 unlink_secondary_root(new); 3276 spin_unlock(&new->d_lock); 3277 } 3278 __d_move(new, dentry, false); 3279 write_sequnlock(&rename_lock); 3280 } 3281 iput(inode); 3282 return new; 3283 } 3284 } 3285 out: 3286 __d_add(dentry, inode, ops); 3287 return NULL; 3288 } 3289 3290 /** 3291 * d_splice_alias - splice a disconnected dentry into the tree if one exists 3292 * @inode: the inode which may have a disconnected dentry 3293 * @dentry: a negative dentry which we want to point to the inode. 3294 * 3295 * If inode is a directory and has an IS_ROOT alias, then d_move that in 3296 * place of the given dentry and return it, else simply d_add the inode 3297 * to the dentry and return NULL. 3298 * 3299 * If a non-IS_ROOT directory is found, the filesystem is corrupt, and 3300 * we should error out: directories can't have multiple aliases. 3301 * 3302 * This is needed in the lookup routine of any filesystem that is exportable 3303 * (via knfsd) so that we can build dcache paths to directories effectively. 3304 * 3305 * If a dentry was found and moved, then it is returned. Otherwise NULL 3306 * is returned. This matches the expected return value of ->lookup. 3307 * 3308 * Cluster filesystems may call this function with a negative, hashed dentry. 3309 * In that case, we know that the inode will be a regular file, and also this 3310 * will only occur during atomic_open. So we need to check for the dentry 3311 * being already hashed only in the final case. 3312 */ 3313 struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) 3314 { 3315 return d_splice_alias_ops(inode, dentry, NULL); 3316 } 3317 EXPORT_SYMBOL(d_splice_alias); 3318 3319 /* 3320 * Test whether new_dentry is a subdirectory of old_dentry. 3321 * 3322 * Trivially implemented using the dcache structure 3323 */ 3324 3325 /** 3326 * is_subdir - is new dentry a subdirectory of old_dentry 3327 * @new_dentry: new dentry 3328 * @old_dentry: old dentry 3329 * 3330 * Returns true if new_dentry is a subdirectory of the parent (at any depth). 3331 * Returns false otherwise. 3332 * Caller must ensure that "new_dentry" is pinned before calling is_subdir() 3333 */ 3334 3335 bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) 3336 { 3337 bool subdir; 3338 unsigned seq; 3339 3340 if (new_dentry == old_dentry) 3341 return true; 3342 3343 /* Access d_parent under rcu as d_move() may change it. */ 3344 rcu_read_lock(); 3345 seq = read_seqbegin(&rename_lock); 3346 subdir = d_ancestor(old_dentry, new_dentry); 3347 /* Try lockless once... */ 3348 if (read_seqretry(&rename_lock, seq)) { 3349 /* ...else acquire lock for progress even on deep chains. */ 3350 read_seqlock_excl(&rename_lock); 3351 subdir = d_ancestor(old_dentry, new_dentry); 3352 read_sequnlock_excl(&rename_lock); 3353 } 3354 rcu_read_unlock(); 3355 return subdir; 3356 } 3357 EXPORT_SYMBOL(is_subdir); 3358 3359 void d_mark_tmpfile(struct file *file, struct inode *inode) 3360 { 3361 struct dentry *dentry = file->f_path.dentry; 3362 3363 BUG_ON(dname_external(dentry) || 3364 d_really_is_positive(dentry) || 3365 !d_unlinked(dentry)); 3366 spin_lock(&dentry->d_parent->d_lock); 3367 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 3368 dentry->__d_name.len = sprintf(dentry->d_shortname.string, "#%llu", 3369 (unsigned long long)inode->i_ino); 3370 spin_unlock(&dentry->d_lock); 3371 spin_unlock(&dentry->d_parent->d_lock); 3372 } 3373 EXPORT_SYMBOL(d_mark_tmpfile); 3374 3375 int d_mark_tmpfile_name(struct file *file, const struct qstr *name) 3376 { 3377 struct dentry *dentry = file->f_path.dentry; 3378 char *dname = dentry->d_shortname.string; 3379 3380 if (unlikely(dname_external(dentry) || 3381 d_really_is_positive(dentry) || 3382 !d_unlinked(dentry))) 3383 return -EINVAL; 3384 if (unlikely(name->len > DNAME_INLINE_LEN - 1)) 3385 return -ENAMETOOLONG; 3386 3387 spin_lock(&dentry->d_parent->d_lock); 3388 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 3389 dentry->__d_name.len = name->len; 3390 memcpy(dname, name->name, name->len); 3391 dname[name->len] = '\0'; 3392 spin_unlock(&dentry->d_lock); 3393 spin_unlock(&dentry->d_parent->d_lock); 3394 return 0; 3395 } 3396 EXPORT_SYMBOL(d_mark_tmpfile_name); 3397 3398 void d_tmpfile(struct file *file, struct inode *inode) 3399 { 3400 struct dentry *dentry = file->f_path.dentry; 3401 3402 inode_dec_link_count(inode); 3403 d_mark_tmpfile(file, inode); 3404 d_instantiate(dentry, inode); 3405 } 3406 EXPORT_SYMBOL(d_tmpfile); 3407 3408 /* 3409 * Obtain inode number of the parent dentry. 3410 */ 3411 ino_t d_parent_ino(struct dentry *dentry) 3412 { 3413 struct dentry *parent; 3414 struct inode *iparent; 3415 unsigned seq; 3416 ino_t ret; 3417 3418 scoped_guard(rcu) { 3419 seq = raw_seqcount_begin(&dentry->d_seq); 3420 parent = READ_ONCE(dentry->d_parent); 3421 iparent = d_inode_rcu(parent); 3422 if (likely(iparent)) { 3423 ret = iparent->i_ino; 3424 if (!read_seqcount_retry(&dentry->d_seq, seq)) 3425 return ret; 3426 } 3427 } 3428 3429 spin_lock(&dentry->d_lock); 3430 ret = dentry->d_parent->d_inode->i_ino; 3431 spin_unlock(&dentry->d_lock); 3432 return ret; 3433 } 3434 EXPORT_SYMBOL(d_parent_ino); 3435 3436 static __initdata unsigned long dhash_entries; 3437 static int __init set_dhash_entries(char *str) 3438 { 3439 return kstrtoul(str, 0, &dhash_entries) == 0; 3440 } 3441 __setup("dhash_entries=", set_dhash_entries); 3442 3443 static void __init dcache_init_early(void) 3444 { 3445 /* If hashes are distributed across NUMA nodes, defer 3446 * hash allocation until vmalloc space is available. 3447 */ 3448 if (hashdist) 3449 return; 3450 3451 dentry_hashtable = 3452 alloc_large_system_hash("Dentry cache", 3453 sizeof(struct hlist_bl_head), 3454 dhash_entries, 3455 13, 3456 HASH_EARLY | HASH_ZERO, 3457 &d_hash_shift, 3458 NULL, 3459 2, 3460 0); 3461 d_hash_shift = 32 - d_hash_shift; 3462 3463 runtime_const_init(shift, d_hash_shift); 3464 runtime_const_init(ptr, dentry_hashtable); 3465 } 3466 3467 static void __init dcache_init(void) 3468 { 3469 /* 3470 * A constructor could be added for stable state like the lists, 3471 * but it is probably not worth it because of the cache nature 3472 * of the dcache. 3473 */ 3474 __dentry_cache = KMEM_CACHE_USERCOPY(dentry, 3475 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_ACCOUNT, 3476 d_shortname.string); 3477 runtime_const_init(ptr, __dentry_cache); 3478 3479 /* Hash may have been set up in dcache_init_early */ 3480 if (!hashdist) 3481 return; 3482 3483 dentry_hashtable = 3484 alloc_large_system_hash("Dentry cache", 3485 sizeof(struct hlist_bl_head), 3486 dhash_entries, 3487 13, 3488 HASH_ZERO, 3489 &d_hash_shift, 3490 NULL, 3491 2, 3492 0); 3493 d_hash_shift = 32 - d_hash_shift; 3494 3495 runtime_const_init(shift, d_hash_shift); 3496 runtime_const_init(ptr, dentry_hashtable); 3497 } 3498 3499 void __init vfs_caches_init_early(void) 3500 { 3501 int i; 3502 3503 for (i = 0; i < ARRAY_SIZE(in_lookup_hashtable); i++) 3504 INIT_HLIST_BL_HEAD(&in_lookup_hashtable[i]); 3505 3506 dcache_init_early(); 3507 inode_init_early(); 3508 } 3509 3510 void __init vfs_caches_init(void) 3511 { 3512 filename_init(); 3513 dcache_init(); 3514 inode_init(); 3515 files_init(); 3516 files_maxfiles_init(); 3517 mnt_init(); 3518 bdev_cache_init(); 3519 chrdev_init(); 3520 } 3521