xref: /linux/fs/dcache.c (revision 2dbf708448c836754d25fe6108c5bfe1f5697c95)
1 /*
2  * fs/dcache.c
3  *
4  * Complete reimplementation
5  * (C) 1997 Thomas Schoebel-Theuer,
6  * with heavy changes by Linus Torvalds
7  */
8 
9 /*
10  * Notes on the allocation strategy:
11  *
12  * The dcache is a master of the icache - whenever a dcache entry
13  * exists, the inode will always exist. "iput()" is done either when
14  * the dcache entry is deleted or garbage collected.
15  */
16 
17 #include <linux/syscalls.h>
18 #include <linux/string.h>
19 #include <linux/mm.h>
20 #include <linux/fs.h>
21 #include <linux/fsnotify.h>
22 #include <linux/slab.h>
23 #include <linux/init.h>
24 #include <linux/hash.h>
25 #include <linux/cache.h>
26 #include <linux/export.h>
27 #include <linux/mount.h>
28 #include <linux/file.h>
29 #include <asm/uaccess.h>
30 #include <linux/security.h>
31 #include <linux/seqlock.h>
32 #include <linux/swap.h>
33 #include <linux/bootmem.h>
34 #include <linux/fs_struct.h>
35 #include <linux/hardirq.h>
36 #include <linux/bit_spinlock.h>
37 #include <linux/rculist_bl.h>
38 #include <linux/prefetch.h>
39 #include <linux/ratelimit.h>
40 #include "internal.h"
41 #include "mount.h"
42 
43 /*
44  * Usage:
45  * dcache->d_inode->i_lock protects:
46  *   - i_dentry, d_alias, d_inode of aliases
47  * dcache_hash_bucket lock protects:
48  *   - the dcache hash table
49  * s_anon bl list spinlock protects:
50  *   - the s_anon list (see __d_drop)
51  * dcache_lru_lock protects:
52  *   - the dcache lru lists and counters
53  * d_lock protects:
54  *   - d_flags
55  *   - d_name
56  *   - d_lru
57  *   - d_count
58  *   - d_unhashed()
59  *   - d_parent and d_subdirs
60  *   - children's d_child and d_parent
61  *   - d_alias, d_inode
62  *
63  * Ordering:
64  * dentry->d_inode->i_lock
65  *   dentry->d_lock
66  *     dcache_lru_lock
67  *     dcache_hash_bucket lock
68  *     s_anon lock
69  *
70  * If there is an ancestor relationship:
71  * dentry->d_parent->...->d_parent->d_lock
72  *   ...
73  *     dentry->d_parent->d_lock
74  *       dentry->d_lock
75  *
76  * If no ancestor relationship:
77  * if (dentry1 < dentry2)
78  *   dentry1->d_lock
79  *     dentry2->d_lock
80  */
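
/*
 * Illustrative sketch (hypothetical helper, not used elsewhere in this
 * file): locking two dentries with no ancestor relationship in the
 * address order documented above.
 */
static inline void lock_unrelated_pair(struct dentry *d1, struct dentry *d2)
{
	/* lower-addressed dentry is locked first, per the rule above */
	if (d1 > d2)
		swap(d1, d2);
	spin_lock(&d1->d_lock);
	spin_lock_nested(&d2->d_lock, DENTRY_D_LOCK_NESTED);
}
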
81 int sysctl_vfs_cache_pressure __read_mostly = 100;
82 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
83 
84 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
85 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
86 
87 EXPORT_SYMBOL(rename_lock);
88 
89 static struct kmem_cache *dentry_cache __read_mostly;
90 
91 /*
92  * This is the single most critical data structure when it comes
93  * to the dcache: the hashtable for lookups. Somebody should try
94  * to make this good - I've just made it work.
95  *
96  * This hash-function tries to avoid losing too many bits of hash
97  * information, yet avoid using a prime hash-size or similar.
98  */
99 #define D_HASHBITS     d_hash_shift
100 #define D_HASHMASK     d_hash_mask
101 
102 static unsigned int d_hash_mask __read_mostly;
103 static unsigned int d_hash_shift __read_mostly;
104 
105 static struct hlist_bl_head *dentry_hashtable __read_mostly;
106 
107 static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
108 					unsigned int hash)
109 {
110 	hash += (unsigned long) parent / L1_CACHE_BYTES;
111 	hash = hash + (hash >> D_HASHBITS);
112 	return dentry_hashtable + (hash & D_HASHMASK);
113 }
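
/*
 * Illustrative sketch (hypothetical helper): how a name reaches a hash
 * bucket, combining full_name_hash() with d_hash() above. This mirrors
 * what d_hash_and_lookup() does further down.
 */
static inline struct hlist_bl_head *example_bucket(const struct dentry *parent,
						   const unsigned char *name,
						   unsigned int len)
{
	unsigned int hash = full_name_hash(name, len);	/* standard name hash */

	return d_hash(parent, hash);	/* fold in the parent pointer */
}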
114 
115 /* Statistics gathering. */
116 struct dentry_stat_t dentry_stat = {
117 	.age_limit = 45,
118 };
119 
120 static DEFINE_PER_CPU(unsigned int, nr_dentry);
121 
122 #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
123 static int get_nr_dentry(void)
124 {
125 	int i;
126 	int sum = 0;
127 	for_each_possible_cpu(i)
128 		sum += per_cpu(nr_dentry, i);
129 	return sum < 0 ? 0 : sum;
130 }
131 
132 int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
133 		   size_t *lenp, loff_t *ppos)
134 {
135 	dentry_stat.nr_dentry = get_nr_dentry();
136 	return proc_dointvec(table, write, buffer, lenp, ppos);
137 }
138 #endif
139 
140 /*
141  * Compare 2 name strings, return 0 if they match, otherwise non-zero.
142  * The strings are both count bytes long, and count is non-zero.
143  */
144 static inline int dentry_cmp(const unsigned char *cs, size_t scount,
145 				const unsigned char *ct, size_t tcount)
146 {
147 #ifdef CONFIG_DCACHE_WORD_ACCESS
148 	unsigned long a, b, mask;
149 
150 	if (unlikely(scount != tcount))
151 		return 1;
152 
153 	for (;;) {
154 		a = *(unsigned long *)cs;
155 		b = *(unsigned long *)ct;
156 		if (tcount < sizeof(unsigned long))
157 			break;
158 		if (unlikely(a != b))
159 			return 1;
160 		cs += sizeof(unsigned long);
161 		ct += sizeof(unsigned long);
162 		tcount -= sizeof(unsigned long);
163 		if (!tcount)
164 			return 0;
165 	}
166 	mask = ~(~0ul << tcount*8);
167 	return unlikely(!!((a ^ b) & mask));
168 #else
169 	if (scount != tcount)
170 		return 1;
171 
172 	do {
173 		if (*cs != *ct)
174 			return 1;
175 		cs++;
176 		ct++;
177 		tcount--;
178 	} while (tcount);
179 	return 0;
180 #endif
181 }
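
/*
 * Worked example for the word-at-a-time path above: with tcount == 2 the
 * final mask is ~(~0ul << 16) == 0xffff, so only the low two bytes of
 * (a ^ b) reach the comparison. This assumes the little-endian,
 * unaligned-load-friendly semantics CONFIG_DCACHE_WORD_ACCESS advertises.
 */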
182 
183 static void __d_free(struct rcu_head *head)
184 {
185 	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
186 
187 	WARN_ON(!list_empty(&dentry->d_alias));
188 	if (dname_external(dentry))
189 		kfree(dentry->d_name.name);
190 	kmem_cache_free(dentry_cache, dentry);
191 }
192 
193 /*
194  * no locks, please.
195  */
196 static void d_free(struct dentry *dentry)
197 {
198 	BUG_ON(dentry->d_count);
199 	this_cpu_dec(nr_dentry);
200 	if (dentry->d_op && dentry->d_op->d_release)
201 		dentry->d_op->d_release(dentry);
202 
203 	/* if dentry was never visible to RCU, immediate free is OK */
204 	if (!(dentry->d_flags & DCACHE_RCUACCESS))
205 		__d_free(&dentry->d_u.d_rcu);
206 	else
207 		call_rcu(&dentry->d_u.d_rcu, __d_free);
208 }
209 
210 /**
211  * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
212  * @dentry: the target dentry
213  * After this call, in-progress rcu-walk path lookup will fail. This
214  * should be called after unhashing, and after changing d_inode (if
215  * the dentry has not already been unhashed).
216  */
217 static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
218 {
219 	assert_spin_locked(&dentry->d_lock);
220 	/* Go through a barrier */
221 	write_seqcount_barrier(&dentry->d_seq);
222 }
223 
224 /*
225  * Release the dentry's inode, using the filesystem
226  * d_iput() operation if defined. Dentry has no refcount
227  * and is unhashed.
228  */
229 static void dentry_iput(struct dentry * dentry)
230 	__releases(dentry->d_lock)
231 	__releases(dentry->d_inode->i_lock)
232 {
233 	struct inode *inode = dentry->d_inode;
234 	if (inode) {
235 		dentry->d_inode = NULL;
236 		list_del_init(&dentry->d_alias);
237 		spin_unlock(&dentry->d_lock);
238 		spin_unlock(&inode->i_lock);
239 		if (!inode->i_nlink)
240 			fsnotify_inoderemove(inode);
241 		if (dentry->d_op && dentry->d_op->d_iput)
242 			dentry->d_op->d_iput(dentry, inode);
243 		else
244 			iput(inode);
245 	} else {
246 		spin_unlock(&dentry->d_lock);
247 	}
248 }
249 
250 /*
251  * Release the dentry's inode, using the filesystem
252  * d_iput() operation if defined. dentry remains in-use.
253  */
254 static void dentry_unlink_inode(struct dentry * dentry)
255 	__releases(dentry->d_lock)
256 	__releases(dentry->d_inode->i_lock)
257 {
258 	struct inode *inode = dentry->d_inode;
259 	dentry->d_inode = NULL;
260 	list_del_init(&dentry->d_alias);
261 	dentry_rcuwalk_barrier(dentry);
262 	spin_unlock(&dentry->d_lock);
263 	spin_unlock(&inode->i_lock);
264 	if (!inode->i_nlink)
265 		fsnotify_inoderemove(inode);
266 	if (dentry->d_op && dentry->d_op->d_iput)
267 		dentry->d_op->d_iput(dentry, inode);
268 	else
269 		iput(inode);
270 }
271 
272 /*
273  * dentry_lru_(add|del|prune|move_list) must be called with d_lock held.
274  */
275 static void dentry_lru_add(struct dentry *dentry)
276 {
277 	if (list_empty(&dentry->d_lru)) {
278 		spin_lock(&dcache_lru_lock);
279 		list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
280 		dentry->d_sb->s_nr_dentry_unused++;
281 		dentry_stat.nr_unused++;
282 		spin_unlock(&dcache_lru_lock);
283 	}
284 }
285 
286 static void __dentry_lru_del(struct dentry *dentry)
287 {
288 	list_del_init(&dentry->d_lru);
289 	dentry->d_flags &= ~DCACHE_SHRINK_LIST;
290 	dentry->d_sb->s_nr_dentry_unused--;
291 	dentry_stat.nr_unused--;
292 }
293 
294 /*
295  * Remove a dentry with references from the LRU.
296  */
297 static void dentry_lru_del(struct dentry *dentry)
298 {
299 	if (!list_empty(&dentry->d_lru)) {
300 		spin_lock(&dcache_lru_lock);
301 		__dentry_lru_del(dentry);
302 		spin_unlock(&dcache_lru_lock);
303 	}
304 }
305 
306 /*
307  * Remove a dentry that is unreferenced and about to be pruned
308  * (unhashed and destroyed) from the LRU, and inform the file system.
309  * This wrapper should be called _prior_ to unhashing a victim dentry.
310  */
311 static void dentry_lru_prune(struct dentry *dentry)
312 {
313 	if (!list_empty(&dentry->d_lru)) {
314 		if (dentry->d_flags & DCACHE_OP_PRUNE)
315 			dentry->d_op->d_prune(dentry);
316 
317 		spin_lock(&dcache_lru_lock);
318 		__dentry_lru_del(dentry);
319 		spin_unlock(&dcache_lru_lock);
320 	}
321 }
322 
323 static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list)
324 {
325 	spin_lock(&dcache_lru_lock);
326 	if (list_empty(&dentry->d_lru)) {
327 		list_add_tail(&dentry->d_lru, list);
328 		dentry->d_sb->s_nr_dentry_unused++;
329 		dentry_stat.nr_unused++;
330 	} else {
331 		list_move_tail(&dentry->d_lru, list);
332 	}
333 	spin_unlock(&dcache_lru_lock);
334 }
335 
336 /**
337  * d_kill - kill dentry and return parent
338  * @dentry: dentry to kill
339  * @parent: parent dentry
340  *
341  * The dentry must already be unhashed and removed from the LRU.
342  *
343  * If this is the root of the dentry tree, return NULL.
344  *
345  * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
346  * d_kill.
347  */
348 static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
349 	__releases(dentry->d_lock)
350 	__releases(parent->d_lock)
351 	__releases(dentry->d_inode->i_lock)
352 {
353 	list_del(&dentry->d_u.d_child);
354 	/*
355 	 * Inform try_to_ascend() that we are no longer attached to the
356 	 * dentry tree
357 	 */
358 	dentry->d_flags |= DCACHE_DISCONNECTED;
359 	if (parent)
360 		spin_unlock(&parent->d_lock);
361 	dentry_iput(dentry);
362 	/*
363 	 * dentry_iput drops the locks, at which point nobody (except
364 	 * transient RCU lookups) can reach this dentry.
365 	 */
366 	d_free(dentry);
367 	return parent;
368 }
369 
370 /*
371  * Unhash a dentry without inserting an RCU walk barrier or checking that
372  * dentry->d_lock is locked.  The caller must take care of that, if
373  * appropriate.
374  */
375 static void __d_shrink(struct dentry *dentry)
376 {
377 	if (!d_unhashed(dentry)) {
378 		struct hlist_bl_head *b;
379 		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
380 			b = &dentry->d_sb->s_anon;
381 		else
382 			b = d_hash(dentry->d_parent, dentry->d_name.hash);
383 
384 		hlist_bl_lock(b);
385 		__hlist_bl_del(&dentry->d_hash);
386 		dentry->d_hash.pprev = NULL;
387 		hlist_bl_unlock(b);
388 	}
389 }
390 
391 /**
392  * d_drop - drop a dentry
393  * @dentry: dentry to drop
394  *
395  * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
396  * be found through a VFS lookup any more. Note that this is different from
397  * deleting the dentry - d_delete will try to mark the dentry negative if
398  * possible, giving a successful _negative_ lookup, while d_drop will
399  * just make the cache lookup fail.
400  *
401  * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
402  * reason (NFS timeouts or autofs deletes).
403  *
404  * __d_drop requires dentry->d_lock.
405  */
406 void __d_drop(struct dentry *dentry)
407 {
408 	if (!d_unhashed(dentry)) {
409 		__d_shrink(dentry);
410 		dentry_rcuwalk_barrier(dentry);
411 	}
412 }
413 EXPORT_SYMBOL(__d_drop);
414 
415 void d_drop(struct dentry *dentry)
416 {
417 	spin_lock(&dentry->d_lock);
418 	__d_drop(dentry);
419 	spin_unlock(&dentry->d_lock);
420 }
421 EXPORT_SYMBOL(d_drop);
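
/*
 * Illustrative sketch (hypothetical ->d_revalidate()): the typical
 * d_drop() caller is a revalidate routine that has decided the cached
 * entry is stale, as in the NFS-timeout case mentioned above.
 */
static inline int example_d_revalidate(struct dentry *dentry,
				       struct nameidata *nd)
{
	bool stale = false;	/* stand-in for a filesystem staleness check */

	if (stale) {
		d_drop(dentry);	/* unhash: future cache lookups will miss */
		return 0;	/* tell the VFS this dentry is invalid */
	}
	return 1;		/* still valid */
}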
422 
423 /*
424  * d_clear_need_lookup - drop a dentry from cache and clear the need lookup flag
425  * @dentry: dentry to drop
426  *
427  * This is called when we do a lookup on a placeholder dentry that needed to be
428  * looked up.  The dentry should have been hashed in order for it to be found by
429  * the lookup code, but now needs to be unhashed while we do the actual lookup
430  * and clear the DCACHE_NEED_LOOKUP flag.
431  */
432 void d_clear_need_lookup(struct dentry *dentry)
433 {
434 	spin_lock(&dentry->d_lock);
435 	__d_drop(dentry);
436 	dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
437 	spin_unlock(&dentry->d_lock);
438 }
439 EXPORT_SYMBOL(d_clear_need_lookup);
440 
441 /*
442  * Finish off a dentry we've decided to kill.
443  * dentry->d_lock must be held, returns with it unlocked.
444  * If ref is non-zero, then decrement the refcount too.
445  * Returns dentry requiring refcount drop, or NULL if we're done.
446  */
447 static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
448 	__releases(dentry->d_lock)
449 {
450 	struct inode *inode;
451 	struct dentry *parent;
452 
453 	inode = dentry->d_inode;
454 	if (inode && !spin_trylock(&inode->i_lock)) {
455 relock:
456 		spin_unlock(&dentry->d_lock);
457 		cpu_relax();
458 		return dentry; /* try again with same dentry */
459 	}
460 	if (IS_ROOT(dentry))
461 		parent = NULL;
462 	else
463 		parent = dentry->d_parent;
464 	if (parent && !spin_trylock(&parent->d_lock)) {
465 		if (inode)
466 			spin_unlock(&inode->i_lock);
467 		goto relock;
468 	}
469 
470 	if (ref)
471 		dentry->d_count--;
472 	/*
473 	 * if dentry was on the d_lru list delete it from there.
474 	 * inform the fs via d_prune that this dentry is about to be
475 	 * unhashed and destroyed.
476 	 */
477 	dentry_lru_prune(dentry);
478 	/* if it was on the hash then remove it */
479 	__d_drop(dentry);
480 	return d_kill(dentry, parent);
481 }
482 
483 /*
484  * This is dput
485  *
486  * This is complicated by the fact that we do not want to put
487  * dentries that are no longer on any hash chain on the unused
488  * list: we'd much rather just get rid of them immediately.
489  *
490  * However, that implies that we have to traverse the dentry
491  * tree upwards to the parents which might _also_ now be
492  * scheduled for deletion (it may have been only waiting for
493  * its last child to go away).
494  *
495  * This tail recursion is done by hand as we don't want to depend
496  * on the compiler to always get this right (gcc generally doesn't).
497  * Real recursion would eat up our stack space.
498  */
499 
500 /*
501  * dput - release a dentry
502  * @dentry: dentry to release
503  *
504  * Release a dentry. This will drop the usage count and if appropriate
505  * call the dentry unlink method as well as removing it from the queues and
506  * releasing its resources. If the parent dentries were scheduled for release
507  * they too may now get deleted.
508  */
509 void dput(struct dentry *dentry)
510 {
511 	if (!dentry)
512 		return;
513 
514 repeat:
515 	if (dentry->d_count == 1)
516 		might_sleep();
517 	spin_lock(&dentry->d_lock);
518 	BUG_ON(!dentry->d_count);
519 	if (dentry->d_count > 1) {
520 		dentry->d_count--;
521 		spin_unlock(&dentry->d_lock);
522 		return;
523 	}
524 
525 	if (dentry->d_flags & DCACHE_OP_DELETE) {
526 		if (dentry->d_op->d_delete(dentry))
527 			goto kill_it;
528 	}
529 
530 	/* Unreachable? Get rid of it */
531 	if (d_unhashed(dentry))
532 		goto kill_it;
533 
534 	/*
535 	 * If this dentry needs lookup, don't set the referenced flag so that it
536 	 * is more likely to be cleaned up by the dcache shrinker in case of
537 	 * memory pressure.
538 	 */
539 	if (!d_need_lookup(dentry))
540 		dentry->d_flags |= DCACHE_REFERENCED;
541 	dentry_lru_add(dentry);
542 
543 	dentry->d_count--;
544 	spin_unlock(&dentry->d_lock);
545 	return;
546 
547 kill_it:
548 	dentry = dentry_kill(dentry, 1);
549 	if (dentry)
550 		goto repeat;
551 }
552 EXPORT_SYMBOL(dput);
553 
554 /**
555  * d_invalidate - invalidate a dentry
556  * @dentry: dentry to invalidate
557  *
558  * Try to invalidate the dentry if it turns out to be
559  * possible. If there are other dentries that can be
560  * reached through this one we can't delete it and we
561  * return -EBUSY. On success we return 0.
562  *
563  * no dcache lock.
564  */
565 
566 int d_invalidate(struct dentry * dentry)
567 {
568 	/*
569 	 * If it's already been dropped, return OK.
570 	 */
571 	spin_lock(&dentry->d_lock);
572 	if (d_unhashed(dentry)) {
573 		spin_unlock(&dentry->d_lock);
574 		return 0;
575 	}
576 	/*
577 	 * Check whether to do a partial shrink_dcache
578 	 * to get rid of unused child entries.
579 	 */
580 	if (!list_empty(&dentry->d_subdirs)) {
581 		spin_unlock(&dentry->d_lock);
582 		shrink_dcache_parent(dentry);
583 		spin_lock(&dentry->d_lock);
584 	}
585 
586 	/*
587 	 * Somebody else still using it?
588 	 *
589 	 * If it's a directory, we can't drop it
590 	 * for fear of somebody re-populating it
591 	 * with children (even though dropping it
592 	 * would make it unreachable from the root,
593 	 * we might still populate it if it was a
594 	 * working directory or similar).
595 	 * We also need to leave mountpoints alone,
596 	 * directory or not.
597 	 */
598 	if (dentry->d_count > 1 && dentry->d_inode) {
599 		if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) {
600 			spin_unlock(&dentry->d_lock);
601 			return -EBUSY;
602 		}
603 	}
604 
605 	__d_drop(dentry);
606 	spin_unlock(&dentry->d_lock);
607 	return 0;
608 }
609 EXPORT_SYMBOL(d_invalidate);
610 
611 /* This must be called with d_lock held */
612 static inline void __dget_dlock(struct dentry *dentry)
613 {
614 	dentry->d_count++;
615 }
616 
617 static inline void __dget(struct dentry *dentry)
618 {
619 	spin_lock(&dentry->d_lock);
620 	__dget_dlock(dentry);
621 	spin_unlock(&dentry->d_lock);
622 }
623 
624 struct dentry *dget_parent(struct dentry *dentry)
625 {
626 	struct dentry *ret;
627 
628 repeat:
629 	/*
630 	 * Don't need rcu_dereference because we re-check it was correct under
631 	 * the lock.
632 	 */
633 	rcu_read_lock();
634 	ret = dentry->d_parent;
635 	spin_lock(&ret->d_lock);
636 	if (unlikely(ret != dentry->d_parent)) {
637 		spin_unlock(&ret->d_lock);
638 		rcu_read_unlock();
639 		goto repeat;
640 	}
641 	rcu_read_unlock();
642 	BUG_ON(!ret->d_count);
643 	ret->d_count++;
644 	spin_unlock(&ret->d_lock);
645 	return ret;
646 }
647 EXPORT_SYMBOL(dget_parent);
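
/*
 * Illustrative sketch (hypothetical helper): the pattern dget_parent()
 * exists for - pinning a parent that cannot change under a concurrent
 * rename while it is being used.
 */
static inline void example_with_stable_parent(struct dentry *dentry)
{
	struct dentry *parent = dget_parent(dentry);	/* takes a reference */

	/* parent (and parent->d_inode) stays valid here */

	dput(parent);					/* drop the reference */
}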
648 
649 /**
650  * d_find_alias - grab a hashed alias of inode
651  * @inode: inode in question
652  * @want_discon:  flag, used by d_splice_alias, to request
653  *          that only a DISCONNECTED alias be returned.
654  *
655  * If inode has a hashed alias, or is a directory and has any alias,
656  * acquire the reference to alias and return it. Otherwise return NULL.
657  * Notice that if inode is a directory there can be only one alias and
658  * it can be unhashed only if it has no children, or if it is the root
659  * of a filesystem.
660  *
661  * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
662  * any other hashed alias over that one unless @want_discon is set,
663  * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
664  */
665 static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
666 {
667 	struct dentry *alias, *discon_alias;
668 
669 again:
670 	discon_alias = NULL;
671 	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
672 		spin_lock(&alias->d_lock);
673 		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
674 			if (IS_ROOT(alias) &&
675 			    (alias->d_flags & DCACHE_DISCONNECTED)) {
676 				discon_alias = alias;
677 			} else if (!want_discon) {
678 				__dget_dlock(alias);
679 				spin_unlock(&alias->d_lock);
680 				return alias;
681 			}
682 		}
683 		spin_unlock(&alias->d_lock);
684 	}
685 	if (discon_alias) {
686 		alias = discon_alias;
687 		spin_lock(&alias->d_lock);
688 		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
689 			if (IS_ROOT(alias) &&
690 			    (alias->d_flags & DCACHE_DISCONNECTED)) {
691 				__dget_dlock(alias);
692 				spin_unlock(&alias->d_lock);
693 				return alias;
694 			}
695 		}
696 		spin_unlock(&alias->d_lock);
697 		goto again;
698 	}
699 	return NULL;
700 }
701 
702 struct dentry *d_find_alias(struct inode *inode)
703 {
704 	struct dentry *de = NULL;
705 
706 	if (!list_empty(&inode->i_dentry)) {
707 		spin_lock(&inode->i_lock);
708 		de = __d_find_alias(inode, 0);
709 		spin_unlock(&inode->i_lock);
710 	}
711 	return de;
712 }
713 EXPORT_SYMBOL(d_find_alias);
714 
715 /*
716  * Try to kill dentries associated with this inode.
717  * WARNING: you must own a reference to inode.
718  */
719 void d_prune_aliases(struct inode *inode)
720 {
721 	struct dentry *dentry;
722 restart:
723 	spin_lock(&inode->i_lock);
724 	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
725 		spin_lock(&dentry->d_lock);
726 		if (!dentry->d_count) {
727 			__dget_dlock(dentry);
728 			__d_drop(dentry);
729 			spin_unlock(&dentry->d_lock);
730 			spin_unlock(&inode->i_lock);
731 			dput(dentry);
732 			goto restart;
733 		}
734 		spin_unlock(&dentry->d_lock);
735 	}
736 	spin_unlock(&inode->i_lock);
737 }
738 EXPORT_SYMBOL(d_prune_aliases);
739 
740 /*
741  * Try to throw away a dentry - free the inode, dput the parent.
742  * Requires dentry->d_lock is held, and dentry->d_count == 0.
743  * Releases dentry->d_lock.
744  *
745  * This may fail if locks cannot be acquired - no problem, just try again.
746  */
747 static void try_prune_one_dentry(struct dentry *dentry)
748 	__releases(dentry->d_lock)
749 {
750 	struct dentry *parent;
751 
752 	parent = dentry_kill(dentry, 0);
753 	/*
754 	 * If dentry_kill returns NULL, we have nothing more to do.
755 	 * if it returns the same dentry, trylocks failed. In either
756 	 * case, just loop again.
757 	 *
758 	 * Otherwise, we need to prune ancestors too. This is necessary
759 	 * to prevent quadratic behavior of shrink_dcache_parent(), but
760 	 * is also expected to be beneficial in reducing dentry cache
761 	 * fragmentation.
762 	 */
763 	if (!parent)
764 		return;
765 	if (parent == dentry)
766 		return;
767 
768 	/* Prune ancestors. */
769 	dentry = parent;
770 	while (dentry) {
771 		spin_lock(&dentry->d_lock);
772 		if (dentry->d_count > 1) {
773 			dentry->d_count--;
774 			spin_unlock(&dentry->d_lock);
775 			return;
776 		}
777 		dentry = dentry_kill(dentry, 1);
778 	}
779 }
780 
781 static void shrink_dentry_list(struct list_head *list)
782 {
783 	struct dentry *dentry;
784 
785 	rcu_read_lock();
786 	for (;;) {
787 		dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
788 		if (&dentry->d_lru == list)
789 			break; /* empty */
790 		spin_lock(&dentry->d_lock);
791 		if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
792 			spin_unlock(&dentry->d_lock);
793 			continue;
794 		}
795 
796 		/*
797 		 * We found an inuse dentry which was not removed from
798 		 * the LRU because of laziness during lookup.  Do not free
799 		 * it - just keep it off the LRU list.
800 		 */
801 		if (dentry->d_count) {
802 			dentry_lru_del(dentry);
803 			spin_unlock(&dentry->d_lock);
804 			continue;
805 		}
806 
807 		rcu_read_unlock();
808 
809 		try_prune_one_dentry(dentry);
810 
811 		rcu_read_lock();
812 	}
813 	rcu_read_unlock();
814 }
815 
816 /**
817  * prune_dcache_sb - shrink the dcache
818  * @sb: superblock
819  * @count: number of entries to try to free
820  *
821  * Attempt to shrink the superblock dcache LRU by @count entries. This is
822  * done when we need more memory and is called from the superblock shrinker
823  * function.
824  *
825  * This function may fail to free any resources if all the dentries are in
826  * use.
827  */
828 void prune_dcache_sb(struct super_block *sb, int count)
829 {
830 	struct dentry *dentry;
831 	LIST_HEAD(referenced);
832 	LIST_HEAD(tmp);
833 
834 relock:
835 	spin_lock(&dcache_lru_lock);
836 	while (!list_empty(&sb->s_dentry_lru)) {
837 		dentry = list_entry(sb->s_dentry_lru.prev,
838 				struct dentry, d_lru);
839 		BUG_ON(dentry->d_sb != sb);
840 
841 		if (!spin_trylock(&dentry->d_lock)) {
842 			spin_unlock(&dcache_lru_lock);
843 			cpu_relax();
844 			goto relock;
845 		}
846 
847 		if (dentry->d_flags & DCACHE_REFERENCED) {
848 			dentry->d_flags &= ~DCACHE_REFERENCED;
849 			list_move(&dentry->d_lru, &referenced);
850 			spin_unlock(&dentry->d_lock);
851 		} else {
852 			list_move_tail(&dentry->d_lru, &tmp);
853 			dentry->d_flags |= DCACHE_SHRINK_LIST;
854 			spin_unlock(&dentry->d_lock);
855 			if (!--count)
856 				break;
857 		}
858 		cond_resched_lock(&dcache_lru_lock);
859 	}
860 	if (!list_empty(&referenced))
861 		list_splice(&referenced, &sb->s_dentry_lru);
862 	spin_unlock(&dcache_lru_lock);
863 
864 	shrink_dentry_list(&tmp);
865 }
866 
867 /**
868  * shrink_dcache_sb - shrink dcache for a superblock
869  * @sb: superblock
870  *
871  * Shrink the dcache for the specified super block. This is used to free
872  * the dcache before unmounting a file system.
873  */
874 void shrink_dcache_sb(struct super_block *sb)
875 {
876 	LIST_HEAD(tmp);
877 
878 	spin_lock(&dcache_lru_lock);
879 	while (!list_empty(&sb->s_dentry_lru)) {
880 		list_splice_init(&sb->s_dentry_lru, &tmp);
881 		spin_unlock(&dcache_lru_lock);
882 		shrink_dentry_list(&tmp);
883 		spin_lock(&dcache_lru_lock);
884 	}
885 	spin_unlock(&dcache_lru_lock);
886 }
887 EXPORT_SYMBOL(shrink_dcache_sb);
888 
889 /*
890  * destroy a single subtree of dentries for unmount
891  * - see the comments on shrink_dcache_for_umount() for a description of the
892  *   locking
893  */
894 static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
895 {
896 	struct dentry *parent;
897 
898 	BUG_ON(!IS_ROOT(dentry));
899 
900 	for (;;) {
901 		/* descend to the first leaf in the current subtree */
902 		while (!list_empty(&dentry->d_subdirs))
903 			dentry = list_entry(dentry->d_subdirs.next,
904 					    struct dentry, d_u.d_child);
905 
906 		/* consume the dentries from this leaf up through its parents
907 		 * until we find one with children or run out altogether */
908 		do {
909 			struct inode *inode;
910 
911 			/*
912 			 * remove the dentry from the lru, and inform
913 			 * the fs that this dentry is about to be
914 			 * unhashed and destroyed.
915 			 */
916 			dentry_lru_prune(dentry);
917 			__d_shrink(dentry);
918 
919 			if (dentry->d_count != 0) {
920 				printk(KERN_ERR
921 				       "BUG: Dentry %p{i=%lx,n=%s}"
922 				       " still in use (%d)"
923 				       " [unmount of %s %s]\n",
924 				       dentry,
925 				       dentry->d_inode ?
926 				       dentry->d_inode->i_ino : 0UL,
927 				       dentry->d_name.name,
928 				       dentry->d_count,
929 				       dentry->d_sb->s_type->name,
930 				       dentry->d_sb->s_id);
931 				BUG();
932 			}
933 
934 			if (IS_ROOT(dentry)) {
935 				parent = NULL;
936 				list_del(&dentry->d_u.d_child);
937 			} else {
938 				parent = dentry->d_parent;
939 				parent->d_count--;
940 				list_del(&dentry->d_u.d_child);
941 			}
942 
943 			inode = dentry->d_inode;
944 			if (inode) {
945 				dentry->d_inode = NULL;
946 				list_del_init(&dentry->d_alias);
947 				if (dentry->d_op && dentry->d_op->d_iput)
948 					dentry->d_op->d_iput(dentry, inode);
949 				else
950 					iput(inode);
951 			}
952 
953 			d_free(dentry);
954 
955 			/* finished when we fall off the top of the tree,
956 			 * otherwise we ascend to the parent and move to the
957 			 * next sibling if there is one */
958 			if (!parent)
959 				return;
960 			dentry = parent;
961 		} while (list_empty(&dentry->d_subdirs));
962 
963 		dentry = list_entry(dentry->d_subdirs.next,
964 				    struct dentry, d_u.d_child);
965 	}
966 }
967 
968 /*
969  * destroy the dentries attached to a superblock on unmounting
970  * - we don't need to use dentry->d_lock because:
971  *   - the superblock is detached from all mountings and open files, so the
972  *     dentry trees will not be rearranged by the VFS
973  *   - s_umount is write-locked, so the memory pressure shrinker will ignore
974  *     any dentries belonging to this superblock that it comes across
975  *   - the filesystem itself is no longer permitted to rearrange the dentries
976  *     in this superblock
977  */
978 void shrink_dcache_for_umount(struct super_block *sb)
979 {
980 	struct dentry *dentry;
981 
982 	if (down_read_trylock(&sb->s_umount))
983 		BUG();
984 
985 	dentry = sb->s_root;
986 	sb->s_root = NULL;
987 	dentry->d_count--;
988 	shrink_dcache_for_umount_subtree(dentry);
989 
990 	while (!hlist_bl_empty(&sb->s_anon)) {
991 		dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
992 		shrink_dcache_for_umount_subtree(dentry);
993 	}
994 }
995 
996 /*
997  * This tries to ascend one level of parenthood, but
998  * we can race with renaming, so we need to re-check
999  * the parenthood after dropping the lock and check
1000  * that the sequence number still matches.
1001  */
1002 static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
1003 {
1004 	struct dentry *new = old->d_parent;
1005 
1006 	rcu_read_lock();
1007 	spin_unlock(&old->d_lock);
1008 	spin_lock(&new->d_lock);
1009 
1010 	/*
1011 	 * might go back up the wrong parent if we have had a rename
1012 	 * or deletion
1013 	 */
1014 	if (new != old->d_parent ||
1015 		 (old->d_flags & DCACHE_DISCONNECTED) ||
1016 		 (!locked && read_seqretry(&rename_lock, seq))) {
1017 		spin_unlock(&new->d_lock);
1018 		new = NULL;
1019 	}
1020 	rcu_read_unlock();
1021 	return new;
1022 }
1023 
1024 
1025 /*
1026  * Search for at least 1 mount point in the dentry's subdirs.
1027  * We descend to the next level whenever the d_subdirs
1028  * list is non-empty and continue searching.
1029  */
1030 
1031 /**
1032  * have_submounts - check for mounts over a dentry
1033  * @parent: dentry to check.
1034  *
1035  * Return true if the parent or its subdirectories contain
1036  * a mount point
1037  */
1038 int have_submounts(struct dentry *parent)
1039 {
1040 	struct dentry *this_parent;
1041 	struct list_head *next;
1042 	unsigned seq;
1043 	int locked = 0;
1044 
1045 	seq = read_seqbegin(&rename_lock);
1046 again:
1047 	this_parent = parent;
1048 
1049 	if (d_mountpoint(parent))
1050 		goto positive;
1051 	spin_lock(&this_parent->d_lock);
1052 repeat:
1053 	next = this_parent->d_subdirs.next;
1054 resume:
1055 	while (next != &this_parent->d_subdirs) {
1056 		struct list_head *tmp = next;
1057 		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1058 		next = tmp->next;
1059 
1060 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1061 		/* Have we found a mount point ? */
1062 		if (d_mountpoint(dentry)) {
1063 			spin_unlock(&dentry->d_lock);
1064 			spin_unlock(&this_parent->d_lock);
1065 			goto positive;
1066 		}
1067 		if (!list_empty(&dentry->d_subdirs)) {
1068 			spin_unlock(&this_parent->d_lock);
1069 			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1070 			this_parent = dentry;
1071 			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1072 			goto repeat;
1073 		}
1074 		spin_unlock(&dentry->d_lock);
1075 	}
1076 	/*
1077 	 * All done at this level ... ascend and resume the search.
1078 	 */
1079 	if (this_parent != parent) {
1080 		struct dentry *child = this_parent;
1081 		this_parent = try_to_ascend(this_parent, locked, seq);
1082 		if (!this_parent)
1083 			goto rename_retry;
1084 		next = child->d_u.d_child.next;
1085 		goto resume;
1086 	}
1087 	spin_unlock(&this_parent->d_lock);
1088 	if (!locked && read_seqretry(&rename_lock, seq))
1089 		goto rename_retry;
1090 	if (locked)
1091 		write_sequnlock(&rename_lock);
1092 	return 0; /* No mount points found in tree */
1093 positive:
1094 	if (!locked && read_seqretry(&rename_lock, seq))
1095 		goto rename_retry;
1096 	if (locked)
1097 		write_sequnlock(&rename_lock);
1098 	return 1;
1099 
1100 rename_retry:
1101 	locked = 1;
1102 	write_seqlock(&rename_lock);
1103 	goto again;
1104 }
1105 EXPORT_SYMBOL(have_submounts);
1106 
1107 /*
1108  * Search the dentry child list for the specified parent,
1109  * and move any unused dentries to the end of the unused
1110  * list for prune_dcache(). We descend to the next level
1111  * whenever the d_subdirs list is non-empty and continue
1112  * searching.
1113  *
1114  * It returns zero iff there are no unused children,
1115  * otherwise it returns the number of children moved to
1116  * the end of the unused list. This may not be the total
1117  * number of unused children, because select_parent can
1118  * drop the lock and return early due to latency
1119  * constraints.
1120  */
1121 static int select_parent(struct dentry *parent, struct list_head *dispose)
1122 {
1123 	struct dentry *this_parent;
1124 	struct list_head *next;
1125 	unsigned seq;
1126 	int found = 0;
1127 	int locked = 0;
1128 
1129 	seq = read_seqbegin(&rename_lock);
1130 again:
1131 	this_parent = parent;
1132 	spin_lock(&this_parent->d_lock);
1133 repeat:
1134 	next = this_parent->d_subdirs.next;
1135 resume:
1136 	while (next != &this_parent->d_subdirs) {
1137 		struct list_head *tmp = next;
1138 		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1139 		next = tmp->next;
1140 
1141 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1142 
1143 		/*
1144 		 * move only zero ref count dentries to the dispose list.
1145 		 *
1146 		 * Those which are presently on the shrink list, being processed
1147 		 * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
1148 		 * loop in shrink_dcache_parent() might not make any progress
1149 		 * and loop forever.
1150 		 */
1151 		if (dentry->d_count) {
1152 			dentry_lru_del(dentry);
1153 		} else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1154 			dentry_lru_move_list(dentry, dispose);
1155 			dentry->d_flags |= DCACHE_SHRINK_LIST;
1156 			found++;
1157 		}
1158 		/*
1159 		 * We can return to the caller if we have found some (this
1160 		 * ensures forward progress). We'll be coming back to find
1161 		 * the rest.
1162 		 */
1163 		if (found && need_resched()) {
1164 			spin_unlock(&dentry->d_lock);
1165 			goto out;
1166 		}
1167 
1168 		/*
1169 		 * Descend a level if the d_subdirs list is non-empty.
1170 		 */
1171 		if (!list_empty(&dentry->d_subdirs)) {
1172 			spin_unlock(&this_parent->d_lock);
1173 			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1174 			this_parent = dentry;
1175 			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1176 			goto repeat;
1177 		}
1178 
1179 		spin_unlock(&dentry->d_lock);
1180 	}
1181 	/*
1182 	 * All done at this level ... ascend and resume the search.
1183 	 */
1184 	if (this_parent != parent) {
1185 		struct dentry *child = this_parent;
1186 		this_parent = try_to_ascend(this_parent, locked, seq);
1187 		if (!this_parent)
1188 			goto rename_retry;
1189 		next = child->d_u.d_child.next;
1190 		goto resume;
1191 	}
1192 out:
1193 	spin_unlock(&this_parent->d_lock);
1194 	if (!locked && read_seqretry(&rename_lock, seq))
1195 		goto rename_retry;
1196 	if (locked)
1197 		write_sequnlock(&rename_lock);
1198 	return found;
1199 
1200 rename_retry:
1201 	if (found)
1202 		return found;
1203 	locked = 1;
1204 	write_seqlock(&rename_lock);
1205 	goto again;
1206 }
1207 
1208 /**
1209  * shrink_dcache_parent - prune dcache
1210  * @parent: parent of entries to prune
1211  *
1212  * Prune the dcache to remove unused children of the parent dentry.
1213  */
1214 void shrink_dcache_parent(struct dentry * parent)
1215 {
1216 	LIST_HEAD(dispose);
1217 	int found;
1218 
1219 	while ((found = select_parent(parent, &dispose)) != 0)
1220 		shrink_dentry_list(&dispose);
1221 }
1222 EXPORT_SYMBOL(shrink_dcache_parent);
1223 
1224 /**
1225  * __d_alloc	-	allocate a dcache entry
1226  * @sb: filesystem it will belong to
1227  * @name: qstr of the name
1228  *
1229  * Allocates a dentry. It returns %NULL if there is insufficient memory
1230  * available. On a success the dentry is returned. The name passed in is
1231  * copied and the copy passed in may be reused after this call.
1232  */
1233 
1234 struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1235 {
1236 	struct dentry *dentry;
1237 	char *dname;
1238 
1239 	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
1240 	if (!dentry)
1241 		return NULL;
1242 
1243 	if (name->len > DNAME_INLINE_LEN-1) {
1244 		dname = kmalloc(name->len + 1, GFP_KERNEL);
1245 		if (!dname) {
1246 			kmem_cache_free(dentry_cache, dentry);
1247 			return NULL;
1248 		}
1249 	} else  {
1250 		dname = dentry->d_iname;
1251 	}
1252 	dentry->d_name.name = dname;
1253 
1254 	dentry->d_name.len = name->len;
1255 	dentry->d_name.hash = name->hash;
1256 	memcpy(dname, name->name, name->len);
1257 	dname[name->len] = 0;
1258 
1259 	dentry->d_count = 1;
1260 	dentry->d_flags = 0;
1261 	spin_lock_init(&dentry->d_lock);
1262 	seqcount_init(&dentry->d_seq);
1263 	dentry->d_inode = NULL;
1264 	dentry->d_parent = dentry;
1265 	dentry->d_sb = sb;
1266 	dentry->d_op = NULL;
1267 	dentry->d_fsdata = NULL;
1268 	INIT_HLIST_BL_NODE(&dentry->d_hash);
1269 	INIT_LIST_HEAD(&dentry->d_lru);
1270 	INIT_LIST_HEAD(&dentry->d_subdirs);
1271 	INIT_LIST_HEAD(&dentry->d_alias);
1272 	INIT_LIST_HEAD(&dentry->d_u.d_child);
1273 	d_set_d_op(dentry, dentry->d_sb->s_d_op);
1274 
1275 	this_cpu_inc(nr_dentry);
1276 
1277 	return dentry;
1278 }
1279 
1280 /**
1281  * d_alloc	-	allocate a dcache entry
1282  * @parent: parent of entry to allocate
1283  * @name: qstr of the name
1284  *
1285  * Allocates a dentry. It returns %NULL if there is insufficient memory
1286  * available. On a success the dentry is returned. The name passed in is
1287  * copied and the copy passed in may be reused after this call.
1288  */
1289 struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1290 {
1291 	struct dentry *dentry = __d_alloc(parent->d_sb, name);
1292 	if (!dentry)
1293 		return NULL;
1294 
1295 	spin_lock(&parent->d_lock);
1296 	/*
1297 	 * don't need child lock because it is not subject
1298 	 * to concurrency here
1299 	 */
1300 	__dget_dlock(parent);
1301 	dentry->d_parent = parent;
1302 	list_add(&dentry->d_u.d_child, &parent->d_subdirs);
1303 	spin_unlock(&parent->d_lock);
1304 
1305 	return dentry;
1306 }
1307 EXPORT_SYMBOL(d_alloc);
1308 
1309 struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1310 {
1311 	struct dentry *dentry = __d_alloc(sb, name);
1312 	if (dentry)
1313 		dentry->d_flags |= DCACHE_DISCONNECTED;
1314 	return dentry;
1315 }
1316 EXPORT_SYMBOL(d_alloc_pseudo);
1317 
1318 struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1319 {
1320 	struct qstr q;
1321 
1322 	q.name = name;
1323 	q.len = strlen(name);
1324 	q.hash = full_name_hash(q.name, q.len);
1325 	return d_alloc(parent, &q);
1326 }
1327 EXPORT_SYMBOL(d_alloc_name);
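
/*
 * Illustrative sketch (hypothetical helper): creating and exposing a
 * named child once an inode is in hand, as a simple pseudo-filesystem
 * would. Real callers hold the parent directory's i_mutex.
 */
static inline struct dentry *example_add_child(struct dentry *parent,
					       struct inode *inode,
					       const char *name)
{
	struct dentry *dentry = d_alloc_name(parent, name);

	if (!dentry)
		return NULL;
	d_add(dentry, inode);	/* instantiate + rehash; consumes inode ref */
	return dentry;
}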
1328 
1329 void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1330 {
1331 	WARN_ON_ONCE(dentry->d_op);
1332 	WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH	|
1333 				DCACHE_OP_COMPARE	|
1334 				DCACHE_OP_REVALIDATE	|
1335 				DCACHE_OP_DELETE ));
1336 	dentry->d_op = op;
1337 	if (!op)
1338 		return;
1339 	if (op->d_hash)
1340 		dentry->d_flags |= DCACHE_OP_HASH;
1341 	if (op->d_compare)
1342 		dentry->d_flags |= DCACHE_OP_COMPARE;
1343 	if (op->d_revalidate)
1344 		dentry->d_flags |= DCACHE_OP_REVALIDATE;
1345 	if (op->d_delete)
1346 		dentry->d_flags |= DCACHE_OP_DELETE;
1347 	if (op->d_prune)
1348 		dentry->d_flags |= DCACHE_OP_PRUNE;
1349 
1350 }
1351 EXPORT_SYMBOL(d_set_d_op);
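
/*
 * Illustrative sketch (hypothetical names): filesystems normally install
 * dentry operations once, via sb->s_d_op at mount time, so the
 * d_set_d_op() call in __d_alloc() above tags every new dentry.
 */
static int example_d_delete(const struct dentry *dentry)
{
	return 1;	/* never keep unused dentries cached */
}

static const struct dentry_operations example_dentry_ops = {
	.d_delete	= example_d_delete,
};

/* in a hypothetical fill_super(): sb->s_d_op = &example_dentry_ops; */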
1352 
1353 static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1354 {
1355 	spin_lock(&dentry->d_lock);
1356 	if (inode) {
1357 		if (unlikely(IS_AUTOMOUNT(inode)))
1358 			dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
1359 		list_add(&dentry->d_alias, &inode->i_dentry);
1360 	}
1361 	dentry->d_inode = inode;
1362 	dentry_rcuwalk_barrier(dentry);
1363 	spin_unlock(&dentry->d_lock);
1364 	fsnotify_d_instantiate(dentry, inode);
1365 }
1366 
1367 /**
1368  * d_instantiate - fill in inode information for a dentry
1369  * @entry: dentry to complete
1370  * @inode: inode to attach to this dentry
1371  *
1372  * Fill in inode information in the entry.
1373  *
1374  * This turns negative dentries into productive full members
1375  * of society.
1376  *
1377  * NOTE! This assumes that the inode count has been incremented
1378  * (or otherwise set) by the caller to indicate that it is now
1379  * in use by the dcache.
1380  */
1381 
1382 void d_instantiate(struct dentry *entry, struct inode * inode)
1383 {
1384 	BUG_ON(!list_empty(&entry->d_alias));
1385 	if (inode)
1386 		spin_lock(&inode->i_lock);
1387 	__d_instantiate(entry, inode);
1388 	if (inode)
1389 		spin_unlock(&inode->i_lock);
1390 	security_d_instantiate(entry, inode);
1391 }
1392 EXPORT_SYMBOL(d_instantiate);
1393 
1394 /**
1395  * d_instantiate_unique - instantiate a non-aliased dentry
1396  * @entry: dentry to instantiate
1397  * @inode: inode to attach to this dentry
1398  *
1399  * Fill in inode information in the entry. On success, it returns NULL.
1400  * If an unhashed alias of "entry" already exists, then we return the
1401  * aliased dentry instead and drop one reference to inode.
1402  *
1403  * Note that in order to avoid conflicts with rename() etc, the caller
1404  * had better be holding the parent directory semaphore.
1405  *
1406  * This also assumes that the inode count has been incremented
1407  * (or otherwise set) by the caller to indicate that it is now
1408  * in use by the dcache.
1409  */
1410 static struct dentry *__d_instantiate_unique(struct dentry *entry,
1411 					     struct inode *inode)
1412 {
1413 	struct dentry *alias;
1414 	int len = entry->d_name.len;
1415 	const char *name = entry->d_name.name;
1416 	unsigned int hash = entry->d_name.hash;
1417 
1418 	if (!inode) {
1419 		__d_instantiate(entry, NULL);
1420 		return NULL;
1421 	}
1422 
1423 	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
1424 		struct qstr *qstr = &alias->d_name;
1425 
1426 		/*
1427 		 * Don't need alias->d_lock here, because aliases with
1428 		 * d_parent == entry->d_parent are not subject to name or
1429 		 * parent changes, because the parent inode i_mutex is held.
1430 		 */
1431 		if (qstr->hash != hash)
1432 			continue;
1433 		if (alias->d_parent != entry->d_parent)
1434 			continue;
1435 		if (dentry_cmp(qstr->name, qstr->len, name, len))
1436 			continue;
1437 		__dget(alias);
1438 		return alias;
1439 	}
1440 
1441 	__d_instantiate(entry, inode);
1442 	return NULL;
1443 }
1444 
1445 struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1446 {
1447 	struct dentry *result;
1448 
1449 	BUG_ON(!list_empty(&entry->d_alias));
1450 
1451 	if (inode)
1452 		spin_lock(&inode->i_lock);
1453 	result = __d_instantiate_unique(entry, inode);
1454 	if (inode)
1455 		spin_unlock(&inode->i_lock);
1456 
1457 	if (!result) {
1458 		security_d_instantiate(entry, inode);
1459 		return NULL;
1460 	}
1461 
1462 	BUG_ON(!d_unhashed(result));
1463 	iput(inode);
1464 	return result;
1465 }
1466 
1467 EXPORT_SYMBOL(d_instantiate_unique);
1468 
1469 struct dentry *d_make_root(struct inode *root_inode)
1470 {
1471 	struct dentry *res = NULL;
1472 
1473 	if (root_inode) {
1474 		static const struct qstr name = { .name = "/", .len = 1 };
1475 
1476 		res = __d_alloc(root_inode->i_sb, &name);
1477 		if (res)
1478 			d_instantiate(res, root_inode);
1479 		else
1480 			iput(root_inode);
1481 	}
1482 	return res;
1483 }
1484 EXPORT_SYMBOL(d_make_root);
1485 
1486 static struct dentry * __d_find_any_alias(struct inode *inode)
1487 {
1488 	struct dentry *alias;
1489 
1490 	if (list_empty(&inode->i_dentry))
1491 		return NULL;
1492 	alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
1493 	__dget(alias);
1494 	return alias;
1495 }
1496 
1497 /**
1498  * d_find_any_alias - find any alias for a given inode
1499  * @inode: inode to find an alias for
1500  *
1501  * If any aliases exist for the given inode, take and return a
1502  * reference for one of them.  If no aliases exist, return %NULL.
1503  */
1504 struct dentry *d_find_any_alias(struct inode *inode)
1505 {
1506 	struct dentry *de;
1507 
1508 	spin_lock(&inode->i_lock);
1509 	de = __d_find_any_alias(inode);
1510 	spin_unlock(&inode->i_lock);
1511 	return de;
1512 }
1513 EXPORT_SYMBOL(d_find_any_alias);
1514 
1515 /**
1516  * d_obtain_alias - find or allocate a dentry for a given inode
1517  * @inode: inode to allocate the dentry for
1518  *
1519  * Obtain a dentry for an inode resulting from NFS filehandle conversion or
1520  * similar open by handle operations.  The returned dentry may be anonymous,
1521  * or may have a full name (if the inode was already in the cache).
1522  *
1523  * When called on a directory inode, we must ensure that the inode only ever
1524  * has one dentry.  If a dentry is found, that is returned instead of
1525  * allocating a new one.
1526  *
1527  * On successful return, the reference to the inode has been transferred
1528  * to the dentry.  In case of an error the reference on the inode is released.
1529  * To make it easier to use in export operations a %NULL or IS_ERR inode may
1530  * be passed in and the error will be propagated to the return value,
1531  * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
1532  */
1533 struct dentry *d_obtain_alias(struct inode *inode)
1534 {
1535 	static const struct qstr anonstring = { .name = "" };
1536 	struct dentry *tmp;
1537 	struct dentry *res;
1538 
1539 	if (!inode)
1540 		return ERR_PTR(-ESTALE);
1541 	if (IS_ERR(inode))
1542 		return ERR_CAST(inode);
1543 
1544 	res = d_find_any_alias(inode);
1545 	if (res)
1546 		goto out_iput;
1547 
1548 	tmp = __d_alloc(inode->i_sb, &anonstring);
1549 	if (!tmp) {
1550 		res = ERR_PTR(-ENOMEM);
1551 		goto out_iput;
1552 	}
1553 
1554 	spin_lock(&inode->i_lock);
1555 	res = __d_find_any_alias(inode);
1556 	if (res) {
1557 		spin_unlock(&inode->i_lock);
1558 		dput(tmp);
1559 		goto out_iput;
1560 	}
1561 
1562 	/* attach a disconnected dentry */
1563 	spin_lock(&tmp->d_lock);
1564 	tmp->d_inode = inode;
1565 	tmp->d_flags |= DCACHE_DISCONNECTED;
1566 	list_add(&tmp->d_alias, &inode->i_dentry);
1567 	hlist_bl_lock(&tmp->d_sb->s_anon);
1568 	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1569 	hlist_bl_unlock(&tmp->d_sb->s_anon);
1570 	spin_unlock(&tmp->d_lock);
1571 	spin_unlock(&inode->i_lock);
1572 	security_d_instantiate(tmp, inode);
1573 
1574 	return tmp;
1575 
1576  out_iput:
1577 	if (res && !IS_ERR(res))
1578 		security_d_instantiate(res, inode);
1579 	iput(inode);
1580 	return res;
1581 }
1582 EXPORT_SYMBOL(d_obtain_alias);
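
/*
 * Illustrative sketch: the export_operations pattern d_obtain_alias() is
 * built for. example_iget() is a hypothetical filesystem inode lookup; it
 * may return NULL or an ERR_PTR, and d_obtain_alias() handles both and
 * consumes the inode reference either way.
 */
struct inode *example_iget(struct super_block *sb, u64 ino);

static inline struct dentry *example_fh_to_dentry(struct super_block *sb,
						  u64 ino)
{
	return d_obtain_alias(example_iget(sb, ino));
}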
1583 
1584 /**
1585  * d_splice_alias - splice a disconnected dentry into the tree if one exists
1586  * @inode:  the inode which may have a disconnected dentry
1587  * @dentry: a negative dentry which we want to point to the inode.
1588  *
1589  * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
1590  * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
1591  * and return it, else simply d_add the inode to the dentry and return NULL.
1592  *
1593  * This is needed in the lookup routine of any filesystem that is exportable
1594  * (via knfsd) so that we can build dcache paths to directories effectively.
1595  *
1596  * If a dentry was found and moved, then it is returned.  Otherwise NULL
1597  * is returned.  This matches the expected return value of ->lookup.
1598  *
1599  */
1600 struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1601 {
1602 	struct dentry *new = NULL;
1603 
1604 	if (IS_ERR(inode))
1605 		return ERR_CAST(inode);
1606 
1607 	if (inode && S_ISDIR(inode->i_mode)) {
1608 		spin_lock(&inode->i_lock);
1609 		new = __d_find_alias(inode, 1);
1610 		if (new) {
1611 			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1612 			spin_unlock(&inode->i_lock);
1613 			security_d_instantiate(new, inode);
1614 			d_move(new, dentry);
1615 			iput(inode);
1616 		} else {
1617 			/* already taking inode->i_lock, so d_add() by hand */
1618 			__d_instantiate(dentry, inode);
1619 			spin_unlock(&inode->i_lock);
1620 			security_d_instantiate(dentry, inode);
1621 			d_rehash(dentry);
1622 		}
1623 	} else
1624 		d_add(dentry, inode);
1625 	return new;
1626 }
1627 EXPORT_SYMBOL(d_splice_alias);
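
/*
 * Illustrative sketch: the canonical ->lookup() shape for an exportable
 * filesystem, as described above. example_iget_by_name() is a
 * hypothetical filesystem-specific inode lookup returning NULL, an
 * ERR_PTR, or a referenced inode.
 */
struct inode *example_iget_by_name(struct inode *dir, struct qstr *name);

static inline struct dentry *example_lookup(struct inode *dir,
					    struct dentry *dentry,
					    struct nameidata *nd)
{
	struct inode *inode = example_iget_by_name(dir, &dentry->d_name);

	/* splices an existing disconnected alias, or d_add()s and returns NULL */
	return d_splice_alias(inode, dentry);
}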
1628 
1629 /**
1630  * d_add_ci - lookup or allocate new dentry with case-exact name
1631  * @inode:  the inode case-insensitive lookup has found
1632  * @dentry: the negative dentry that was passed to the parent's lookup func
1633  * @name:   the case-exact name to be associated with the returned dentry
1634  *
1635  * This is to avoid filling the dcache with case-insensitive names to the
1636  * same inode, only the actual correct case is stored in the dcache for
1637  * case-insensitive filesystems.
1638  *
1639  * For a case-insensitive lookup match, if the case-exact dentry
1640  * already exists in the dcache, use it and return it.
1641  *
1642  * If no entry exists with the exact case name, allocate new dentry with
1643  * the exact case, and return the spliced entry.
1644  */
1645 struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1646 			struct qstr *name)
1647 {
1648 	int error;
1649 	struct dentry *found;
1650 	struct dentry *new;
1651 
1652 	/*
1653 	 * First check if a dentry matching the name already exists,
1654 	 * if not go ahead and create it now.
1655 	 */
1656 	found = d_hash_and_lookup(dentry->d_parent, name);
1657 	if (!found) {
1658 		new = d_alloc(dentry->d_parent, name);
1659 		if (!new) {
1660 			error = -ENOMEM;
1661 			goto err_out;
1662 		}
1663 
1664 		found = d_splice_alias(inode, new);
1665 		if (found) {
1666 			dput(new);
1667 			return found;
1668 		}
1669 		return new;
1670 	}
1671 
1672 	/*
1673  * If a matching dentry exists, and it's not negative, use it.
1674 	 *
1675 	 * Decrement the reference count to balance the iget() done
1676 	 * earlier on.
1677 	 */
1678 	if (found->d_inode) {
1679 		if (unlikely(found->d_inode != inode)) {
1680 			/* This can't happen because bad inodes are unhashed. */
1681 			BUG_ON(!is_bad_inode(inode));
1682 			BUG_ON(!is_bad_inode(found->d_inode));
1683 		}
1684 		iput(inode);
1685 		return found;
1686 	}
1687 
1688 	/*
1689 	 * We are going to instantiate this dentry, unhash it and clear the
1690 	 * lookup flag so we can do that.
1691 	 */
1692 	if (unlikely(d_need_lookup(found)))
1693 		d_clear_need_lookup(found);
1694 
1695 	/*
1696 	 * Negative dentry: instantiate it unless the inode is a directory and
1697 	 * already has a dentry.
1698 	 */
1699 	new = d_splice_alias(inode, found);
1700 	if (new) {
1701 		dput(found);
1702 		found = new;
1703 	}
1704 	return found;
1705 
1706 err_out:
1707 	iput(inode);
1708 	return ERR_PTR(error);
1709 }
1710 EXPORT_SYMBOL(d_add_ci);
1711 
1712 /**
1713  * __d_lookup_rcu - search for a dentry (racy, store-free)
1714  * @parent: parent dentry
1715  * @name: qstr of name we wish to find
1716  * @seqp: returns d_seq value at the point where the dentry was found
1717  * @inode: returns dentry->d_inode when the inode was found valid.
1718  * Returns: dentry, or NULL
1719  *
1720  * __d_lookup_rcu is the dcache lookup function for rcu-walk name
1721  * resolution (store-free path walking) design described in
1722  * Documentation/filesystems/path-lookup.txt.
1723  *
1724  * This is not to be used outside core vfs.
1725  *
1726  * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
1727  * held, and rcu_read_lock held. The returned dentry must not be stored into
1728  * without taking d_lock and checking d_seq sequence count against @seq
1729  * returned here.
1730  *
1731  * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
1732  * function.
1733  *
1734  * Alternatively, __d_lookup_rcu may be called again to look up the child of
1735  * the returned dentry, so long as its parent's seqlock is checked after the
1736  * child is looked up. Thus, an interlocking stepping of sequence lock checks
1737  * is formed, giving integrity down the path walk.
1738  */
1739 struct dentry *__d_lookup_rcu(const struct dentry *parent,
1740 				const struct qstr *name,
1741 				unsigned *seqp, struct inode **inode)
1742 {
1743 	unsigned int len = name->len;
1744 	unsigned int hash = name->hash;
1745 	const unsigned char *str = name->name;
1746 	struct hlist_bl_head *b = d_hash(parent, hash);
1747 	struct hlist_bl_node *node;
1748 	struct dentry *dentry;
1749 
1750 	/*
1751 	 * Note: There is significant duplication with __d_lookup which is
1752 	 * required to prevent single-threaded performance regressions,
1753 	 * especially on architectures where smp_rmb (in seqcounts) is costly.
1754 	 * Keep the two functions in sync.
1755 	 */
1756 
1757 	/*
1758 	 * The hash list is protected using RCU.
1759 	 *
1760 	 * Carefully use d_seq when comparing a candidate dentry, to avoid
1761 	 * races with d_move().
1762 	 *
1763 	 * It is possible that concurrent renames can mess up our list
1764 	 * walk here and result in missing our dentry, resulting in the
1765 	 * false-negative result. d_lookup() protects against concurrent
1766 	 * renames using rename_lock seqlock.
1767 	 *
1768 	 * See Documentation/filesystems/path-lookup.txt for more details.
1769 	 */
1770 	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1771 		unsigned seq;
1772 		struct inode *i;
1773 		const char *tname;
1774 		int tlen;
1775 
1776 		if (dentry->d_name.hash != hash)
1777 			continue;
1778 
1779 seqretry:
1780 		seq = read_seqcount_begin(&dentry->d_seq);
1781 		if (dentry->d_parent != parent)
1782 			continue;
1783 		if (d_unhashed(dentry))
1784 			continue;
1785 		tlen = dentry->d_name.len;
1786 		tname = dentry->d_name.name;
1787 		i = dentry->d_inode;
1788 		prefetch(tname);
1789 		/*
1790 		 * This seqcount check is required to ensure name and
1791 		 * len are loaded atomically, so as not to walk off the
1792 		 * edge of memory when walking. If we could load this
1793 		 * atomically some other way, we could drop this check.
1794 		 */
1795 		if (read_seqcount_retry(&dentry->d_seq, seq))
1796 			goto seqretry;
1797 		if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
1798 			if (parent->d_op->d_compare(parent, *inode,
1799 						dentry, i,
1800 						tlen, tname, name))
1801 				continue;
1802 		} else {
1803 			if (dentry_cmp(tname, tlen, str, len))
1804 				continue;
1805 		}
1806 		/*
1807 		 * No extra seqcount check is required after the name
1808 		 * compare. The caller must perform a seqcount check in
1809 		 * order to do anything useful with the returned dentry
1810 		 * anyway.
1811 		 */
1812 		*seqp = seq;
1813 		*inode = i;
1814 		return dentry;
1815 	}
1816 	return NULL;
1817 }
1818 
1819 /**
1820  * d_lookup - search for a dentry
1821  * @parent: parent dentry
1822  * @name: qstr of name we wish to find
1823  * Returns: dentry, or NULL
1824  *
1825  * d_lookup searches the children of the parent dentry for the name in
1826  * question. If the dentry is found its reference count is incremented and the
1827  * dentry is returned. The caller must use dput to free the entry when it has
1828  * finished using it. %NULL is returned if the dentry does not exist.
1829  */
1830 struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
1831 {
1832 	struct dentry *dentry;
1833 	unsigned seq;
1834 
1835 	do {
1836 		seq = read_seqbegin(&rename_lock);
1837 		dentry = __d_lookup(parent, name);
1838                 if (dentry)
1839 			break;
1840 	} while (read_seqretry(&rename_lock, seq));
1841 	return dentry;
1842 }
1843 EXPORT_SYMBOL(d_lookup);
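
/*
 * Illustrative sketch (hypothetical helper): d_lookup() returns a
 * referenced dentry, so every hit must be balanced by dput().
 */
static inline bool example_is_cached(struct dentry *dir, struct qstr *name)
{
	struct dentry *found = d_lookup(dir, name);	/* takes a reference */

	if (!found)
		return false;
	dput(found);	/* balance the reference taken by d_lookup() */
	return true;
}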
1844 
1845 /**
1846  * __d_lookup - search for a dentry (racy)
1847  * @parent: parent dentry
1848  * @name: qstr of name we wish to find
1849  * Returns: dentry, or NULL
1850  *
1851  * __d_lookup is like d_lookup, however it may (rarely) return a
1852  * false-negative result due to unrelated rename activity.
1853  *
1854  * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
1855  * however it must be used carefully, eg. with a following d_lookup in
1856  * the case of failure.
1857  *
1858  * __d_lookup callers must be commented.
1859  */
1860 struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1861 {
1862 	unsigned int len = name->len;
1863 	unsigned int hash = name->hash;
1864 	const unsigned char *str = name->name;
1865 	struct hlist_bl_head *b = d_hash(parent, hash);
1866 	struct hlist_bl_node *node;
1867 	struct dentry *found = NULL;
1868 	struct dentry *dentry;
1869 
1870 	/*
1871 	 * Note: There is significant duplication with __d_lookup_rcu which is
1872 	 * required to prevent single threaded performance regressions
1873 	 * especially on architectures where smp_rmb (in seqcounts) is costly.
1874 	 * Keep the two functions in sync.
1875 	 */
1876 
1877 	/*
1878 	 * The hash list is protected using RCU.
1879 	 *
1880 	 * Take d_lock when comparing a candidate dentry, to avoid races
1881 	 * with d_move().
1882 	 *
1883 	 * It is possible that concurrent renames can mess up our list
1884 	 * walk here and result in missing our dentry, resulting in the
1885 	 * false-negative result. d_lookup() protects against concurrent
1886 	 * renames using rename_lock seqlock.
1887 	 *
1888 	 * See Documentation/filesystems/path-lookup.txt for more details.
1889 	 */
1890 	rcu_read_lock();
1891 
1892 	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1893 		const char *tname;
1894 		int tlen;
1895 
1896 		if (dentry->d_name.hash != hash)
1897 			continue;
1898 
1899 		spin_lock(&dentry->d_lock);
1900 		if (dentry->d_parent != parent)
1901 			goto next;
1902 		if (d_unhashed(dentry))
1903 			goto next;
1904 
1905 		/*
1906 		 * It is safe to compare names since d_move() cannot
1907 		 * change the qstr (protected by d_lock).
1908 		 */
1909 		tlen = dentry->d_name.len;
1910 		tname = dentry->d_name.name;
1911 		if (parent->d_flags & DCACHE_OP_COMPARE) {
1912 			if (parent->d_op->d_compare(parent, parent->d_inode,
1913 						dentry, dentry->d_inode,
1914 						tlen, tname, name))
1915 				goto next;
1916 		} else {
1917 			if (dentry_cmp(tname, tlen, str, len))
1918 				goto next;
1919 		}
1920 
1921 		dentry->d_count++;
1922 		found = dentry;
1923 		spin_unlock(&dentry->d_lock);
1924 		break;
1925 next:
1926 		spin_unlock(&dentry->d_lock);
1927 	}
1928 	rcu_read_unlock();
1929 
1930 	return found;
1931 }
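
/*
 * Sketch of the calling pattern assumed above: since __d_lookup() may
 * return a false negative under concurrent renames, a failed fast-path
 * lookup is retried with the seqlock-protected d_lookup():
 *
 *	dentry = __d_lookup(parent, name);
 *	if (!dentry)
 *		dentry = d_lookup(parent, name);
 */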
1932 
1933 /**
1934  * d_hash_and_lookup - hash the qstr then search for a dentry
1935  * @dir: Directory to search in
1936  * @name: qstr of name we wish to find
1937  *
1938  * Returns NULL on hash failure or if the name is not found.
1939  */
1940 struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1941 {
1942 	struct dentry *dentry = NULL;
1943 
1944 	/*
1945 	 * Check for a fs-specific hash function. Note that we must
1946 	 * calculate the standard hash first, as the d_op->d_hash()
1947 	 * routine may choose to leave the hash value unchanged.
1948 	 */
1949 	name->hash = full_name_hash(name->name, name->len);
1950 	if (dir->d_flags & DCACHE_OP_HASH) {
1951 		if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
1952 			goto out;
1953 	}
1954 	dentry = d_lookup(dir, name);
1955 out:
1956 	return dentry;
1957 }
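
/*
 * Illustrative (hypothetical) ->d_hash for a case-insensitive filesystem,
 * following the protocol above: the standard hash has already been
 * computed, and the method may replace it or leave it untouched:
 *
 *	static int ci_d_hash(const struct dentry *dentry,
 *			const struct inode *inode, struct qstr *name)
 *	{
 *		unsigned long hash = init_name_hash();
 *		unsigned int i;
 *
 *		for (i = 0; i < name->len; i++)
 *			hash = partial_name_hash(tolower(name->name[i]), hash);
 *		name->hash = end_name_hash(hash);
 *		return 0;
 *	}
 */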
1958 
1959 /**
1960  * d_validate - verify dentry provided from insecure source (deprecated)
1961  * @dentry: The dentry alleged to be valid child of @dparent
1962  * @dparent: The parent dentry (known to be valid)
1963  *
1964  * An insecure source has sent us a dentry, here we verify it and dget() it.
1965  * This is used by ncpfs in its readdir implementation.
1966  * Zero is returned if the dentry is invalid.
1967  *
1968  * This function is slow for big directories and is deprecated; do not use it.
1969  */
1970 int d_validate(struct dentry *dentry, struct dentry *dparent)
1971 {
1972 	struct dentry *child;
1973 
1974 	spin_lock(&dparent->d_lock);
1975 	list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
1976 		if (dentry == child) {
1977 			spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1978 			__dget_dlock(dentry);
1979 			spin_unlock(&dentry->d_lock);
1980 			spin_unlock(&dparent->d_lock);
1981 			return 1;
1982 		}
1983 	}
1984 	spin_unlock(&dparent->d_lock);
1985 
1986 	return 0;
1987 }
1988 EXPORT_SYMBOL(d_validate);
1989 
1990 /*
1991  * When a file is deleted, we have two options:
1992  * - turn this dentry into a negative dentry
1993  * - unhash this dentry and free it.
1994  *
1995  * Usually, we want to just turn this into
1996  * a negative dentry, but if anybody else is
1997  * currently using the dentry or the inode
1998  * we can't do that and we fall back on removing
1999  * it from the hash queues and waiting for
2000  * it to be deleted later when it has no users
2001  */
2002 
2003 /**
2004  * d_delete - delete a dentry
2005  * @dentry: The dentry to delete
2006  *
2007  * Turn the dentry into a negative dentry if possible, otherwise
2008  * remove it from the hash queues so it can be deleted later
2009  */
2010 
2011 void d_delete(struct dentry * dentry)
2012 {
2013 	struct inode *inode;
2014 	int isdir = 0;
2015 	/*
2016 	 * Are we the only user?
2017 	 */
2018 again:
2019 	spin_lock(&dentry->d_lock);
2020 	inode = dentry->d_inode;
2021 	isdir = S_ISDIR(inode->i_mode);
2022 	if (dentry->d_count == 1) {
2023 		if (inode && !spin_trylock(&inode->i_lock)) {
2024 			spin_unlock(&dentry->d_lock);
2025 			cpu_relax();
2026 			goto again;
2027 		}
2028 		dentry->d_flags &= ~DCACHE_CANT_MOUNT;
2029 		dentry_unlink_inode(dentry);
2030 		fsnotify_nameremove(dentry, isdir);
2031 		return;
2032 	}
2033 
2034 	if (!d_unhashed(dentry))
2035 		__d_drop(dentry);
2036 
2037 	spin_unlock(&dentry->d_lock);
2038 
2039 	fsnotify_nameremove(dentry, isdir);
2040 }
2041 EXPORT_SYMBOL(d_delete);
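
/*
 * Sketch of the usual calling context (hypothetical caller, modelled on
 * vfs_unlink): d_delete() is invoked once the filesystem has removed
 * the name:
 *
 *	error = dir->i_op->unlink(dir, dentry);
 *	if (!error)
 *		d_delete(dentry);
 */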
2042 
2043 static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
2044 {
2045 	BUG_ON(!d_unhashed(entry));
2046 	hlist_bl_lock(b);
2047 	entry->d_flags |= DCACHE_RCUACCESS;
2048 	hlist_bl_add_head_rcu(&entry->d_hash, b);
2049 	hlist_bl_unlock(b);
2050 }
2051 
2052 static void _d_rehash(struct dentry * entry)
2053 {
2054 	__d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
2055 }
2056 
2057 /**
2058  * d_rehash	- add an entry back to the hash
2059  * @entry: dentry to add to the hash
2060  *
2061  * Adds a dentry to the hash according to its name.
2062  */
2063 
2064 void d_rehash(struct dentry * entry)
2065 {
2066 	spin_lock(&entry->d_lock);
2067 	_d_rehash(entry);
2068 	spin_unlock(&entry->d_lock);
2069 }
2070 EXPORT_SYMBOL(d_rehash);
2071 
2072 /**
2073  * dentry_update_name_case - update case insensitive dentry with a new name
2074  * @dentry: dentry to be updated
2075  * @name: new name
2076  *
2077  * Update a case insensitive dentry with new case of name.
2078  *
2079  * dentry must have been returned by d_lookup with name @name. Old and new
2080  * name lengths must match (ie. no d_compare which allows mismatched name
2081  * lengths).
2082  *
2083  * Parent inode i_mutex must be held over d_lookup and into this call (to
2084  * keep renames and concurrent inserts, and readdir(2) away).
2085  */
2086 void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2087 {
2088 	BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
2089 	BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2090 
2091 	spin_lock(&dentry->d_lock);
2092 	write_seqcount_begin(&dentry->d_seq);
2093 	memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
2094 	write_seqcount_end(&dentry->d_seq);
2095 	spin_unlock(&dentry->d_lock);
2096 }
2097 EXPORT_SYMBOL(dentry_update_name_case);
2098 
2099 static void switch_names(struct dentry *dentry, struct dentry *target)
2100 {
2101 	if (dname_external(target)) {
2102 		if (dname_external(dentry)) {
2103 			/*
2104 			 * Both external: swap the pointers
2105 			 */
2106 			swap(target->d_name.name, dentry->d_name.name);
2107 		} else {
2108 			/*
2109 			 * dentry:internal, target:external.  Steal target's
2110 			 * storage and make target internal.
2111 			 */
2112 			memcpy(target->d_iname, dentry->d_name.name,
2113 					dentry->d_name.len + 1);
2114 			dentry->d_name.name = target->d_name.name;
2115 			target->d_name.name = target->d_iname;
2116 		}
2117 	} else {
2118 		if (dname_external(dentry)) {
2119 			/*
2120 			 * dentry:external, target:internal.  Give dentry's
2121 			 * storage to target and make dentry internal
2122 			 */
2123 			memcpy(dentry->d_iname, target->d_name.name,
2124 					target->d_name.len + 1);
2125 			target->d_name.name = dentry->d_name.name;
2126 			dentry->d_name.name = dentry->d_iname;
2127 		} else {
2128 			/*
2129 			 * Both are internal.  Just copy target to dentry
2130 			 */
2131 			memcpy(dentry->d_iname, target->d_name.name,
2132 					target->d_name.len + 1);
2133 			dentry->d_name.len = target->d_name.len;
2134 			return;
2135 		}
2136 	}
2137 	swap(dentry->d_name.len, target->d_name.len);
2138 }
2139 
2140 static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2141 {
2142 	/*
2143 	 * XXXX: do we really need to take target->d_lock?
2144 	 */
2145 	if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2146 		spin_lock(&target->d_parent->d_lock);
2147 	else {
2148 		if (d_ancestor(dentry->d_parent, target->d_parent)) {
2149 			spin_lock(&dentry->d_parent->d_lock);
2150 			spin_lock_nested(&target->d_parent->d_lock,
2151 						DENTRY_D_LOCK_NESTED);
2152 		} else {
2153 			spin_lock(&target->d_parent->d_lock);
2154 			spin_lock_nested(&dentry->d_parent->d_lock,
2155 						DENTRY_D_LOCK_NESTED);
2156 		}
2157 	}
2158 	if (target < dentry) {
2159 		spin_lock_nested(&target->d_lock, 2);
2160 		spin_lock_nested(&dentry->d_lock, 3);
2161 	} else {
2162 		spin_lock_nested(&dentry->d_lock, 2);
2163 		spin_lock_nested(&target->d_lock, 3);
2164 	}
2165 }
2166 
2167 static void dentry_unlock_parents_for_move(struct dentry *dentry,
2168 					struct dentry *target)
2169 {
2170 	if (target->d_parent != dentry->d_parent)
2171 		spin_unlock(&dentry->d_parent->d_lock);
2172 	if (target->d_parent != target)
2173 		spin_unlock(&target->d_parent->d_lock);
2174 }
2175 
2176 /*
2177  * When switching names, the actual string doesn't strictly have to
2178  * be preserved in the target - because we're dropping the target
2179  * anyway. As such, we can just do a simple memcpy() to copy over
2180  * the new name before we switch.
2181  *
2182  * Note that we have to be a lot more careful about getting the hash
2183  * switched - we have to switch the hash value properly even if it
2184  * then no longer matches the actual (corrupted) string of the target.
2185  * The hash value has to match the hash queue that the dentry is on..
2186  */
2187 /*
2188  * __d_move - move a dentry
2189  * @dentry: entry to move
2190  * @target: new dentry
2191  *
2192  * Update the dcache to reflect the move of a file name. Negative
2193  * dcache entries should not be moved in this way. Caller must hold
2194  * rename_lock, the i_mutex of the source and target directories,
2195  * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
2196  */
2197 static void __d_move(struct dentry * dentry, struct dentry * target)
2198 {
2199 	if (!dentry->d_inode)
2200 		printk(KERN_WARNING "VFS: moving negative dcache entry\n");
2201 
2202 	BUG_ON(d_ancestor(dentry, target));
2203 	BUG_ON(d_ancestor(target, dentry));
2204 
2205 	dentry_lock_for_move(dentry, target);
2206 
2207 	write_seqcount_begin(&dentry->d_seq);
2208 	write_seqcount_begin(&target->d_seq);
2209 
2210 	/* __d_drop does write_seqcount_barrier, but they're OK to nest. */
2211 
2212 	/*
2213 	 * Move the dentry to the target hash queue. Don't bother checking
2214 	 * for the same hash queue because of how unlikely it is.
2215 	 */
2216 	__d_drop(dentry);
2217 	__d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
2218 
2219 	/* Unhash the target: dput() will then get rid of it */
2220 	__d_drop(target);
2221 
2222 	list_del(&dentry->d_u.d_child);
2223 	list_del(&target->d_u.d_child);
2224 
2225 	/* Switch the names.. */
2226 	switch_names(dentry, target);
2227 	swap(dentry->d_name.hash, target->d_name.hash);
2228 
2229 	/* ... and switch the parents */
2230 	if (IS_ROOT(dentry)) {
2231 		dentry->d_parent = target->d_parent;
2232 		target->d_parent = target;
2233 		INIT_LIST_HEAD(&target->d_u.d_child);
2234 	} else {
2235 		swap(dentry->d_parent, target->d_parent);
2236 
2237 		/* And add them back to the (new) parent lists */
2238 		list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
2239 	}
2240 
2241 	list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2242 
2243 	write_seqcount_end(&target->d_seq);
2244 	write_seqcount_end(&dentry->d_seq);
2245 
2246 	dentry_unlock_parents_for_move(dentry, target);
2247 	spin_unlock(&target->d_lock);
2248 	fsnotify_d_move(dentry);
2249 	spin_unlock(&dentry->d_lock);
2250 }
2251 
2252 /*
2253  * d_move - move a dentry
2254  * @dentry: entry to move
2255  * @target: new dentry
2256  *
2257  * Update the dcache to reflect the move of a file name. Negative
2258  * dcache entries should not be moved in this way. See the locking
2259  * requirements for __d_move.
2260  */
2261 void d_move(struct dentry *dentry, struct dentry *target)
2262 {
2263 	write_seqlock(&rename_lock);
2264 	__d_move(dentry, target);
2265 	write_sequnlock(&rename_lock);
2266 }
2267 EXPORT_SYMBOL(d_move);
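
/*
 * Sketch of the usual calling context (hypothetical caller, modelled on
 * vfs_rename): once the filesystem's ->rename() succeeds, the dcache is
 * brought up to date:
 *
 *	error = old_dir->i_op->rename(old_dir, old_dentry,
 *					new_dir, new_dentry);
 *	if (!error)
 *		d_move(old_dentry, new_dentry);
 */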
2268 
2269 /**
2270  * d_ancestor - search for an ancestor
2271  * @p1: ancestor dentry
2272  * @p2: child dentry
2273  *
2274  * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
2275  * an ancestor of p2, else NULL.
2276  */
2277 struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
2278 {
2279 	struct dentry *p;
2280 
2281 	for (p = p2; !IS_ROOT(p); p = p->d_parent) {
2282 		if (p->d_parent == p1)
2283 			return p;
2284 	}
2285 	return NULL;
2286 }
2287 
2288 /*
2289  * This helper attempts to cope with remotely renamed directories
2290  *
2291  * It assumes that the caller is already holding
2292  * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
2293  *
2294  * Note: If ever the locking in lock_rename() changes, then please
2295  * remember to update this too...
2296  */
2297 static struct dentry *__d_unalias(struct inode *inode,
2298 		struct dentry *dentry, struct dentry *alias)
2299 {
2300 	struct mutex *m1 = NULL, *m2 = NULL;
2301 	struct dentry *ret;
2302 
2303 	/* If alias and dentry share a parent, then no extra locks required */
2304 	if (alias->d_parent == dentry->d_parent)
2305 		goto out_unalias;
2306 
2307 	/* See lock_rename() */
2308 	ret = ERR_PTR(-EBUSY);
2309 	if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
2310 		goto out_err;
2311 	m1 = &dentry->d_sb->s_vfs_rename_mutex;
2312 	if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
2313 		goto out_err;
2314 	m2 = &alias->d_parent->d_inode->i_mutex;
2315 out_unalias:
2316 	__d_move(alias, dentry);
2317 	ret = alias;
2318 out_err:
2319 	spin_unlock(&inode->i_lock);
2320 	if (m2)
2321 		mutex_unlock(m2);
2322 	if (m1)
2323 		mutex_unlock(m1);
2324 	return ret;
2325 }
2326 
2327 /*
2328  * Prepare an anonymous dentry for life in the superblock's dentry tree as a
2329  * named dentry in place of the dentry to be replaced.
2330  * returns with anon->d_lock held!
2331  */
2332 static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2333 {
2334 	struct dentry *dparent, *aparent;
2335 
2336 	dentry_lock_for_move(anon, dentry);
2337 
2338 	write_seqcount_begin(&dentry->d_seq);
2339 	write_seqcount_begin(&anon->d_seq);
2340 
2341 	dparent = dentry->d_parent;
2342 	aparent = anon->d_parent;
2343 
2344 	switch_names(dentry, anon);
2345 	swap(dentry->d_name.hash, anon->d_name.hash);
2346 
2347 	dentry->d_parent = (aparent == anon) ? dentry : aparent;
2348 	list_del(&dentry->d_u.d_child);
2349 	if (!IS_ROOT(dentry))
2350 		list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2351 	else
2352 		INIT_LIST_HEAD(&dentry->d_u.d_child);
2353 
2354 	anon->d_parent = (dparent == dentry) ? anon : dparent;
2355 	list_del(&anon->d_u.d_child);
2356 	if (!IS_ROOT(anon))
2357 		list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
2358 	else
2359 		INIT_LIST_HEAD(&anon->d_u.d_child);
2360 
2361 	write_seqcount_end(&dentry->d_seq);
2362 	write_seqcount_end(&anon->d_seq);
2363 
2364 	dentry_unlock_parents_for_move(anon, dentry);
2365 	spin_unlock(&dentry->d_lock);
2366 
2367 	/* anon->d_lock still locked, returns locked */
2368 	anon->d_flags &= ~DCACHE_DISCONNECTED;
2369 }
2370 
2371 /**
2372  * d_materialise_unique - introduce an inode into the tree
2373  * @dentry: candidate dentry
2374  * @inode: inode to bind to the dentry, to which aliases may be attached
2375  *
2376  * Introduces a dentry into the tree, substituting an extant disconnected
2377  * root directory alias in its place if there is one. Caller must hold the
2378  * i_mutex of the parent directory.
2379  */
2380 struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2381 {
2382 	struct dentry *actual;
2383 
2384 	BUG_ON(!d_unhashed(dentry));
2385 
2386 	if (!inode) {
2387 		actual = dentry;
2388 		__d_instantiate(dentry, NULL);
2389 		d_rehash(actual);
2390 		goto out_nolock;
2391 	}
2392 
2393 	spin_lock(&inode->i_lock);
2394 
2395 	if (S_ISDIR(inode->i_mode)) {
2396 		struct dentry *alias;
2397 
2398 		/* Does an aliased dentry already exist? */
2399 		alias = __d_find_alias(inode, 0);
2400 		if (alias) {
2401 			actual = alias;
2402 			write_seqlock(&rename_lock);
2403 
2404 			if (d_ancestor(alias, dentry)) {
2405 				/* Check for loops */
2406 				actual = ERR_PTR(-ELOOP);
2407 				spin_unlock(&inode->i_lock);
2408 			} else if (IS_ROOT(alias)) {
2409 				/* Is this an anonymous mountpoint that we
2410 				 * could splice into our tree? */
2411 				__d_materialise_dentry(dentry, alias);
2412 				write_sequnlock(&rename_lock);
2413 				__d_drop(alias);
2414 				goto found;
2415 			} else {
2416 				/* Nope, but we must(!) avoid directory
2417 				 * aliasing. This drops inode->i_lock */
2418 				actual = __d_unalias(inode, dentry, alias);
2419 			}
2420 			write_sequnlock(&rename_lock);
2421 			if (IS_ERR(actual)) {
2422 				if (PTR_ERR(actual) == -ELOOP)
2423 					pr_warn_ratelimited(
2424 						"VFS: Lookup of '%s' in %s %s"
2425 						" would have caused loop\n",
2426 						dentry->d_name.name,
2427 						inode->i_sb->s_type->name,
2428 						inode->i_sb->s_id);
2429 				dput(alias);
2430 			}
2431 			goto out_nolock;
2432 		}
2433 	}
2434 
2435 	/* Add a unique reference */
2436 	actual = __d_instantiate_unique(dentry, inode);
2437 	if (!actual)
2438 		actual = dentry;
2439 	else
2440 		BUG_ON(!d_unhashed(actual));
2441 
2442 	spin_lock(&actual->d_lock);
2443 found:
2444 	_d_rehash(actual);
2445 	spin_unlock(&actual->d_lock);
2446 	spin_unlock(&inode->i_lock);
2447 out_nolock:
2448 	if (actual == dentry) {
2449 		security_d_instantiate(dentry, inode);
2450 		return NULL;
2451 	}
2452 
2453 	iput(inode);
2454 	return actual;
2455 }
2456 EXPORT_SYMBOL_GPL(d_materialise_unique);
2457 
2458 static int prepend(char **buffer, int *buflen, const char *str, int namelen)
2459 {
2460 	*buflen -= namelen;
2461 	if (*buflen < 0)
2462 		return -ENAMETOOLONG;
2463 	*buffer -= namelen;
2464 	memcpy(*buffer, str, namelen);
2465 	return 0;
2466 }
2467 
2468 static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2469 {
2470 	return prepend(buffer, buflen, name->name, name->len);
2471 }
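
/*
 * These helpers fill the buffer from the end towards the front.  A worked
 * example (hypothetical values): with a 16-byte buffer, prepending "bar",
 * then "/", then "foo", then "/" leaves *buffer pointing at "/foo/bar",
 * eight bytes into the buffer, with *buflen reduced by 8.
 */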
2472 
2473 /**
2474  * prepend_path - Prepend path string to a buffer
2475  * @path: the dentry/vfsmount to report
2476  * @root: root vfsmnt/dentry
2477  * @buffer: pointer to the end of the buffer
2478  * @buflen: pointer to buffer length
2479  * Returns 0 on success, a positive value if @path is not reachable
2480  * from @root, or a negative error.  Caller holds the rename_lock.
2481  */
2482 static int prepend_path(const struct path *path,
2483 			const struct path *root,
2484 			char **buffer, int *buflen)
2485 {
2486 	struct dentry *dentry = path->dentry;
2487 	struct vfsmount *vfsmnt = path->mnt;
2488 	struct mount *mnt = real_mount(vfsmnt);
2489 	bool slash = false;
2490 	int error = 0;
2491 
2492 	br_read_lock(vfsmount_lock);
2493 	while (dentry != root->dentry || vfsmnt != root->mnt) {
2494 		struct dentry * parent;
2495 
2496 		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
2497 			/* Global root? */
2498 			if (!mnt_has_parent(mnt))
2499 				goto global_root;
2500 			dentry = mnt->mnt_mountpoint;
2501 			mnt = mnt->mnt_parent;
2502 			vfsmnt = &mnt->mnt;
2503 			continue;
2504 		}
2505 		parent = dentry->d_parent;
2506 		prefetch(parent);
2507 		spin_lock(&dentry->d_lock);
2508 		error = prepend_name(buffer, buflen, &dentry->d_name);
2509 		spin_unlock(&dentry->d_lock);
2510 		if (!error)
2511 			error = prepend(buffer, buflen, "/", 1);
2512 		if (error)
2513 			break;
2514 
2515 		slash = true;
2516 		dentry = parent;
2517 	}
2518 
2519 	if (!error && !slash)
2520 		error = prepend(buffer, buflen, "/", 1);
2521 
2522 out:
2523 	br_read_unlock(vfsmount_lock);
2524 	return error;
2525 
2526 global_root:
2527 	/*
2528 	 * Filesystems needing to implement special "root names"
2529 	 * should do so with ->d_dname()
2530 	 */
2531 	if (IS_ROOT(dentry) &&
2532 	    (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
2533 		WARN(1, "Root dentry has weird name <%.*s>\n",
2534 		     (int) dentry->d_name.len, dentry->d_name.name);
2535 	}
2536 	if (!slash)
2537 		error = prepend(buffer, buflen, "/", 1);
2538 	if (!error)
2539 		error = real_mount(vfsmnt)->mnt_ns ? 1 : 2;
2540 	goto out;
2541 }
2542 
2543 /**
2544  * __d_path - return the path of a dentry
2545  * @path: the dentry/vfsmount to report
2546  * @root: root vfsmnt/dentry
2547  * @buf: buffer to return value in
2548  * @buflen: buffer length
2549  *
2550  * Convert a dentry into an ASCII path name.
2551  *
2552  * Returns a pointer into the buffer or an error code if the
2553  * path was too long.
2554  *
2555  * "buflen" should be positive.
2556  *
2557  * If the path is not reachable from the supplied root, return %NULL.
2558  */
2559 char *__d_path(const struct path *path,
2560 	       const struct path *root,
2561 	       char *buf, int buflen)
2562 {
2563 	char *res = buf + buflen;
2564 	int error;
2565 
2566 	prepend(&res, &buflen, "\0", 1);
2567 	write_seqlock(&rename_lock);
2568 	error = prepend_path(path, root, &res, &buflen);
2569 	write_sequnlock(&rename_lock);
2570 
2571 	if (error < 0)
2572 		return ERR_PTR(error);
2573 	if (error > 0)
2574 		return NULL;
2575 	return res;
2576 }
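
/*
 * Caller sketch (assumed usage) for the three-way return contract of
 * __d_path():
 *
 *	char *p = __d_path(&path, &root, buf, buflen);
 *	if (IS_ERR(p))
 *		... buffer too small ...
 *	else if (!p)
 *		... path not reachable from root ...
 *	else
 *		... p points into buf at the path string ...
 */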
2577 
2578 char *d_absolute_path(const struct path *path,
2579 	       char *buf, int buflen)
2580 {
2581 	struct path root = {};
2582 	char *res = buf + buflen;
2583 	int error;
2584 
2585 	prepend(&res, &buflen, "\0", 1);
2586 	write_seqlock(&rename_lock);
2587 	error = prepend_path(path, &root, &res, &buflen);
2588 	write_sequnlock(&rename_lock);
2589 
2590 	if (error > 1)
2591 		error = -EINVAL;
2592 	if (error < 0)
2593 		return ERR_PTR(error);
2594 	return res;
2595 }
2596 
2597 /*
2598  * same as __d_path but appends "(deleted)" for unlinked files.
2599  */
2600 static int path_with_deleted(const struct path *path,
2601 			     const struct path *root,
2602 			     char **buf, int *buflen)
2603 {
2604 	prepend(buf, buflen, "\0", 1);
2605 	if (d_unlinked(path->dentry)) {
2606 		int error = prepend(buf, buflen, " (deleted)", 10);
2607 		if (error)
2608 			return error;
2609 	}
2610 
2611 	return prepend_path(path, root, buf, buflen);
2612 }
2613 
2614 static int prepend_unreachable(char **buffer, int *buflen)
2615 {
2616 	return prepend(buffer, buflen, "(unreachable)", 13);
2617 }
2618 
2619 /**
2620  * d_path - return the path of a dentry
2621  * @path: path to report
2622  * @buf: buffer to return value in
2623  * @buflen: buffer length
2624  *
2625  * Convert a dentry into an ASCII path name. If the entry has been deleted
2626  * the string " (deleted)" is appended. Note that this is ambiguous.
2627  *
2628  * Returns a pointer into the buffer or an error code if the path was
2629  * too long. Note: Callers should use the returned pointer, not the passed
2630  * in buffer, to use the name! The implementation often starts at an offset
2631  * into the buffer, and may leave 0 bytes at the start.
2632  *
2633  * "buflen" should be positive.
2634  */
2635 char *d_path(const struct path *path, char *buf, int buflen)
2636 {
2637 	char *res = buf + buflen;
2638 	struct path root;
2639 	int error;
2640 
2641 	/*
2642 	 * We have various synthetic filesystems that never get mounted.  On
2643 	 * these filesystems dentries are never used for lookup purposes, and
2644 	 * thus don't need to be hashed.  They also don't need a name until a
2645 	 * user wants to identify the object in /proc/pid/fd/.  The little hack
2646 	 * below allows us to generate a name for these objects on demand:
2647 	 */
2648 	if (path->dentry->d_op && path->dentry->d_op->d_dname)
2649 		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2650 
2651 	get_fs_root(current->fs, &root);
2652 	write_seqlock(&rename_lock);
2653 	error = path_with_deleted(path, &root, &res, &buflen);
2654 	if (error < 0)
2655 		res = ERR_PTR(error);
2656 	write_sequnlock(&rename_lock);
2657 	path_put(&root);
2658 	return res;
2659 }
2660 EXPORT_SYMBOL(d_path);
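
/*
 * Typical usage sketch (hypothetical caller); note that the returned
 * pointer, not @buf, addresses the name:
 *
 *	char *tmp = (char *)__get_free_page(GFP_KERNEL);
 *	char *pathname = d_path(&file->f_path, tmp, PAGE_SIZE);
 *
 *	if (!IS_ERR(pathname))
 *		... use pathname, not tmp ...
 *	free_page((unsigned long)tmp);
 */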
2661 
2662 /**
2663  * d_path_with_unreachable - return the path of a dentry
2664  * @path: path to report
2665  * @buf: buffer to return value in
2666  * @buflen: buffer length
2667  *
2668  * The difference from d_path() is that this prepends "(unreachable)"
2669  * to paths which are unreachable from the current process' root.
2670  */
2671 char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2672 {
2673 	char *res = buf + buflen;
2674 	struct path root;
2675 	int error;
2676 
2677 	if (path->dentry->d_op && path->dentry->d_op->d_dname)
2678 		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2679 
2680 	get_fs_root(current->fs, &root);
2681 	write_seqlock(&rename_lock);
2682 	error = path_with_deleted(path, &root, &res, &buflen);
2683 	if (error > 0)
2684 		error = prepend_unreachable(&res, &buflen);
2685 	write_sequnlock(&rename_lock);
2686 	path_put(&root);
2687 	if (error)
2688 		res =  ERR_PTR(error);
2689 
2690 	return res;
2691 }
2692 
2693 /*
2694  * Helper function for dentry_operations.d_dname() members
2695  */
2696 char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2697 			const char *fmt, ...)
2698 {
2699 	va_list args;
2700 	char temp[64];
2701 	int sz;
2702 
2703 	va_start(args, fmt);
2704 	sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
2705 	va_end(args);
2706 
2707 	if (sz > sizeof(temp) || sz > buflen)
2708 		return ERR_PTR(-ENAMETOOLONG);
2709 
2710 	buffer += buflen - sz;
2711 	return memcpy(buffer, temp, sz);
2712 }
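
/*
 * A filesystem might use this helper to implement ->d_dname() roughly as
 * follows (hypothetical sketch in the style of pipefs' "pipe:[N]" names):
 *
 *	static char *anon_dname(struct dentry *dentry, char *buffer, int buflen)
 *	{
 *		return dynamic_dname(dentry, buffer, buflen, "anon:[%lu]",
 *				dentry->d_inode->i_ino);
 *	}
 */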
2713 
2714 /*
2715  * Write full pathname from the root of the filesystem into the buffer.
2716  */
2717 static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2718 {
2719 	char *end = buf + buflen;
2720 	char *retval;
2721 
2722 	prepend(&end, &buflen, "\0", 1);
2723 	if (buflen < 1)
2724 		goto Elong;
2725 	/* Get '/' right */
2726 	retval = end-1;
2727 	*retval = '/';
2728 
2729 	while (!IS_ROOT(dentry)) {
2730 		struct dentry *parent = dentry->d_parent;
2731 		int error;
2732 
2733 		prefetch(parent);
2734 		spin_lock(&dentry->d_lock);
2735 		error = prepend_name(&end, &buflen, &dentry->d_name);
2736 		spin_unlock(&dentry->d_lock);
2737 		if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
2738 			goto Elong;
2739 
2740 		retval = end;
2741 		dentry = parent;
2742 	}
2743 	return retval;
2744 Elong:
2745 	return ERR_PTR(-ENAMETOOLONG);
2746 }
2747 
2748 char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2749 {
2750 	char *retval;
2751 
2752 	write_seqlock(&rename_lock);
2753 	retval = __dentry_path(dentry, buf, buflen);
2754 	write_sequnlock(&rename_lock);
2755 
2756 	return retval;
2757 }
2758 EXPORT_SYMBOL(dentry_path_raw);
2759 
2760 char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2761 {
2762 	char *p = NULL;
2763 	char *retval;
2764 
2765 	write_seqlock(&rename_lock);
2766 	if (d_unlinked(dentry)) {
2767 		p = buf + buflen;
2768 		if (prepend(&p, &buflen, "//deleted", 10) != 0)
2769 			goto Elong;
2770 		buflen++;
2771 	}
2772 	retval = __dentry_path(dentry, buf, buflen);
2773 	write_sequnlock(&rename_lock);
2774 	if (!IS_ERR(retval) && p)
2775 		*p = '/';	/* restore '/' overridden with '\0' */
2776 	return retval;
2777 Elong:
2778 	return ERR_PTR(-ENAMETOOLONG);
2779 }
2780 
2781 /*
2782  * NOTE! The user-level library version returns a
2783  * character pointer. The kernel system call just
2784  * returns the length of the buffer filled (which
2785  * includes the ending '\0' character), or a negative
2786  * error value. So libc would do something like
2787  *
2788  *	char *getcwd(char * buf, size_t size)
2789  *	{
2790  *		int retval;
2791  *
2792  *		retval = sys_getcwd(buf, size);
2793  *		if (retval >= 0)
2794  *			return buf;
2795  *		errno = -retval;
2796  *		return NULL;
2797  *	}
2798  */
2799 SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2800 {
2801 	int error;
2802 	struct path pwd, root;
2803 	char *page = (char *) __get_free_page(GFP_USER);
2804 
2805 	if (!page)
2806 		return -ENOMEM;
2807 
2808 	get_fs_root_and_pwd(current->fs, &root, &pwd);
2809 
2810 	error = -ENOENT;
2811 	write_seqlock(&rename_lock);
2812 	if (!d_unlinked(pwd.dentry)) {
2813 		unsigned long len;
2814 		char *cwd = page + PAGE_SIZE;
2815 		int buflen = PAGE_SIZE;
2816 
2817 		prepend(&cwd, &buflen, "\0", 1);
2818 		error = prepend_path(&pwd, &root, &cwd, &buflen);
2819 		write_sequnlock(&rename_lock);
2820 
2821 		if (error < 0)
2822 			goto out;
2823 
2824 		/* Unreachable from current root */
2825 		if (error > 0) {
2826 			error = prepend_unreachable(&cwd, &buflen);
2827 			if (error)
2828 				goto out;
2829 		}
2830 
2831 		error = -ERANGE;
2832 		len = PAGE_SIZE + page - cwd;
2833 		if (len <= size) {
2834 			error = len;
2835 			if (copy_to_user(buf, cwd, len))
2836 				error = -EFAULT;
2837 		}
2838 	} else {
2839 		write_sequnlock(&rename_lock);
2840 	}
2841 
2842 out:
2843 	path_put(&pwd);
2844 	path_put(&root);
2845 	free_page((unsigned long) page);
2846 	return error;
2847 }
2848 
2849 /*
2850  * Test whether new_dentry is a subdirectory of old_dentry.
2851  *
2852  * Trivially implemented using the dcache structure
2853  */
2854 
2855 /**
2856  * is_subdir - is new dentry a subdirectory of old_dentry
2857  * @new_dentry: new dentry
2858  * @old_dentry: old dentry
2859  *
2860  * Returns 1 if new_dentry is a subdirectory of old_dentry (at any depth).
2861  * Returns 0 otherwise.
2862  * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2863  */
2864 
2865 int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2866 {
2867 	int result;
2868 	unsigned seq;
2869 
2870 	if (new_dentry == old_dentry)
2871 		return 1;
2872 
2873 	do {
2874 		/* for restarting inner loop in case of seq retry */
2875 		seq = read_seqbegin(&rename_lock);
2876 		/*
2877 		 * Need rcu_read_lock() to protect against d_parent changing
2878 		 * under us due to d_move().
2879 		 */
2880 		rcu_read_lock();
2881 		if (d_ancestor(old_dentry, new_dentry))
2882 			result = 1;
2883 		else
2884 			result = 0;
2885 		rcu_read_unlock();
2886 	} while (read_seqretry(&rename_lock, seq));
2887 
2888 	return result;
2889 }
2890 
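/*
 * d_genocide - drop the refcounts held on an entire subtree
 * @root: dentry at the top of the subtree
 *
 * Walks the subtree under @root depth-first (iteratively, ascending via
 * d_parent rather than recursing) and drops one reference on every hashed,
 * positive dentry found, using DCACHE_GENOCIDE to make sure each dentry is
 * decremented at most once even if the walk is restarted by a concurrent
 * rename.
 */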
2891 void d_genocide(struct dentry *root)
2892 {
2893 	struct dentry *this_parent;
2894 	struct list_head *next;
2895 	unsigned seq;
2896 	int locked = 0;
2897 
2898 	seq = read_seqbegin(&rename_lock);
2899 again:
2900 	this_parent = root;
2901 	spin_lock(&this_parent->d_lock);
2902 repeat:
2903 	next = this_parent->d_subdirs.next;
2904 resume:
2905 	while (next != &this_parent->d_subdirs) {
2906 		struct list_head *tmp = next;
2907 		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2908 		next = tmp->next;
2909 
2910 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2911 		if (d_unhashed(dentry) || !dentry->d_inode) {
2912 			spin_unlock(&dentry->d_lock);
2913 			continue;
2914 		}
2915 		if (!list_empty(&dentry->d_subdirs)) {
2916 			spin_unlock(&this_parent->d_lock);
2917 			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
2918 			this_parent = dentry;
2919 			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2920 			goto repeat;
2921 		}
2922 		if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2923 			dentry->d_flags |= DCACHE_GENOCIDE;
2924 			dentry->d_count--;
2925 		}
2926 		spin_unlock(&dentry->d_lock);
2927 	}
2928 	if (this_parent != root) {
2929 		struct dentry *child = this_parent;
2930 		if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2931 			this_parent->d_flags |= DCACHE_GENOCIDE;
2932 			this_parent->d_count--;
2933 		}
2934 		this_parent = try_to_ascend(this_parent, locked, seq);
2935 		if (!this_parent)
2936 			goto rename_retry;
2937 		next = child->d_u.d_child.next;
2938 		goto resume;
2939 	}
2940 	spin_unlock(&this_parent->d_lock);
2941 	if (!locked && read_seqretry(&rename_lock, seq))
2942 		goto rename_retry;
2943 	if (locked)
2944 		write_sequnlock(&rename_lock);
2945 	return;
2946 
2947 rename_retry:
2948 	locked = 1;
2949 	write_seqlock(&rename_lock);
2950 	goto again;
2951 }
2952 
2953 /**
2954  * find_inode_number - check for dentry with name
2955  * @dir: directory to check
2956  * @name: Name to find.
2957  *
2958  * Check whether a dentry already exists for the given name,
2959  * and return the inode number if it has an inode. Otherwise
2960  * 0 is returned.
2961  *
2962  * This routine is used to post-process directory listings for
2963  * filesystems using synthetic inode numbers, and is necessary
2964  * to keep getcwd() working.
2965  */
2966 
2967 ino_t find_inode_number(struct dentry *dir, struct qstr *name)
2968 {
2969 	struct dentry * dentry;
2970 	ino_t ino = 0;
2971 
2972 	dentry = d_hash_and_lookup(dir, name);
2973 	if (dentry) {
2974 		if (dentry->d_inode)
2975 			ino = dentry->d_inode->i_ino;
2976 		dput(dentry);
2977 	}
2978 	return ino;
2979 }
2980 EXPORT_SYMBOL(find_inode_number);
2981 
2982 static __initdata unsigned long dhash_entries;
2983 static int __init set_dhash_entries(char *str)
2984 {
2985 	if (!str)
2986 		return 0;
2987 	dhash_entries = simple_strtoul(str, &str, 0);
2988 	return 1;
2989 }
2990 __setup("dhash_entries=", set_dhash_entries);
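
/*
 * Example (hypothetical boot command line): booting with
 * "dhash_entries=65536" requests a dentry hash table sized for 65536
 * entries instead of the default derived from available memory.
 */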
2991 
2992 static void __init dcache_init_early(void)
2993 {
2994 	unsigned int loop;
2995 
2996 	/* If hashes are distributed across NUMA nodes, defer
2997 	 * hash allocation until vmalloc space is available.
2998 	 */
2999 	if (hashdist)
3000 		return;
3001 
3002 	dentry_hashtable =
3003 		alloc_large_system_hash("Dentry cache",
3004 					sizeof(struct hlist_bl_head),
3005 					dhash_entries,
3006 					13,
3007 					HASH_EARLY,
3008 					&d_hash_shift,
3009 					&d_hash_mask,
3010 					0);
3011 
3012 	for (loop = 0; loop < (1U << d_hash_shift); loop++)
3013 		INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3014 }
3015 
3016 static void __init dcache_init(void)
3017 {
3018 	unsigned int loop;
3019 
3020 	/*
3021 	 * A constructor could be added for stable state like the lists,
3022 	 * but it is probably not worth it because of the cache nature
3023 	 * of the dcache.
3024 	 */
3025 	dentry_cache = KMEM_CACHE(dentry,
3026 		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
3027 
3028 	/* Hash may have been set up in dcache_init_early */
3029 	if (!hashdist)
3030 		return;
3031 
3032 	dentry_hashtable =
3033 		alloc_large_system_hash("Dentry cache",
3034 					sizeof(struct hlist_bl_head),
3035 					dhash_entries,
3036 					13,
3037 					0,
3038 					&d_hash_shift,
3039 					&d_hash_mask,
3040 					0);
3041 
3042 	for (loop = 0; loop < (1U << d_hash_shift); loop++)
3043 		INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3044 }
3045 
3046 /* SLAB cache for __getname() consumers */
3047 struct kmem_cache *names_cachep __read_mostly;
3048 EXPORT_SYMBOL(names_cachep);
3049 
3050 EXPORT_SYMBOL(d_genocide);
3051 
3052 void __init vfs_caches_init_early(void)
3053 {
3054 	dcache_init_early();
3055 	inode_init_early();
3056 }
3057 
3058 void __init vfs_caches_init(unsigned long mempages)
3059 {
3060 	unsigned long reserve;
3061 
3062 	/* Base hash sizes on available memory, with a reserve equal to
3063 	 * 150% of current kernel size */
3064 
3065 	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
3066 	mempages -= reserve;
3067 
3068 	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
3069 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
3070 
3071 	dcache_init();
3072 	inode_init();
3073 	files_init(mempages);
3074 	mnt_init();
3075 	bdev_cache_init();
3076 	chrdev_init();
3077 }
3078