xref: /linux/fs/dcache.c (revision 13abf8130139c2ccd4962a7e5a8902be5e6cb5a7)
1 /*
2  * fs/dcache.c
3  *
4  * Complete reimplementation
5  * (C) 1997 Thomas Schoebel-Theuer,
6  * with heavy changes by Linus Torvalds
7  */
8 
9 /*
10  * Notes on the allocation strategy:
11  *
12  * The dcache is a master of the icache - whenever a dcache entry
13  * exists, the inode will always exist. "iput()" is done either when
14  * the dcache entry is deleted or garbage collected.
15  */
16 
17 #include <linux/config.h>
18 #include <linux/syscalls.h>
19 #include <linux/string.h>
20 #include <linux/mm.h>
21 #include <linux/fs.h>
22 #include <linux/fsnotify.h>
23 #include <linux/slab.h>
24 #include <linux/init.h>
25 #include <linux/smp_lock.h>
26 #include <linux/hash.h>
27 #include <linux/cache.h>
28 #include <linux/module.h>
29 #include <linux/mount.h>
30 #include <linux/file.h>
31 #include <asm/uaccess.h>
32 #include <linux/security.h>
33 #include <linux/seqlock.h>
34 #include <linux/swap.h>
35 #include <linux/bootmem.h>
36 
37 /* #define DCACHE_DEBUG 1 */
38 
39 int sysctl_vfs_cache_pressure = 100;
40 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
41 
42  __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
43 static seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
44 
45 EXPORT_SYMBOL(dcache_lock);
46 
47 static kmem_cache_t *dentry_cache;
48 
49 #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
50 
51 /*
52  * This is the single most critical data structure when it comes
53  * to the dcache: the hashtable for lookups. Somebody should try
54  * to make this good - I've just made it work.
55  *
56  * This hash-function tries to avoid losing too many bits of hash
57  * information, yet avoid using a prime hash-size or similar.
58  */
59 #define D_HASHBITS     d_hash_shift
60 #define D_HASHMASK     d_hash_mask
61 
62 static unsigned int d_hash_mask;
63 static unsigned int d_hash_shift;
64 static struct hlist_head *dentry_hashtable;
65 static LIST_HEAD(dentry_unused);
66 
67 /* Statistics gathering. */
68 struct dentry_stat_t dentry_stat = {
69 	.age_limit = 45,
70 };
71 
72 static void d_callback(struct rcu_head *head)
73 {
74 	struct dentry * dentry = container_of(head, struct dentry, d_rcu);
75 
76 	if (dname_external(dentry))
77 		kfree(dentry->d_name.name);
78 	kmem_cache_free(dentry_cache, dentry);
79 }
80 
81 /*
82  * no dcache_lock, please.  The caller must decrement dentry_stat.nr_dentry
83  * inside dcache_lock.
84  */
85 static void d_free(struct dentry *dentry)
86 {
87 	if (dentry->d_op && dentry->d_op->d_release)
88 		dentry->d_op->d_release(dentry);
89  	call_rcu(&dentry->d_rcu, d_callback);
90 }
91 
92 /*
93  * Release the dentry's inode, using the filesystem
94  * d_iput() operation if defined.
95  * Called with dcache_lock and per dentry lock held, drops both.
96  */
97 static inline void dentry_iput(struct dentry * dentry)
98 {
99 	struct inode *inode = dentry->d_inode;
100 	if (inode) {
101 		dentry->d_inode = NULL;
102 		list_del_init(&dentry->d_alias);
103 		spin_unlock(&dentry->d_lock);
104 		spin_unlock(&dcache_lock);
105 		fsnotify_inoderemove(inode);
106 		if (dentry->d_op && dentry->d_op->d_iput)
107 			dentry->d_op->d_iput(dentry, inode);
108 		else
109 			iput(inode);
110 	} else {
111 		spin_unlock(&dentry->d_lock);
112 		spin_unlock(&dcache_lock);
113 	}
114 }
115 
116 /*
117  * This is dput
118  *
119  * This is complicated by the fact that we do not want to put
120  * dentries that are no longer on any hash chain on the unused
121  * list: we'd much rather just get rid of them immediately.
122  *
123  * However, that implies that we have to traverse the dentry
124  * tree upwards to the parents which might _also_ now be
125  * scheduled for deletion (it may have been only waiting for
126  * its last child to go away).
127  *
128  * This tail recursion is done by hand as we don't want to depend
129  * on the compiler to always get this right (gcc generally doesn't).
130  * Real recursion would eat up our stack space.
131  */
132 
133 /*
134  * dput - release a dentry
135  * @dentry: dentry to release
136  *
137  * Release a dentry. This will drop the usage count and if appropriate
138  * call the dentry unlink method as well as removing it from the queues and
139  * releasing its resources. If the parent dentries were scheduled for release
140  * they too may now get deleted.
141  *
142  * no dcache lock, please.
143  */
144 
145 void dput(struct dentry *dentry)
146 {
147 	if (!dentry)
148 		return;
149 
150 repeat:
151 	if (atomic_read(&dentry->d_count) == 1)
152 		might_sleep();
153 	if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
154 		return;
155 
156 	spin_lock(&dentry->d_lock);
157 	if (atomic_read(&dentry->d_count)) {
158 		spin_unlock(&dentry->d_lock);
159 		spin_unlock(&dcache_lock);
160 		return;
161 	}
162 
163 	/*
164 	 * AV: ->d_delete() is _NOT_ allowed to block now.
165 	 */
166 	if (dentry->d_op && dentry->d_op->d_delete) {
167 		if (dentry->d_op->d_delete(dentry))
168 			goto unhash_it;
169 	}
170 	/* Unreachable? Get rid of it */
171  	if (d_unhashed(dentry))
172 		goto kill_it;
173   	if (list_empty(&dentry->d_lru)) {
174   		dentry->d_flags |= DCACHE_REFERENCED;
175   		list_add(&dentry->d_lru, &dentry_unused);
176   		dentry_stat.nr_unused++;
177   	}
178  	spin_unlock(&dentry->d_lock);
179 	spin_unlock(&dcache_lock);
180 	return;
181 
182 unhash_it:
183 	__d_drop(dentry);
184 
185 kill_it: {
186 		struct dentry *parent;
187 
188 		/* If dentry was on d_lru list
189 		 * delete it from there
190 		 */
191   		if (!list_empty(&dentry->d_lru)) {
192   			list_del(&dentry->d_lru);
193   			dentry_stat.nr_unused--;
194   		}
195   		list_del(&dentry->d_child);
196 		dentry_stat.nr_dentry--;	/* For d_free, below */
197 		/*drops the locks, at that point nobody can reach this dentry */
198 		dentry_iput(dentry);
199 		parent = dentry->d_parent;
200 		d_free(dentry);
201 		if (dentry == parent)
202 			return;
203 		dentry = parent;
204 		goto repeat;
205 	}
206 }
207 
208 /**
209  * d_invalidate - invalidate a dentry
210  * @dentry: dentry to invalidate
211  *
212  * Try to invalidate the dentry if it turns out to be
213  * possible. If there are other dentries that can be
214  * reached through this one we can't delete it and we
215  * return -EBUSY. On success we return 0.
216  *
217  * no dcache lock.
218  */
219 
220 int d_invalidate(struct dentry * dentry)
221 {
222 	/*
223 	 * If it's already been dropped, return OK.
224 	 */
225 	spin_lock(&dcache_lock);
226 	if (d_unhashed(dentry)) {
227 		spin_unlock(&dcache_lock);
228 		return 0;
229 	}
230 	/*
231 	 * Check whether to do a partial shrink_dcache
232 	 * to get rid of unused child entries.
233 	 */
234 	if (!list_empty(&dentry->d_subdirs)) {
235 		spin_unlock(&dcache_lock);
236 		shrink_dcache_parent(dentry);
237 		spin_lock(&dcache_lock);
238 	}
239 
240 	/*
241 	 * Somebody else still using it?
242 	 *
243 	 * If it's a directory, we can't drop it
244 	 * for fear of somebody re-populating it
245 	 * with children (even though dropping it
246 	 * would make it unreachable from the root,
247 	 * we might still populate it if it was a
248 	 * working directory or similar).
249 	 */
250 	spin_lock(&dentry->d_lock);
251 	if (atomic_read(&dentry->d_count) > 1) {
252 		if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
253 			spin_unlock(&dentry->d_lock);
254 			spin_unlock(&dcache_lock);
255 			return -EBUSY;
256 		}
257 	}
258 
259 	__d_drop(dentry);
260 	spin_unlock(&dentry->d_lock);
261 	spin_unlock(&dcache_lock);
262 	return 0;
263 }
264 
265 /* This should be called _only_ with dcache_lock held */
266 
267 static inline struct dentry * __dget_locked(struct dentry *dentry)
268 {
269 	atomic_inc(&dentry->d_count);
270 	if (!list_empty(&dentry->d_lru)) {
271 		dentry_stat.nr_unused--;
272 		list_del_init(&dentry->d_lru);
273 	}
274 	return dentry;
275 }
276 
277 struct dentry * dget_locked(struct dentry *dentry)
278 {
279 	return __dget_locked(dentry);
280 }
281 
282 /**
283  * d_find_alias - grab a hashed alias of inode
284  * @inode: inode in question
285  * @want_discon:  flag, used by d_splice_alias, to request
286  *          that only a DISCONNECTED alias be returned.
287  *
288  * If inode has a hashed alias, or is a directory and has any alias,
289  * acquire the reference to alias and return it. Otherwise return NULL.
290  * Notice that if inode is a directory there can be only one alias and
291  * it can be unhashed only if it has no children, or if it is the root
292  * of a filesystem.
293  *
294  * If the inode has a DCACHE_DISCONNECTED alias, then prefer
295  * any other hashed alias over that one unless @want_discon is set,
296  * in which case only return a DCACHE_DISCONNECTED alias.
297  */
298 
299 static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
300 {
301 	struct list_head *head, *next, *tmp;
302 	struct dentry *alias, *discon_alias=NULL;
303 
304 	head = &inode->i_dentry;
305 	next = inode->i_dentry.next;
306 	while (next != head) {
307 		tmp = next;
308 		next = tmp->next;
309 		prefetch(next);
310 		alias = list_entry(tmp, struct dentry, d_alias);
311  		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
312 			if (alias->d_flags & DCACHE_DISCONNECTED)
313 				discon_alias = alias;
314 			else if (!want_discon) {
315 				__dget_locked(alias);
316 				return alias;
317 			}
318 		}
319 	}
320 	if (discon_alias)
321 		__dget_locked(discon_alias);
322 	return discon_alias;
323 }
324 
325 struct dentry * d_find_alias(struct inode *inode)
326 {
327 	struct dentry *de;
328 	spin_lock(&dcache_lock);
329 	de = __d_find_alias(inode, 0);
330 	spin_unlock(&dcache_lock);
331 	return de;
332 }
333 
334 /*
335  *	Try to kill dentries associated with this inode.
336  * WARNING: you must own a reference to inode.
337  */
338 void d_prune_aliases(struct inode *inode)
339 {
340 	struct list_head *tmp, *head = &inode->i_dentry;
341 restart:
342 	spin_lock(&dcache_lock);
343 	tmp = head;
344 	while ((tmp = tmp->next) != head) {
345 		struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
346 		spin_lock(&dentry->d_lock);
347 		if (!atomic_read(&dentry->d_count)) {
348 			__dget_locked(dentry);
349 			__d_drop(dentry);
350 			spin_unlock(&dentry->d_lock);
351 			spin_unlock(&dcache_lock);
352 			dput(dentry);
353 			goto restart;
354 		}
355 		spin_unlock(&dentry->d_lock);
356 	}
357 	spin_unlock(&dcache_lock);
358 }
359 
360 /*
361  * Throw away a dentry - free the inode, dput the parent.
362  * This requires that the LRU list has already been
363  * removed.
364  * Called with dcache_lock, drops it and then regains.
365  */
366 static inline void prune_one_dentry(struct dentry * dentry)
367 {
368 	struct dentry * parent;
369 
370 	__d_drop(dentry);
371 	list_del(&dentry->d_child);
372 	dentry_stat.nr_dentry--;	/* For d_free, below */
373 	dentry_iput(dentry);
374 	parent = dentry->d_parent;
375 	d_free(dentry);
376 	if (parent != dentry)
377 		dput(parent);
378 	spin_lock(&dcache_lock);
379 }
380 
381 /**
382  * prune_dcache - shrink the dcache
383  * @count: number of entries to try and free
384  *
385  * Shrink the dcache. This is done when we need
386  * more memory, or simply when we need to unmount
387  * something (at which point we need to unuse
388  * all dentries).
389  *
390  * This function may fail to free any resources if
391  * all the dentries are in use.
392  */
393 
394 static void prune_dcache(int count)
395 {
396 	spin_lock(&dcache_lock);
397 	for (; count ; count--) {
398 		struct dentry *dentry;
399 		struct list_head *tmp;
400 
401 		cond_resched_lock(&dcache_lock);
402 
403 		tmp = dentry_unused.prev;
404 		if (tmp == &dentry_unused)
405 			break;
406 		list_del_init(tmp);
407 		prefetch(dentry_unused.prev);
408  		dentry_stat.nr_unused--;
409 		dentry = list_entry(tmp, struct dentry, d_lru);
410 
411  		spin_lock(&dentry->d_lock);
412 		/*
413 		 * We found an inuse dentry which was not removed from
414 		 * dentry_unused because of laziness during lookup.  Do not free
415 		 * it - just keep it off the dentry_unused list.
416 		 */
417  		if (atomic_read(&dentry->d_count)) {
418  			spin_unlock(&dentry->d_lock);
419 			continue;
420 		}
421 		/* If the dentry was recently referenced, don't free it. */
422 		if (dentry->d_flags & DCACHE_REFERENCED) {
423 			dentry->d_flags &= ~DCACHE_REFERENCED;
424  			list_add(&dentry->d_lru, &dentry_unused);
425  			dentry_stat.nr_unused++;
426  			spin_unlock(&dentry->d_lock);
427 			continue;
428 		}
429 		prune_one_dentry(dentry);
430 	}
431 	spin_unlock(&dcache_lock);
432 }
433 
434 /*
435  * Shrink the dcache for the specified super block.
436  * This allows us to unmount a device without disturbing
437  * the dcache for the other devices.
438  *
439  * This implementation makes just two traversals of the
440  * unused list.  On the first pass we move the selected
441  * dentries to the most recent end, and on the second
442  * pass we free them.  The second pass must restart after
443  * each dput(), but since the target dentries are all at
444  * the end, it's really just a single traversal.
445  */
446 
447 /**
448  * shrink_dcache_sb - shrink dcache for a superblock
449  * @sb: superblock
450  *
451  * Shrink the dcache for the specified super block. This
452  * is used to free the dcache before unmounting a file
453  * system
454  */
455 
456 void shrink_dcache_sb(struct super_block * sb)
457 {
458 	struct list_head *tmp, *next;
459 	struct dentry *dentry;
460 
461 	/*
462 	 * Pass one ... move the dentries for the specified
463 	 * superblock to the most recent end of the unused list.
464 	 */
465 	spin_lock(&dcache_lock);
466 	next = dentry_unused.next;
467 	while (next != &dentry_unused) {
468 		tmp = next;
469 		next = tmp->next;
470 		dentry = list_entry(tmp, struct dentry, d_lru);
471 		if (dentry->d_sb != sb)
472 			continue;
473 		list_del(tmp);
474 		list_add(tmp, &dentry_unused);
475 	}
476 
477 	/*
478 	 * Pass two ... free the dentries for this superblock.
479 	 */
480 repeat:
481 	next = dentry_unused.next;
482 	while (next != &dentry_unused) {
483 		tmp = next;
484 		next = tmp->next;
485 		dentry = list_entry(tmp, struct dentry, d_lru);
486 		if (dentry->d_sb != sb)
487 			continue;
488 		dentry_stat.nr_unused--;
489 		list_del_init(tmp);
490 		spin_lock(&dentry->d_lock);
491 		if (atomic_read(&dentry->d_count)) {
492 			spin_unlock(&dentry->d_lock);
493 			continue;
494 		}
495 		prune_one_dentry(dentry);
496 		goto repeat;
497 	}
498 	spin_unlock(&dcache_lock);
499 }
500 
501 /*
502  * Search for at least 1 mount point in the dentry's subdirs.
503  * We descend to the next level whenever the d_subdirs
504  * list is non-empty and continue searching.
505  */
506 
507 /**
508  * have_submounts - check for mounts over a dentry
509  * @parent: dentry to check.
510  *
511  * Return true if the parent or its subdirectories contain
512  * a mount point
513  */
514 
515 int have_submounts(struct dentry *parent)
516 {
517 	struct dentry *this_parent = parent;
518 	struct list_head *next;
519 
520 	spin_lock(&dcache_lock);
521 	if (d_mountpoint(parent))
522 		goto positive;
523 repeat:
524 	next = this_parent->d_subdirs.next;
525 resume:
526 	while (next != &this_parent->d_subdirs) {
527 		struct list_head *tmp = next;
528 		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
529 		next = tmp->next;
530 		/* Have we found a mount point ? */
531 		if (d_mountpoint(dentry))
532 			goto positive;
533 		if (!list_empty(&dentry->d_subdirs)) {
534 			this_parent = dentry;
535 			goto repeat;
536 		}
537 	}
538 	/*
539 	 * All done at this level ... ascend and resume the search.
540 	 */
541 	if (this_parent != parent) {
542 		next = this_parent->d_child.next;
543 		this_parent = this_parent->d_parent;
544 		goto resume;
545 	}
546 	spin_unlock(&dcache_lock);
547 	return 0; /* No mount points found in tree */
548 positive:
549 	spin_unlock(&dcache_lock);
550 	return 1;
551 }
552 
553 /*
554  * Search the dentry child list for the specified parent,
555  * and move any unused dentries to the end of the unused
556  * list for prune_dcache(). We descend to the next level
557  * whenever the d_subdirs list is non-empty and continue
558  * searching.
559  *
560  * It returns zero iff there are no unused children,
561  * otherwise  it returns the number of children moved to
562  * the end of the unused list. This may not be the total
563  * number of unused children, because select_parent can
564  * drop the lock and return early due to latency
565  * constraints.
566  */
567 static int select_parent(struct dentry * parent)
568 {
569 	struct dentry *this_parent = parent;
570 	struct list_head *next;
571 	int found = 0;
572 
573 	spin_lock(&dcache_lock);
574 repeat:
575 	next = this_parent->d_subdirs.next;
576 resume:
577 	while (next != &this_parent->d_subdirs) {
578 		struct list_head *tmp = next;
579 		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
580 		next = tmp->next;
581 
582 		if (!list_empty(&dentry->d_lru)) {
583 			dentry_stat.nr_unused--;
584 			list_del_init(&dentry->d_lru);
585 		}
586 		/*
587 		 * move only zero ref count dentries to the end
588 		 * of the unused list for prune_dcache
589 		 */
590 		if (!atomic_read(&dentry->d_count)) {
591 			list_add(&dentry->d_lru, dentry_unused.prev);
592 			dentry_stat.nr_unused++;
593 			found++;
594 		}
595 
596 		/*
597 		 * We can return to the caller if we have found some (this
598 		 * ensures forward progress). We'll be coming back to find
599 		 * the rest.
600 		 */
601 		if (found && need_resched())
602 			goto out;
603 
604 		/*
605 		 * Descend a level if the d_subdirs list is non-empty.
606 		 */
607 		if (!list_empty(&dentry->d_subdirs)) {
608 			this_parent = dentry;
609 #ifdef DCACHE_DEBUG
610 printk(KERN_DEBUG "select_parent: descending to %s/%s, found=%d\n",
611 dentry->d_parent->d_name.name, dentry->d_name.name, found);
612 #endif
613 			goto repeat;
614 		}
615 	}
616 	/*
617 	 * All done at this level ... ascend and resume the search.
618 	 */
619 	if (this_parent != parent) {
620 		next = this_parent->d_child.next;
621 		this_parent = this_parent->d_parent;
622 #ifdef DCACHE_DEBUG
623 printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n",
624 this_parent->d_parent->d_name.name, this_parent->d_name.name, found);
625 #endif
626 		goto resume;
627 	}
628 out:
629 	spin_unlock(&dcache_lock);
630 	return found;
631 }
632 
633 /**
634  * shrink_dcache_parent - prune dcache
635  * @parent: parent of entries to prune
636  *
637  * Prune the dcache to remove unused children of the parent dentry.
638  */
639 
640 void shrink_dcache_parent(struct dentry * parent)
641 {
642 	int found;
643 
644 	while ((found = select_parent(parent)) != 0)
645 		prune_dcache(found);
646 }
647 
648 /**
649  * shrink_dcache_anon - further prune the cache
650  * @head: head of d_hash list of dentries to prune
651  *
652  * Prune the dentries that are anonymous
653  *
654  * parsing d_hash list does not hlist_for_each_rcu() as it
655  * done under dcache_lock.
656  *
657  */
658 void shrink_dcache_anon(struct hlist_head *head)
659 {
660 	struct hlist_node *lp;
661 	int found;
662 	do {
663 		found = 0;
664 		spin_lock(&dcache_lock);
665 		hlist_for_each(lp, head) {
666 			struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
667 			if (!list_empty(&this->d_lru)) {
668 				dentry_stat.nr_unused--;
669 				list_del_init(&this->d_lru);
670 			}
671 
672 			/*
673 			 * move only zero ref count dentries to the end
674 			 * of the unused list for prune_dcache
675 			 */
676 			if (!atomic_read(&this->d_count)) {
677 				list_add_tail(&this->d_lru, &dentry_unused);
678 				dentry_stat.nr_unused++;
679 				found++;
680 			}
681 		}
682 		spin_unlock(&dcache_lock);
683 		prune_dcache(found);
684 	} while(found);
685 }
686 
687 /*
688  * Scan `nr' dentries and return the number which remain.
689  *
690  * We need to avoid reentering the filesystem if the caller is performing a
691  * GFP_NOFS allocation attempt.  One example deadlock is:
692  *
693  * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
694  * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
695  * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
696  *
697  * In this case we return -1 to tell the caller that we baled.
698  */
699 static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
700 {
701 	if (nr) {
702 		if (!(gfp_mask & __GFP_FS))
703 			return -1;
704 		prune_dcache(nr);
705 	}
706 	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
707 }
708 
709 /**
710  * d_alloc	-	allocate a dcache entry
711  * @parent: parent of entry to allocate
712  * @name: qstr of the name
713  *
714  * Allocates a dentry. It returns %NULL if there is insufficient memory
715  * available. On a success the dentry is returned. The name passed in is
716  * copied and the copy passed in may be reused after this call.
717  */
718 
719 struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
720 {
721 	struct dentry *dentry;
722 	char *dname;
723 
724 	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
725 	if (!dentry)
726 		return NULL;
727 
728 	if (name->len > DNAME_INLINE_LEN-1) {
729 		dname = kmalloc(name->len + 1, GFP_KERNEL);
730 		if (!dname) {
731 			kmem_cache_free(dentry_cache, dentry);
732 			return NULL;
733 		}
734 	} else  {
735 		dname = dentry->d_iname;
736 	}
737 	dentry->d_name.name = dname;
738 
739 	dentry->d_name.len = name->len;
740 	dentry->d_name.hash = name->hash;
741 	memcpy(dname, name->name, name->len);
742 	dname[name->len] = 0;
743 
744 	atomic_set(&dentry->d_count, 1);
745 	dentry->d_flags = DCACHE_UNHASHED;
746 	spin_lock_init(&dentry->d_lock);
747 	dentry->d_inode = NULL;
748 	dentry->d_parent = NULL;
749 	dentry->d_sb = NULL;
750 	dentry->d_op = NULL;
751 	dentry->d_fsdata = NULL;
752 	dentry->d_mounted = 0;
753 	dentry->d_cookie = NULL;
754 	INIT_HLIST_NODE(&dentry->d_hash);
755 	INIT_LIST_HEAD(&dentry->d_lru);
756 	INIT_LIST_HEAD(&dentry->d_subdirs);
757 	INIT_LIST_HEAD(&dentry->d_alias);
758 
759 	if (parent) {
760 		dentry->d_parent = dget(parent);
761 		dentry->d_sb = parent->d_sb;
762 	} else {
763 		INIT_LIST_HEAD(&dentry->d_child);
764 	}
765 
766 	spin_lock(&dcache_lock);
767 	if (parent)
768 		list_add(&dentry->d_child, &parent->d_subdirs);
769 	dentry_stat.nr_dentry++;
770 	spin_unlock(&dcache_lock);
771 
772 	return dentry;
773 }
774 
775 struct dentry *d_alloc_name(struct dentry *parent, const char *name)
776 {
777 	struct qstr q;
778 
779 	q.name = name;
780 	q.len = strlen(name);
781 	q.hash = full_name_hash(q.name, q.len);
782 	return d_alloc(parent, &q);
783 }
784 
785 /**
786  * d_instantiate - fill in inode information for a dentry
787  * @entry: dentry to complete
788  * @inode: inode to attach to this dentry
789  *
790  * Fill in inode information in the entry.
791  *
792  * This turns negative dentries into productive full members
793  * of society.
794  *
795  * NOTE! This assumes that the inode count has been incremented
796  * (or otherwise set) by the caller to indicate that it is now
797  * in use by the dcache.
798  */
799 
800 void d_instantiate(struct dentry *entry, struct inode * inode)
801 {
802 	if (!list_empty(&entry->d_alias)) BUG();
803 	spin_lock(&dcache_lock);
804 	if (inode)
805 		list_add(&entry->d_alias, &inode->i_dentry);
806 	entry->d_inode = inode;
807 	spin_unlock(&dcache_lock);
808 	security_d_instantiate(entry, inode);
809 }
810 
811 /**
812  * d_instantiate_unique - instantiate a non-aliased dentry
813  * @entry: dentry to instantiate
814  * @inode: inode to attach to this dentry
815  *
816  * Fill in inode information in the entry. On success, it returns NULL.
817  * If an unhashed alias of "entry" already exists, then we return the
818  * aliased dentry instead.
819  *
820  * Note that in order to avoid conflicts with rename() etc, the caller
821  * had better be holding the parent directory semaphore.
822  */
823 struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
824 {
825 	struct dentry *alias;
826 	int len = entry->d_name.len;
827 	const char *name = entry->d_name.name;
828 	unsigned int hash = entry->d_name.hash;
829 
830 	BUG_ON(!list_empty(&entry->d_alias));
831 	spin_lock(&dcache_lock);
832 	if (!inode)
833 		goto do_negative;
834 	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
835 		struct qstr *qstr = &alias->d_name;
836 
837 		if (qstr->hash != hash)
838 			continue;
839 		if (alias->d_parent != entry->d_parent)
840 			continue;
841 		if (qstr->len != len)
842 			continue;
843 		if (memcmp(qstr->name, name, len))
844 			continue;
845 		dget_locked(alias);
846 		spin_unlock(&dcache_lock);
847 		BUG_ON(!d_unhashed(alias));
848 		return alias;
849 	}
850 	list_add(&entry->d_alias, &inode->i_dentry);
851 do_negative:
852 	entry->d_inode = inode;
853 	spin_unlock(&dcache_lock);
854 	security_d_instantiate(entry, inode);
855 	return NULL;
856 }
857 EXPORT_SYMBOL(d_instantiate_unique);
858 
859 /**
860  * d_alloc_root - allocate root dentry
861  * @root_inode: inode to allocate the root for
862  *
863  * Allocate a root ("/") dentry for the inode given. The inode is
864  * instantiated and returned. %NULL is returned if there is insufficient
865  * memory or the inode passed is %NULL.
866  */
867 
868 struct dentry * d_alloc_root(struct inode * root_inode)
869 {
870 	struct dentry *res = NULL;
871 
872 	if (root_inode) {
873 		static const struct qstr name = { .name = "/", .len = 1 };
874 
875 		res = d_alloc(NULL, &name);
876 		if (res) {
877 			res->d_sb = root_inode->i_sb;
878 			res->d_parent = res;
879 			d_instantiate(res, root_inode);
880 		}
881 	}
882 	return res;
883 }
884 
885 static inline struct hlist_head *d_hash(struct dentry *parent,
886 					unsigned long hash)
887 {
888 	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
889 	hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
890 	return dentry_hashtable + (hash & D_HASHMASK);
891 }
892 
893 /**
894  * d_alloc_anon - allocate an anonymous dentry
895  * @inode: inode to allocate the dentry for
896  *
897  * This is similar to d_alloc_root.  It is used by filesystems when
898  * creating a dentry for a given inode, often in the process of
899  * mapping a filehandle to a dentry.  The returned dentry may be
900  * anonymous, or may have a full name (if the inode was already
901  * in the cache).  The file system may need to make further
902  * efforts to connect this dentry into the dcache properly.
903  *
904  * When called on a directory inode, we must ensure that
905  * the inode only ever has one dentry.  If a dentry is
906  * found, that is returned instead of allocating a new one.
907  *
908  * On successful return, the reference to the inode has been transferred
909  * to the dentry.  If %NULL is returned (indicating kmalloc failure),
910  * the reference on the inode has not been released.
911  */
912 
913 struct dentry * d_alloc_anon(struct inode *inode)
914 {
915 	static const struct qstr anonstring = { .name = "" };
916 	struct dentry *tmp;
917 	struct dentry *res;
918 
919 	if ((res = d_find_alias(inode))) {
920 		iput(inode);
921 		return res;
922 	}
923 
924 	tmp = d_alloc(NULL, &anonstring);
925 	if (!tmp)
926 		return NULL;
927 
928 	tmp->d_parent = tmp; /* make sure dput doesn't croak */
929 
930 	spin_lock(&dcache_lock);
931 	res = __d_find_alias(inode, 0);
932 	if (!res) {
933 		/* attach a disconnected dentry */
934 		res = tmp;
935 		tmp = NULL;
936 		spin_lock(&res->d_lock);
937 		res->d_sb = inode->i_sb;
938 		res->d_parent = res;
939 		res->d_inode = inode;
940 		res->d_flags |= DCACHE_DISCONNECTED;
941 		res->d_flags &= ~DCACHE_UNHASHED;
942 		list_add(&res->d_alias, &inode->i_dentry);
943 		hlist_add_head(&res->d_hash, &inode->i_sb->s_anon);
944 		spin_unlock(&res->d_lock);
945 
946 		inode = NULL; /* don't drop reference */
947 	}
948 	spin_unlock(&dcache_lock);
949 
950 	if (inode)
951 		iput(inode);
952 	if (tmp)
953 		dput(tmp);
954 	return res;
955 }
956 
957 
958 /**
959  * d_splice_alias - splice a disconnected dentry into the tree if one exists
960  * @inode:  the inode which may have a disconnected dentry
961  * @dentry: a negative dentry which we want to point to the inode.
962  *
963  * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
964  * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
965  * and return it, else simply d_add the inode to the dentry and return NULL.
966  *
967  * This is needed in the lookup routine of any filesystem that is exportable
968  * (via knfsd) so that we can build dcache paths to directories effectively.
969  *
970  * If a dentry was found and moved, then it is returned.  Otherwise NULL
971  * is returned.  This matches the expected return value of ->lookup.
972  *
973  */
974 struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
975 {
976 	struct dentry *new = NULL;
977 
978 	if (inode) {
979 		spin_lock(&dcache_lock);
980 		new = __d_find_alias(inode, 1);
981 		if (new) {
982 			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
983 			spin_unlock(&dcache_lock);
984 			security_d_instantiate(new, inode);
985 			d_rehash(dentry);
986 			d_move(new, dentry);
987 			iput(inode);
988 		} else {
989 			/* d_instantiate takes dcache_lock, so we do it by hand */
990 			list_add(&dentry->d_alias, &inode->i_dentry);
991 			dentry->d_inode = inode;
992 			spin_unlock(&dcache_lock);
993 			security_d_instantiate(dentry, inode);
994 			d_rehash(dentry);
995 		}
996 	} else
997 		d_add(dentry, inode);
998 	return new;
999 }
1000 
1001 
1002 /**
1003  * d_lookup - search for a dentry
1004  * @parent: parent dentry
1005  * @name: qstr of name we wish to find
1006  *
1007  * Searches the children of the parent dentry for the name in question. If
1008  * the dentry is found its reference count is incremented and the dentry
1009  * is returned. The caller must use d_put to free the entry when it has
1010  * finished using it. %NULL is returned on failure.
1011  *
1012  * __d_lookup is dcache_lock free. The hash list is protected using RCU.
1013  * Memory barriers are used while updating and doing lockless traversal.
1014  * To avoid races with d_move while rename is happening, d_lock is used.
1015  *
1016  * Overflows in memcmp(), while d_move, are avoided by keeping the length
1017  * and name pointer in one structure pointed by d_qstr.
1018  *
1019  * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
1020  * lookup is going on.
1021  *
1022  * dentry_unused list is not updated even if lookup finds the required dentry
1023  * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
1024  * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
1025  * acquisition.
1026  *
1027  * d_lookup() is protected against the concurrent renames in some unrelated
1028  * directory using the seqlockt_t rename_lock.
1029  */
1030 
1031 struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1032 {
1033 	struct dentry * dentry = NULL;
1034 	unsigned long seq;
1035 
1036         do {
1037                 seq = read_seqbegin(&rename_lock);
1038                 dentry = __d_lookup(parent, name);
1039                 if (dentry)
1040 			break;
1041 	} while (read_seqretry(&rename_lock, seq));
1042 	return dentry;
1043 }
1044 
1045 struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1046 {
1047 	unsigned int len = name->len;
1048 	unsigned int hash = name->hash;
1049 	const unsigned char *str = name->name;
1050 	struct hlist_head *head = d_hash(parent,hash);
1051 	struct dentry *found = NULL;
1052 	struct hlist_node *node;
1053 
1054 	rcu_read_lock();
1055 
1056 	hlist_for_each_rcu(node, head) {
1057 		struct dentry *dentry;
1058 		struct qstr *qstr;
1059 
1060 		dentry = hlist_entry(node, struct dentry, d_hash);
1061 
1062 		if (dentry->d_name.hash != hash)
1063 			continue;
1064 		if (dentry->d_parent != parent)
1065 			continue;
1066 
1067 		spin_lock(&dentry->d_lock);
1068 
1069 		/*
1070 		 * Recheck the dentry after taking the lock - d_move may have
1071 		 * changed things.  Don't bother checking the hash because we're
1072 		 * about to compare the whole name anyway.
1073 		 */
1074 		if (dentry->d_parent != parent)
1075 			goto next;
1076 
1077 		/*
1078 		 * It is safe to compare names since d_move() cannot
1079 		 * change the qstr (protected by d_lock).
1080 		 */
1081 		qstr = &dentry->d_name;
1082 		if (parent->d_op && parent->d_op->d_compare) {
1083 			if (parent->d_op->d_compare(parent, qstr, name))
1084 				goto next;
1085 		} else {
1086 			if (qstr->len != len)
1087 				goto next;
1088 			if (memcmp(qstr->name, str, len))
1089 				goto next;
1090 		}
1091 
1092 		if (!d_unhashed(dentry)) {
1093 			atomic_inc(&dentry->d_count);
1094 			found = dentry;
1095 		}
1096 		spin_unlock(&dentry->d_lock);
1097 		break;
1098 next:
1099 		spin_unlock(&dentry->d_lock);
1100  	}
1101  	rcu_read_unlock();
1102 
1103  	return found;
1104 }
1105 
1106 /**
1107  * d_validate - verify dentry provided from insecure source
1108  * @dentry: The dentry alleged to be valid child of @dparent
1109  * @dparent: The parent dentry (known to be valid)
1110  * @hash: Hash of the dentry
1111  * @len: Length of the name
1112  *
1113  * An insecure source has sent us a dentry, here we verify it and dget() it.
1114  * This is used by ncpfs in its readdir implementation.
1115  * Zero is returned in the dentry is invalid.
1116  */
1117 
1118 int d_validate(struct dentry *dentry, struct dentry *dparent)
1119 {
1120 	struct hlist_head *base;
1121 	struct hlist_node *lhp;
1122 
1123 	/* Check whether the ptr might be valid at all.. */
1124 	if (!kmem_ptr_validate(dentry_cache, dentry))
1125 		goto out;
1126 
1127 	if (dentry->d_parent != dparent)
1128 		goto out;
1129 
1130 	spin_lock(&dcache_lock);
1131 	base = d_hash(dparent, dentry->d_name.hash);
1132 	hlist_for_each(lhp,base) {
1133 		/* hlist_for_each_rcu() not required for d_hash list
1134 		 * as it is parsed under dcache_lock
1135 		 */
1136 		if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
1137 			__dget_locked(dentry);
1138 			spin_unlock(&dcache_lock);
1139 			return 1;
1140 		}
1141 	}
1142 	spin_unlock(&dcache_lock);
1143 out:
1144 	return 0;
1145 }
1146 
1147 /*
1148  * When a file is deleted, we have two options:
1149  * - turn this dentry into a negative dentry
1150  * - unhash this dentry and free it.
1151  *
1152  * Usually, we want to just turn this into
1153  * a negative dentry, but if anybody else is
1154  * currently using the dentry or the inode
1155  * we can't do that and we fall back on removing
1156  * it from the hash queues and waiting for
1157  * it to be deleted later when it has no users
1158  */
1159 
1160 /**
1161  * d_delete - delete a dentry
1162  * @dentry: The dentry to delete
1163  *
1164  * Turn the dentry into a negative dentry if possible, otherwise
1165  * remove it from the hash queues so it can be deleted later
1166  */
1167 
1168 void d_delete(struct dentry * dentry)
1169 {
1170 	int isdir = 0;
1171 	/*
1172 	 * Are we the only user?
1173 	 */
1174 	spin_lock(&dcache_lock);
1175 	spin_lock(&dentry->d_lock);
1176 	isdir = S_ISDIR(dentry->d_inode->i_mode);
1177 	if (atomic_read(&dentry->d_count) == 1) {
1178 		dentry_iput(dentry);
1179 		fsnotify_nameremove(dentry, isdir);
1180 		return;
1181 	}
1182 
1183 	if (!d_unhashed(dentry))
1184 		__d_drop(dentry);
1185 
1186 	spin_unlock(&dentry->d_lock);
1187 	spin_unlock(&dcache_lock);
1188 
1189 	fsnotify_nameremove(dentry, isdir);
1190 }
1191 
1192 static void __d_rehash(struct dentry * entry, struct hlist_head *list)
1193 {
1194 
1195  	entry->d_flags &= ~DCACHE_UNHASHED;
1196  	hlist_add_head_rcu(&entry->d_hash, list);
1197 }
1198 
1199 /**
1200  * d_rehash	- add an entry back to the hash
1201  * @entry: dentry to add to the hash
1202  *
1203  * Adds a dentry to the hash according to its name.
1204  */
1205 
1206 void d_rehash(struct dentry * entry)
1207 {
1208 	struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash);
1209 
1210 	spin_lock(&dcache_lock);
1211 	spin_lock(&entry->d_lock);
1212 	__d_rehash(entry, list);
1213 	spin_unlock(&entry->d_lock);
1214 	spin_unlock(&dcache_lock);
1215 }
1216 
1217 #define do_switch(x,y) do { \
1218 	__typeof__ (x) __tmp = x; \
1219 	x = y; y = __tmp; } while (0)
1220 
1221 /*
1222  * When switching names, the actual string doesn't strictly have to
1223  * be preserved in the target - because we're dropping the target
1224  * anyway. As such, we can just do a simple memcpy() to copy over
1225  * the new name before we switch.
1226  *
1227  * Note that we have to be a lot more careful about getting the hash
1228  * switched - we have to switch the hash value properly even if it
1229  * then no longer matches the actual (corrupted) string of the target.
1230  * The hash value has to match the hash queue that the dentry is on..
1231  */
1232 static void switch_names(struct dentry *dentry, struct dentry *target)
1233 {
1234 	if (dname_external(target)) {
1235 		if (dname_external(dentry)) {
1236 			/*
1237 			 * Both external: swap the pointers
1238 			 */
1239 			do_switch(target->d_name.name, dentry->d_name.name);
1240 		} else {
1241 			/*
1242 			 * dentry:internal, target:external.  Steal target's
1243 			 * storage and make target internal.
1244 			 */
1245 			dentry->d_name.name = target->d_name.name;
1246 			target->d_name.name = target->d_iname;
1247 		}
1248 	} else {
1249 		if (dname_external(dentry)) {
1250 			/*
1251 			 * dentry:external, target:internal.  Give dentry's
1252 			 * storage to target and make dentry internal
1253 			 */
1254 			memcpy(dentry->d_iname, target->d_name.name,
1255 					target->d_name.len + 1);
1256 			target->d_name.name = dentry->d_name.name;
1257 			dentry->d_name.name = dentry->d_iname;
1258 		} else {
1259 			/*
1260 			 * Both are internal.  Just copy target to dentry
1261 			 */
1262 			memcpy(dentry->d_iname, target->d_name.name,
1263 					target->d_name.len + 1);
1264 		}
1265 	}
1266 }
1267 
1268 /*
1269  * We cannibalize "target" when moving dentry on top of it,
1270  * because it's going to be thrown away anyway. We could be more
1271  * polite about it, though.
1272  *
1273  * This forceful removal will result in ugly /proc output if
1274  * somebody holds a file open that got deleted due to a rename.
1275  * We could be nicer about the deleted file, and let it show
1276  * up under the name it got deleted rather than the name that
1277  * deleted it.
1278  */
1279 
1280 /**
1281  * d_move - move a dentry
1282  * @dentry: entry to move
1283  * @target: new dentry
1284  *
1285  * Update the dcache to reflect the move of a file name. Negative
1286  * dcache entries should not be moved in this way.
1287  */
1288 
1289 void d_move(struct dentry * dentry, struct dentry * target)
1290 {
1291 	struct hlist_head *list;
1292 
1293 	if (!dentry->d_inode)
1294 		printk(KERN_WARNING "VFS: moving negative dcache entry\n");
1295 
1296 	spin_lock(&dcache_lock);
1297 	write_seqlock(&rename_lock);
1298 	/*
1299 	 * XXXX: do we really need to take target->d_lock?
1300 	 */
1301 	if (target < dentry) {
1302 		spin_lock(&target->d_lock);
1303 		spin_lock(&dentry->d_lock);
1304 	} else {
1305 		spin_lock(&dentry->d_lock);
1306 		spin_lock(&target->d_lock);
1307 	}
1308 
1309 	/* Move the dentry to the target hash queue, if on different bucket */
1310 	if (dentry->d_flags & DCACHE_UNHASHED)
1311 		goto already_unhashed;
1312 
1313 	hlist_del_rcu(&dentry->d_hash);
1314 
1315 already_unhashed:
1316 	list = d_hash(target->d_parent, target->d_name.hash);
1317 	__d_rehash(dentry, list);
1318 
1319 	/* Unhash the target: dput() will then get rid of it */
1320 	__d_drop(target);
1321 
1322 	list_del(&dentry->d_child);
1323 	list_del(&target->d_child);
1324 
1325 	/* Switch the names.. */
1326 	switch_names(dentry, target);
1327 	do_switch(dentry->d_name.len, target->d_name.len);
1328 	do_switch(dentry->d_name.hash, target->d_name.hash);
1329 
1330 	/* ... and switch the parents */
1331 	if (IS_ROOT(dentry)) {
1332 		dentry->d_parent = target->d_parent;
1333 		target->d_parent = target;
1334 		INIT_LIST_HEAD(&target->d_child);
1335 	} else {
1336 		do_switch(dentry->d_parent, target->d_parent);
1337 
1338 		/* And add them back to the (new) parent lists */
1339 		list_add(&target->d_child, &target->d_parent->d_subdirs);
1340 	}
1341 
1342 	list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
1343 	spin_unlock(&target->d_lock);
1344 	spin_unlock(&dentry->d_lock);
1345 	write_sequnlock(&rename_lock);
1346 	spin_unlock(&dcache_lock);
1347 }
1348 
1349 /**
1350  * d_path - return the path of a dentry
1351  * @dentry: dentry to report
1352  * @vfsmnt: vfsmnt to which the dentry belongs
1353  * @root: root dentry
1354  * @rootmnt: vfsmnt to which the root dentry belongs
1355  * @buffer: buffer to return value in
1356  * @buflen: buffer length
1357  *
1358  * Convert a dentry into an ASCII path name. If the entry has been deleted
1359  * the string " (deleted)" is appended. Note that this is ambiguous.
1360  *
1361  * Returns the buffer or an error code if the path was too long.
1362  *
1363  * "buflen" should be positive. Caller holds the dcache_lock.
1364  */
1365 static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
1366 			struct dentry *root, struct vfsmount *rootmnt,
1367 			char *buffer, int buflen)
1368 {
1369 	char * end = buffer+buflen;
1370 	char * retval;
1371 	int namelen;
1372 
1373 	*--end = '\0';
1374 	buflen--;
1375 	if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
1376 		buflen -= 10;
1377 		end -= 10;
1378 		if (buflen < 0)
1379 			goto Elong;
1380 		memcpy(end, " (deleted)", 10);
1381 	}
1382 
1383 	if (buflen < 1)
1384 		goto Elong;
1385 	/* Get '/' right */
1386 	retval = end-1;
1387 	*retval = '/';
1388 
1389 	for (;;) {
1390 		struct dentry * parent;
1391 
1392 		if (dentry == root && vfsmnt == rootmnt)
1393 			break;
1394 		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
1395 			/* Global root? */
1396 			spin_lock(&vfsmount_lock);
1397 			if (vfsmnt->mnt_parent == vfsmnt) {
1398 				spin_unlock(&vfsmount_lock);
1399 				goto global_root;
1400 			}
1401 			dentry = vfsmnt->mnt_mountpoint;
1402 			vfsmnt = vfsmnt->mnt_parent;
1403 			spin_unlock(&vfsmount_lock);
1404 			continue;
1405 		}
1406 		parent = dentry->d_parent;
1407 		prefetch(parent);
1408 		namelen = dentry->d_name.len;
1409 		buflen -= namelen + 1;
1410 		if (buflen < 0)
1411 			goto Elong;
1412 		end -= namelen;
1413 		memcpy(end, dentry->d_name.name, namelen);
1414 		*--end = '/';
1415 		retval = end;
1416 		dentry = parent;
1417 	}
1418 
1419 	return retval;
1420 
1421 global_root:
1422 	namelen = dentry->d_name.len;
1423 	buflen -= namelen;
1424 	if (buflen < 0)
1425 		goto Elong;
1426 	retval -= namelen-1;	/* hit the slash */
1427 	memcpy(retval, dentry->d_name.name, namelen);
1428 	return retval;
1429 Elong:
1430 	return ERR_PTR(-ENAMETOOLONG);
1431 }
1432 
1433 /* write full pathname into buffer and return start of pathname */
1434 char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
1435 				char *buf, int buflen)
1436 {
1437 	char *res;
1438 	struct vfsmount *rootmnt;
1439 	struct dentry *root;
1440 
1441 	read_lock(&current->fs->lock);
1442 	rootmnt = mntget(current->fs->rootmnt);
1443 	root = dget(current->fs->root);
1444 	read_unlock(&current->fs->lock);
1445 	spin_lock(&dcache_lock);
1446 	res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen);
1447 	spin_unlock(&dcache_lock);
1448 	dput(root);
1449 	mntput(rootmnt);
1450 	return res;
1451 }
1452 
1453 /*
1454  * NOTE! The user-level library version returns a
1455  * character pointer. The kernel system call just
1456  * returns the length of the buffer filled (which
1457  * includes the ending '\0' character), or a negative
1458  * error value. So libc would do something like
1459  *
1460  *	char *getcwd(char * buf, size_t size)
1461  *	{
1462  *		int retval;
1463  *
1464  *		retval = sys_getcwd(buf, size);
1465  *		if (retval >= 0)
1466  *			return buf;
1467  *		errno = -retval;
1468  *		return NULL;
1469  *	}
1470  */
1471 asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
1472 {
1473 	int error;
1474 	struct vfsmount *pwdmnt, *rootmnt;
1475 	struct dentry *pwd, *root;
1476 	char *page = (char *) __get_free_page(GFP_USER);
1477 
1478 	if (!page)
1479 		return -ENOMEM;
1480 
1481 	read_lock(&current->fs->lock);
1482 	pwdmnt = mntget(current->fs->pwdmnt);
1483 	pwd = dget(current->fs->pwd);
1484 	rootmnt = mntget(current->fs->rootmnt);
1485 	root = dget(current->fs->root);
1486 	read_unlock(&current->fs->lock);
1487 
1488 	error = -ENOENT;
1489 	/* Has the current directory has been unlinked? */
1490 	spin_lock(&dcache_lock);
1491 	if (pwd->d_parent == pwd || !d_unhashed(pwd)) {
1492 		unsigned long len;
1493 		char * cwd;
1494 
1495 		cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE);
1496 		spin_unlock(&dcache_lock);
1497 
1498 		error = PTR_ERR(cwd);
1499 		if (IS_ERR(cwd))
1500 			goto out;
1501 
1502 		error = -ERANGE;
1503 		len = PAGE_SIZE + page - cwd;
1504 		if (len <= size) {
1505 			error = len;
1506 			if (copy_to_user(buf, cwd, len))
1507 				error = -EFAULT;
1508 		}
1509 	} else
1510 		spin_unlock(&dcache_lock);
1511 
1512 out:
1513 	dput(pwd);
1514 	mntput(pwdmnt);
1515 	dput(root);
1516 	mntput(rootmnt);
1517 	free_page((unsigned long) page);
1518 	return error;
1519 }
1520 
1521 /*
1522  * Test whether new_dentry is a subdirectory of old_dentry.
1523  *
1524  * Trivially implemented using the dcache structure
1525  */
1526 
1527 /**
1528  * is_subdir - is new dentry a subdirectory of old_dentry
1529  * @new_dentry: new dentry
1530  * @old_dentry: old dentry
1531  *
1532  * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
1533  * Returns 0 otherwise.
1534  * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
1535  */
1536 
1537 int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry)
1538 {
1539 	int result;
1540 	struct dentry * saved = new_dentry;
1541 	unsigned long seq;
1542 
1543 	/* need rcu_readlock to protect against the d_parent trashing due to
1544 	 * d_move
1545 	 */
1546 	rcu_read_lock();
1547         do {
1548 		/* for restarting inner loop in case of seq retry */
1549 		new_dentry = saved;
1550 		result = 0;
1551 		seq = read_seqbegin(&rename_lock);
1552 		for (;;) {
1553 			if (new_dentry != old_dentry) {
1554 				struct dentry * parent = new_dentry->d_parent;
1555 				if (parent == new_dentry)
1556 					break;
1557 				new_dentry = parent;
1558 				continue;
1559 			}
1560 			result = 1;
1561 			break;
1562 		}
1563 	} while (read_seqretry(&rename_lock, seq));
1564 	rcu_read_unlock();
1565 
1566 	return result;
1567 }
1568 
1569 void d_genocide(struct dentry *root)
1570 {
1571 	struct dentry *this_parent = root;
1572 	struct list_head *next;
1573 
1574 	spin_lock(&dcache_lock);
1575 repeat:
1576 	next = this_parent->d_subdirs.next;
1577 resume:
1578 	while (next != &this_parent->d_subdirs) {
1579 		struct list_head *tmp = next;
1580 		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
1581 		next = tmp->next;
1582 		if (d_unhashed(dentry)||!dentry->d_inode)
1583 			continue;
1584 		if (!list_empty(&dentry->d_subdirs)) {
1585 			this_parent = dentry;
1586 			goto repeat;
1587 		}
1588 		atomic_dec(&dentry->d_count);
1589 	}
1590 	if (this_parent != root) {
1591 		next = this_parent->d_child.next;
1592 		atomic_dec(&this_parent->d_count);
1593 		this_parent = this_parent->d_parent;
1594 		goto resume;
1595 	}
1596 	spin_unlock(&dcache_lock);
1597 }
1598 
1599 /**
1600  * find_inode_number - check for dentry with name
1601  * @dir: directory to check
1602  * @name: Name to find.
1603  *
1604  * Check whether a dentry already exists for the given name,
1605  * and return the inode number if it has an inode. Otherwise
1606  * 0 is returned.
1607  *
1608  * This routine is used to post-process directory listings for
1609  * filesystems using synthetic inode numbers, and is necessary
1610  * to keep getcwd() working.
1611  */
1612 
1613 ino_t find_inode_number(struct dentry *dir, struct qstr *name)
1614 {
1615 	struct dentry * dentry;
1616 	ino_t ino = 0;
1617 
1618 	/*
1619 	 * Check for a fs-specific hash function. Note that we must
1620 	 * calculate the standard hash first, as the d_op->d_hash()
1621 	 * routine may choose to leave the hash value unchanged.
1622 	 */
1623 	name->hash = full_name_hash(name->name, name->len);
1624 	if (dir->d_op && dir->d_op->d_hash)
1625 	{
1626 		if (dir->d_op->d_hash(dir, name) != 0)
1627 			goto out;
1628 	}
1629 
1630 	dentry = d_lookup(dir, name);
1631 	if (dentry)
1632 	{
1633 		if (dentry->d_inode)
1634 			ino = dentry->d_inode->i_ino;
1635 		dput(dentry);
1636 	}
1637 out:
1638 	return ino;
1639 }
1640 
1641 static __initdata unsigned long dhash_entries;
1642 static int __init set_dhash_entries(char *str)
1643 {
1644 	if (!str)
1645 		return 0;
1646 	dhash_entries = simple_strtoul(str, &str, 0);
1647 	return 1;
1648 }
1649 __setup("dhash_entries=", set_dhash_entries);
1650 
1651 static void __init dcache_init_early(void)
1652 {
1653 	int loop;
1654 
1655 	/* If hashes are distributed across NUMA nodes, defer
1656 	 * hash allocation until vmalloc space is available.
1657 	 */
1658 	if (hashdist)
1659 		return;
1660 
1661 	dentry_hashtable =
1662 		alloc_large_system_hash("Dentry cache",
1663 					sizeof(struct hlist_head),
1664 					dhash_entries,
1665 					13,
1666 					HASH_EARLY,
1667 					&d_hash_shift,
1668 					&d_hash_mask,
1669 					0);
1670 
1671 	for (loop = 0; loop < (1 << d_hash_shift); loop++)
1672 		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
1673 }
1674 
1675 static void __init dcache_init(unsigned long mempages)
1676 {
1677 	int loop;
1678 
1679 	/*
1680 	 * A constructor could be added for stable state like the lists,
1681 	 * but it is probably not worth it because of the cache nature
1682 	 * of the dcache.
1683 	 */
1684 	dentry_cache = kmem_cache_create("dentry_cache",
1685 					 sizeof(struct dentry),
1686 					 0,
1687 					 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC,
1688 					 NULL, NULL);
1689 
1690 	set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
1691 
1692 	/* Hash may have been set up in dcache_init_early */
1693 	if (!hashdist)
1694 		return;
1695 
1696 	dentry_hashtable =
1697 		alloc_large_system_hash("Dentry cache",
1698 					sizeof(struct hlist_head),
1699 					dhash_entries,
1700 					13,
1701 					0,
1702 					&d_hash_shift,
1703 					&d_hash_mask,
1704 					0);
1705 
1706 	for (loop = 0; loop < (1 << d_hash_shift); loop++)
1707 		INIT_HLIST_HEAD(&dentry_hashtable[loop]);
1708 }
1709 
1710 /* SLAB cache for __getname() consumers */
1711 kmem_cache_t *names_cachep;
1712 
1713 /* SLAB cache for file structures */
1714 kmem_cache_t *filp_cachep;
1715 
1716 EXPORT_SYMBOL(d_genocide);
1717 
1718 extern void bdev_cache_init(void);
1719 extern void chrdev_init(void);
1720 
1721 void __init vfs_caches_init_early(void)
1722 {
1723 	dcache_init_early();
1724 	inode_init_early();
1725 }
1726 
1727 void __init vfs_caches_init(unsigned long mempages)
1728 {
1729 	unsigned long reserve;
1730 
1731 	/* Base hash sizes on available memory, with a reserve equal to
1732            150% of current kernel size */
1733 
1734 	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
1735 	mempages -= reserve;
1736 
1737 	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
1738 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1739 
1740 	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
1741 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor);
1742 
1743 	dcache_init(mempages);
1744 	inode_init(mempages);
1745 	files_init(mempages);
1746 	mnt_init(mempages);
1747 	bdev_cache_init();
1748 	chrdev_init();
1749 }
1750 
1751 EXPORT_SYMBOL(d_alloc);
1752 EXPORT_SYMBOL(d_alloc_anon);
1753 EXPORT_SYMBOL(d_alloc_root);
1754 EXPORT_SYMBOL(d_delete);
1755 EXPORT_SYMBOL(d_find_alias);
1756 EXPORT_SYMBOL(d_instantiate);
1757 EXPORT_SYMBOL(d_invalidate);
1758 EXPORT_SYMBOL(d_lookup);
1759 EXPORT_SYMBOL(d_move);
1760 EXPORT_SYMBOL(d_path);
1761 EXPORT_SYMBOL(d_prune_aliases);
1762 EXPORT_SYMBOL(d_rehash);
1763 EXPORT_SYMBOL(d_splice_alias);
1764 EXPORT_SYMBOL(d_validate);
1765 EXPORT_SYMBOL(dget_locked);
1766 EXPORT_SYMBOL(dput);
1767 EXPORT_SYMBOL(find_inode_number);
1768 EXPORT_SYMBOL(have_submounts);
1769 EXPORT_SYMBOL(names_cachep);
1770 EXPORT_SYMBOL(shrink_dcache_parent);
1771 EXPORT_SYMBOL(shrink_dcache_sb);
1772