xref: /linux/fs/sysfs/dir.c (revision aeb3f46252e26acdc60a1a8e31fb1ca6319d9a07)
1 /*
2  * dir.c - Operations for sysfs directories.
3  */
4 
5 #undef DEBUG
6 
7 #include <linux/fs.h>
8 #include <linux/mount.h>
9 #include <linux/module.h>
10 #include <linux/kobject.h>
11 #include <linux/namei.h>
12 #include <linux/idr.h>
13 #include <linux/completion.h>
14 #include <asm/semaphore.h>
15 #include "sysfs.h"
16 
17 DEFINE_MUTEX(sysfs_mutex);
18 spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
19 
20 static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
21 static DEFINE_IDA(sysfs_ino_ida);
22 
23 /**
24  *	sysfs_link_sibling - link sysfs_dirent into sibling list
25  *	@sd: sysfs_dirent of interest
26  *
27  *	Link @sd into its sibling list which starts from
28  *	sd->s_parent->s_children.
29  *
30  *	Locking:
31  *	mutex_lock(sysfs_mutex)
32  */
33 void sysfs_link_sibling(struct sysfs_dirent *sd)
34 {
35 	struct sysfs_dirent *parent_sd = sd->s_parent;
36 
37 	BUG_ON(sd->s_sibling);
38 	sd->s_sibling = parent_sd->s_children;
39 	parent_sd->s_children = sd;
40 }
41 
42 /**
43  *	sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
44  *	@sd: sysfs_dirent of interest
45  *
46  *	Unlink @sd from its sibling list which starts from
47  *	sd->s_parent->s_children.
48  *
49  *	Locking:
50  *	mutex_lock(sysfs_mutex)
51  */
52 void sysfs_unlink_sibling(struct sysfs_dirent *sd)
53 {
54 	struct sysfs_dirent **pos;
55 
56 	for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) {
57 		if (*pos == sd) {
58 			*pos = sd->s_sibling;
59 			sd->s_sibling = NULL;
60 			break;
61 		}
62 	}
63 }
64 
/**
 *	sysfs_get_dentry - get dentry for the given sysfs_dirent
 *	@sd: sysfs_dirent of interest
 *
 *	Get dentry for @sd.  Dentry is looked up if currently not
 *	present.  This function climbs sysfs_dirent tree till it
 *	reaches a sysfs_dirent with valid dentry attached and descends
 *	down from there looking up dentry for each step.
 *
 *	If a sysfs_dirent flagged SYSFS_FLAG_REMOVED is met while
 *	climbing, ERR_PTR(-ENOENT) is returned.  If the tree changes
 *	while sysfs_mutex is dropped for a lookup, the whole procedure
 *	is restarted.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep)
 *
 *	RETURNS:
 *	Pointer to found dentry on success, ERR_PTR() value on error.
 */
struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
{
	struct sysfs_dirent *cur;
	struct dentry *parent_dentry, *dentry;
	int i, depth;

	/* Find the first parent which has valid s_dentry and get the
	 * dentry.
	 */
	mutex_lock(&sysfs_mutex);
 restart0:
	spin_lock(&sysfs_assoc_lock);
 restart1:
	spin_lock(&dcache_lock);

	dentry = NULL;
	depth = 0;
	cur = sd;
	/* depth counts how many levels above @sd the starting dentry sits */
	while (!cur->s_dentry || !cur->s_dentry->d_inode) {
		if (cur->s_flags & SYSFS_FLAG_REMOVED) {
			/* depth = 0 makes the descent loop below a no-op */
			dentry = ERR_PTR(-ENOENT);
			depth = 0;
			break;
		}
		cur = cur->s_parent;
		depth++;
	}
	if (!IS_ERR(dentry))
		dentry = dget_locked(cur->s_dentry);

	spin_unlock(&dcache_lock);
	spin_unlock(&sysfs_assoc_lock);

	/* from the found dentry, look up depth times */
	while (depth--) {
		/* find and get depth'th ancestor */
		for (cur = sd, i = 0; cur && i < depth; i++)
			cur = cur->s_parent;

		/* This can happen if tree structure was modified due
		 * to move/rename.  Restart.
		 */
		if (i != depth) {
			dput(dentry);
			goto restart0;
		}

		/* pin @cur so it stays around while sysfs_mutex is dropped */
		sysfs_get(cur);

		mutex_unlock(&sysfs_mutex);

		/* look it up */
		parent_dentry = dentry;
		dentry = lookup_one_len_kern(cur->s_name, parent_dentry,
					     strlen(cur->s_name));
		dput(parent_dentry);

		if (IS_ERR(dentry)) {
			/* sysfs_mutex is not held at this point */
			sysfs_put(cur);
			return dentry;
		}

		mutex_lock(&sysfs_mutex);
		spin_lock(&sysfs_assoc_lock);

		/* This, again, can happen if tree structure has
		 * changed and we looked up the wrong thing.  Restart.
		 */
		if (cur->s_dentry != dentry) {
			dput(dentry);
			sysfs_put(cur);
			/* restart1 expects sysfs_mutex and sysfs_assoc_lock held */
			goto restart1;
		}

		spin_unlock(&sysfs_assoc_lock);

		sysfs_put(cur);
	}

	mutex_unlock(&sysfs_mutex);
	return dentry;
}
162 
163 /**
164  *	sysfs_get_active - get an active reference to sysfs_dirent
165  *	@sd: sysfs_dirent to get an active reference to
166  *
167  *	Get an active reference of @sd.  This function is noop if @sd
168  *	is NULL.
169  *
170  *	RETURNS:
171  *	Pointer to @sd on success, NULL on failure.
172  */
173 struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
174 {
175 	if (unlikely(!sd))
176 		return NULL;
177 
178 	while (1) {
179 		int v, t;
180 
181 		v = atomic_read(&sd->s_active);
182 		if (unlikely(v < 0))
183 			return NULL;
184 
185 		t = atomic_cmpxchg(&sd->s_active, v, v + 1);
186 		if (likely(t == v))
187 			return sd;
188 		if (t < 0)
189 			return NULL;
190 
191 		cpu_relax();
192 	}
193 }
194 
195 /**
196  *	sysfs_put_active - put an active reference to sysfs_dirent
197  *	@sd: sysfs_dirent to put an active reference to
198  *
199  *	Put an active reference to @sd.  This function is noop if @sd
200  *	is NULL.
201  */
202 void sysfs_put_active(struct sysfs_dirent *sd)
203 {
204 	struct completion *cmpl;
205 	int v;
206 
207 	if (unlikely(!sd))
208 		return;
209 
210 	v = atomic_dec_return(&sd->s_active);
211 	if (likely(v != SD_DEACTIVATED_BIAS))
212 		return;
213 
214 	/* atomic_dec_return() is a mb(), we'll always see the updated
215 	 * sd->s_sibling.
216 	 */
217 	cmpl = (void *)sd->s_sibling;
218 	complete(cmpl);
219 }
220 
221 /**
222  *	sysfs_get_active_two - get active references to sysfs_dirent and parent
223  *	@sd: sysfs_dirent of interest
224  *
225  *	Get active reference to @sd and its parent.  Parent's active
226  *	reference is grabbed first.  This function is noop if @sd is
227  *	NULL.
228  *
229  *	RETURNS:
230  *	Pointer to @sd on success, NULL on failure.
231  */
232 struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
233 {
234 	if (sd) {
235 		if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent)))
236 			return NULL;
237 		if (unlikely(!sysfs_get_active(sd))) {
238 			sysfs_put_active(sd->s_parent);
239 			return NULL;
240 		}
241 	}
242 	return sd;
243 }
244 
245 /**
246  *	sysfs_put_active_two - put active references to sysfs_dirent and parent
247  *	@sd: sysfs_dirent of interest
248  *
249  *	Put active references to @sd and its parent.  This function is
250  *	noop if @sd is NULL.
251  */
252 void sysfs_put_active_two(struct sysfs_dirent *sd)
253 {
254 	if (sd) {
255 		sysfs_put_active(sd);
256 		sysfs_put_active(sd->s_parent);
257 	}
258 }
259 
/**
 *	sysfs_deactivate - deactivate sysfs_dirent
 *	@sd: sysfs_dirent to deactivate
 *
 *	Deny new active references and drain existing ones.
 *
 *	@sd must already be flagged SYSFS_FLAG_REMOVED and have a NULL
 *	s_sibling; s_sibling is borrowed to pass the on-stack
 *	completion to sysfs_put_active() and is reset to NULL before
 *	returning.  May sleep in wait_for_completion().
 */
static void sysfs_deactivate(struct sysfs_dirent *sd)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	int v;

	BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
	/* publish the completion before biasing the counter */
	sd->s_sibling = (void *)&wait;

	/* atomic_add_return() is a mb(), put_active() will always see
	 * the updated sd->s_sibling.
	 */
	v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);

	/* v == SD_DEACTIVATED_BIAS means the count was zero: nothing to drain */
	if (v != SD_DEACTIVATED_BIAS)
		wait_for_completion(&wait);

	sd->s_sibling = NULL;
}
284 
285 static int sysfs_alloc_ino(ino_t *pino)
286 {
287 	int ino, rc;
288 
289  retry:
290 	spin_lock(&sysfs_ino_lock);
291 	rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
292 	spin_unlock(&sysfs_ino_lock);
293 
294 	if (rc == -EAGAIN) {
295 		if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
296 			goto retry;
297 		rc = -ENOMEM;
298 	}
299 
300 	*pino = ino;
301 	return rc;
302 }
303 
304 static void sysfs_free_ino(ino_t ino)
305 {
306 	spin_lock(&sysfs_ino_lock);
307 	ida_remove(&sysfs_ino_ida, ino);
308 	spin_unlock(&sysfs_ino_lock);
309 }
310 
311 void release_sysfs_dirent(struct sysfs_dirent * sd)
312 {
313 	struct sysfs_dirent *parent_sd;
314 
315  repeat:
316 	/* Moving/renaming is always done while holding reference.
317 	 * sd->s_parent won't change beneath us.
318 	 */
319 	parent_sd = sd->s_parent;
320 
321 	if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
322 		sysfs_put(sd->s_elem.symlink.target_sd);
323 	if (sysfs_type(sd) & SYSFS_COPY_NAME)
324 		kfree(sd->s_name);
325 	kfree(sd->s_iattr);
326 	sysfs_free_ino(sd->s_ino);
327 	kmem_cache_free(sysfs_dir_cachep, sd);
328 
329 	sd = parent_sd;
330 	if (sd && atomic_dec_and_test(&sd->s_count))
331 		goto repeat;
332 }
333 
334 static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
335 {
336 	struct sysfs_dirent * sd = dentry->d_fsdata;
337 
338 	if (sd) {
339 		/* sd->s_dentry is protected with sysfs_assoc_lock.
340 		 * This allows sysfs_drop_dentry() to dereference it.
341 		 */
342 		spin_lock(&sysfs_assoc_lock);
343 
344 		/* The dentry might have been deleted or another
345 		 * lookup could have happened updating sd->s_dentry to
346 		 * point the new dentry.  Ignore if it isn't pointing
347 		 * to this dentry.
348 		 */
349 		if (sd->s_dentry == dentry)
350 			sd->s_dentry = NULL;
351 		spin_unlock(&sysfs_assoc_lock);
352 		sysfs_put(sd);
353 	}
354 	iput(inode);
355 }
356 
/* dentry operations installed on sysfs dentries by sysfs_attach_dentry() */
static struct dentry_operations sysfs_dentry_ops = {
	.d_iput		= sysfs_d_iput,
};
360 
361 struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
362 {
363 	char *dup_name = NULL;
364 	struct sysfs_dirent *sd;
365 
366 	if (type & SYSFS_COPY_NAME) {
367 		name = dup_name = kstrdup(name, GFP_KERNEL);
368 		if (!name)
369 			return NULL;
370 	}
371 
372 	sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
373 	if (!sd)
374 		goto err_out1;
375 
376 	if (sysfs_alloc_ino(&sd->s_ino))
377 		goto err_out2;
378 
379 	atomic_set(&sd->s_count, 1);
380 	atomic_set(&sd->s_active, 0);
381 	atomic_set(&sd->s_event, 1);
382 
383 	sd->s_name = name;
384 	sd->s_mode = mode;
385 	sd->s_flags = type;
386 
387 	return sd;
388 
389  err_out2:
390 	kmem_cache_free(sysfs_dir_cachep, sd);
391  err_out1:
392 	kfree(dup_name);
393 	return NULL;
394 }
395 
396 /**
397  *	sysfs_attach_dentry - associate sysfs_dirent with dentry
398  *	@sd: target sysfs_dirent
399  *	@dentry: dentry to associate
400  *
401  *	Associate @sd with @dentry.  This is protected by
402  *	sysfs_assoc_lock to avoid race with sysfs_d_iput().
403  *
404  *	LOCKING:
405  *	mutex_lock(sysfs_mutex)
406  */
407 static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry)
408 {
409 	dentry->d_op = &sysfs_dentry_ops;
410 	dentry->d_fsdata = sysfs_get(sd);
411 
412 	/* protect sd->s_dentry against sysfs_d_iput */
413 	spin_lock(&sysfs_assoc_lock);
414 	sd->s_dentry = dentry;
415 	spin_unlock(&sysfs_assoc_lock);
416 
417 	d_rehash(dentry);
418 }
419 
420 static int sysfs_ilookup_test(struct inode *inode, void *arg)
421 {
422 	struct sysfs_dirent *sd = arg;
423 	return inode->i_ino == sd->s_ino;
424 }
425 
/**
 *	sysfs_addrm_start - prepare for sysfs_dirent add/remove
 *	@acxt: pointer to sysfs_addrm_cxt to be used
 *	@parent_sd: parent sysfs_dirent
 *
 *	This function is called when the caller is about to add or
 *	remove sysfs_dirent under @parent_sd.  This function acquires
 *	sysfs_mutex, grabs inode for @parent_sd if available and lock
 *	i_mutex of it.  @acxt is used to keep and pass context to
 *	other addrm functions.
 *
 *	@acxt is zeroed first, so every field other than parent_sd
 *	(and parent_inode when an inode is found) starts out as 0/NULL.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).  sysfs_mutex is locked on
 *	return.  i_mutex of parent inode is locked on return if
 *	available.
 */
void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
		       struct sysfs_dirent *parent_sd)
{
	struct inode *inode;

	memset(acxt, 0, sizeof(*acxt));
	acxt->parent_sd = parent_sd;

	/* Lookup parent inode.  inode initialization and I_NEW
	 * clearing are protected by sysfs_mutex.  By grabbing it and
	 * looking up with _nowait variant, inode state can be
	 * determined reliably.
	 */
	mutex_lock(&sysfs_mutex);

	inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
				parent_sd);

	if (inode && !(inode->i_state & I_NEW)) {
		/* parent inode available */
		acxt->parent_inode = inode;

		/* sysfs_mutex is below i_mutex in lock hierarchy.
		 * First, trylock i_mutex.  If fails, unlock
		 * sysfs_mutex and lock them in order.
		 */
		if (!mutex_trylock(&inode->i_mutex)) {
			mutex_unlock(&sysfs_mutex);
			mutex_lock(&inode->i_mutex);
			mutex_lock(&sysfs_mutex);
		}
	} else
		/* no usable inode: drop the lookup reference; inode may be
		 * NULL here (presumably iput() tolerates that - confirm)
		 */
		iput(inode);
}
476 
477 /**
478  *	sysfs_add_one - add sysfs_dirent to parent
479  *	@acxt: addrm context to use
480  *	@sd: sysfs_dirent to be added
481  *
482  *	Get @acxt->parent_sd and set sd->s_parent to it and increment
483  *	nlink of parent inode if @sd is a directory.  @sd is NOT
484  *	linked into the children list of the parent.  The caller
485  *	should invoke sysfs_link_sibling() after this function
486  *	completes if @sd needs to be on the children list.
487  *
488  *	This function should be called between calls to
489  *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
490  *	passed the same @acxt as passed to sysfs_addrm_start().
491  *
492  *	LOCKING:
493  *	Determined by sysfs_addrm_start().
494  */
495 void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
496 {
497 	sd->s_parent = sysfs_get(acxt->parent_sd);
498 
499 	if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
500 		inc_nlink(acxt->parent_inode);
501 
502 	acxt->cnt++;
503 }
504 
505 /**
506  *	sysfs_remove_one - remove sysfs_dirent from parent
507  *	@acxt: addrm context to use
508  *	@sd: sysfs_dirent to be added
509  *
510  *	Mark @sd removed and drop nlink of parent inode if @sd is a
511  *	directory.  @sd is NOT unlinked from the children list of the
512  *	parent.  The caller is repsonsible for removing @sd from the
513  *	children list before calling this function.
514  *
515  *	This function should be called between calls to
516  *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
517  *	passed the same @acxt as passed to sysfs_addrm_start().
518  *
519  *	LOCKING:
520  *	Determined by sysfs_addrm_start().
521  */
522 void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
523 {
524 	BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED));
525 
526 	sd->s_flags |= SYSFS_FLAG_REMOVED;
527 	sd->s_sibling = acxt->removed;
528 	acxt->removed = sd;
529 
530 	if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
531 		drop_nlink(acxt->parent_inode);
532 
533 	acxt->cnt++;
534 }
535 
/**
 *	sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
 *	@sd: target sysfs_dirent
 *
 *	Drop dentry for @sd.  @sd must have been unlinked from its
 *	parent on entry to this function such that it can't be looked
 *	up anymore.
 *
 *	@sd->s_dentry which is protected with sysfs_assoc_lock points
 *	to the currently associated dentry but we're not holding a
 *	reference to it and racing with dput().  Grab dcache_lock and
 *	verify dentry before dropping it.  If @sd->s_dentry is NULL or
 *	dput() beats us, no need to bother.
 *
 *	Afterwards, if an inode with @sd->s_ino exists, its ctime is
 *	updated and its nlink dropped (twice for directories).
 */
static void sysfs_drop_dentry(struct sysfs_dirent *sd)
{
	struct dentry *dentry = NULL;
	struct inode *inode;

	/* We're not holding a reference to ->s_dentry dentry but the
	 * field will stay valid as long as sysfs_assoc_lock is held.
	 */
	spin_lock(&sysfs_assoc_lock);
	spin_lock(&dcache_lock);

	/* drop dentry if it's there and dput() didn't kill it yet */
	if (sd->s_dentry && sd->s_dentry->d_inode) {
		dentry = dget_locked(sd->s_dentry);
		spin_lock(&dentry->d_lock);
		__d_drop(dentry);
		spin_unlock(&dentry->d_lock);
	}

	spin_unlock(&dcache_lock);
	spin_unlock(&sysfs_assoc_lock);

	/* dentries for shadowed inodes are pinned, unpin */
	if (dentry && sysfs_is_shadowed_inode(dentry->d_inode))
		dput(dentry);
	/* drops the reference taken above; dentry may still be NULL here
	 * (presumably dput() tolerates that - confirm)
	 */
	dput(dentry);

	/* adjust nlink and update timestamp */
	inode = ilookup(sysfs_sb, sd->s_ino);
	if (inode) {
		mutex_lock(&inode->i_mutex);

		inode->i_ctime = CURRENT_TIME;
		drop_nlink(inode);
		if (sysfs_type(sd) == SYSFS_DIR)
			drop_nlink(inode);

		mutex_unlock(&inode->i_mutex);
		iput(inode);
	}
}
591 
592 /**
593  *	sysfs_addrm_finish - finish up sysfs_dirent add/remove
594  *	@acxt: addrm context to finish up
595  *
596  *	Finish up sysfs_dirent add/remove.  Resources acquired by
597  *	sysfs_addrm_start() are released and removed sysfs_dirents are
598  *	cleaned up.  Timestamps on the parent inode are updated.
599  *
600  *	LOCKING:
601  *	All mutexes acquired by sysfs_addrm_start() are released.
602  *
603  *	RETURNS:
604  *	Number of added/removed sysfs_dirents since sysfs_addrm_start().
605  */
606 int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
607 {
608 	/* release resources acquired by sysfs_addrm_start() */
609 	mutex_unlock(&sysfs_mutex);
610 	if (acxt->parent_inode) {
611 		struct inode *inode = acxt->parent_inode;
612 
613 		/* if added/removed, update timestamps on the parent */
614 		if (acxt->cnt)
615 			inode->i_ctime = inode->i_mtime = CURRENT_TIME;
616 
617 		mutex_unlock(&inode->i_mutex);
618 		iput(inode);
619 	}
620 
621 	/* kill removed sysfs_dirents */
622 	while (acxt->removed) {
623 		struct sysfs_dirent *sd = acxt->removed;
624 
625 		acxt->removed = sd->s_sibling;
626 		sd->s_sibling = NULL;
627 
628 		sysfs_drop_dentry(sd);
629 		sysfs_deactivate(sd);
630 		sysfs_put(sd);
631 	}
632 
633 	return acxt->cnt;
634 }
635 
636 /**
637  *	sysfs_find_dirent - find sysfs_dirent with the given name
638  *	@parent_sd: sysfs_dirent to search under
639  *	@name: name to look for
640  *
641  *	Look for sysfs_dirent with name @name under @parent_sd.
642  *
643  *	LOCKING:
644  *	mutex_lock(sysfs_mutex)
645  *
646  *	RETURNS:
647  *	Pointer to sysfs_dirent if found, NULL if not.
648  */
649 struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
650 				       const unsigned char *name)
651 {
652 	struct sysfs_dirent *sd;
653 
654 	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling)
655 		if (sysfs_type(sd) && !strcmp(sd->s_name, name))
656 			return sd;
657 	return NULL;
658 }
659 
660 /**
661  *	sysfs_get_dirent - find and get sysfs_dirent with the given name
662  *	@parent_sd: sysfs_dirent to search under
663  *	@name: name to look for
664  *
665  *	Look for sysfs_dirent with name @name under @parent_sd and get
666  *	it if found.
667  *
668  *	LOCKING:
669  *	Kernel thread context (may sleep).  Grabs sysfs_mutex.
670  *
671  *	RETURNS:
672  *	Pointer to sysfs_dirent if found, NULL if not.
673  */
674 struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
675 				      const unsigned char *name)
676 {
677 	struct sysfs_dirent *sd;
678 
679 	mutex_lock(&sysfs_mutex);
680 	sd = sysfs_find_dirent(parent_sd, name);
681 	sysfs_get(sd);
682 	mutex_unlock(&sysfs_mutex);
683 
684 	return sd;
685 }
686 
687 static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
688 		      const char *name, struct sysfs_dirent **p_sd)
689 {
690 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
691 	struct sysfs_addrm_cxt acxt;
692 	struct sysfs_dirent *sd;
693 
694 	/* allocate */
695 	sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
696 	if (!sd)
697 		return -ENOMEM;
698 	sd->s_elem.dir.kobj = kobj;
699 
700 	/* link in */
701 	sysfs_addrm_start(&acxt, parent_sd);
702 
703 	if (!sysfs_find_dirent(parent_sd, name)) {
704 		sysfs_add_one(&acxt, sd);
705 		sysfs_link_sibling(sd);
706 	}
707 
708 	if (!sysfs_addrm_finish(&acxt)) {
709 		sysfs_put(sd);
710 		return -EEXIST;
711 	}
712 
713 	*p_sd = sd;
714 	return 0;
715 }
716 
717 int sysfs_create_subdir(struct kobject *kobj, const char *name,
718 			struct sysfs_dirent **p_sd)
719 {
720 	return create_dir(kobj, kobj->sd, name, p_sd);
721 }
722 
723 /**
724  *	sysfs_create_dir - create a directory for an object.
725  *	@kobj:		object we're creating directory for.
726  *	@shadow_parent:	parent object.
727  */
728 int sysfs_create_dir(struct kobject *kobj,
729 		     struct sysfs_dirent *shadow_parent_sd)
730 {
731 	struct sysfs_dirent *parent_sd, *sd;
732 	int error = 0;
733 
734 	BUG_ON(!kobj);
735 
736 	if (shadow_parent_sd)
737 		parent_sd = shadow_parent_sd;
738 	else if (kobj->parent)
739 		parent_sd = kobj->parent->sd;
740 	else if (sysfs_mount && sysfs_mount->mnt_sb)
741 		parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
742 	else
743 		return -EFAULT;
744 
745 	error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
746 	if (!error)
747 		kobj->sd = sd;
748 	return error;
749 }
750 
751 static int sysfs_count_nlink(struct sysfs_dirent *sd)
752 {
753 	struct sysfs_dirent *child;
754 	int nr = 0;
755 
756 	for (child = sd->s_children; child; child = child->s_sibling)
757 		if (sysfs_type(child) == SYSFS_DIR)
758 			nr++;
759 	return nr + 2;
760 }
761 
762 static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
763 				struct nameidata *nd)
764 {
765 	struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
766 	struct sysfs_dirent * sd;
767 	struct bin_attribute *bin_attr;
768 	struct inode *inode;
769 	int found = 0;
770 
771 	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
772 		if (sysfs_type(sd) &&
773 		    !strcmp(sd->s_name, dentry->d_name.name)) {
774 			found = 1;
775 			break;
776 		}
777 	}
778 
779 	/* no such entry */
780 	if (!found)
781 		return NULL;
782 
783 	/* attach dentry and inode */
784 	inode = sysfs_get_inode(sd);
785 	if (!inode)
786 		return ERR_PTR(-ENOMEM);
787 
788 	mutex_lock(&sysfs_mutex);
789 
790 	if (inode->i_state & I_NEW) {
791 		/* initialize inode according to type */
792 		switch (sysfs_type(sd)) {
793 		case SYSFS_DIR:
794 			inode->i_op = &sysfs_dir_inode_operations;
795 			inode->i_fop = &sysfs_dir_operations;
796 			inode->i_nlink = sysfs_count_nlink(sd);
797 			break;
798 		case SYSFS_KOBJ_ATTR:
799 			inode->i_size = PAGE_SIZE;
800 			inode->i_fop = &sysfs_file_operations;
801 			break;
802 		case SYSFS_KOBJ_BIN_ATTR:
803 			bin_attr = sd->s_elem.bin_attr.bin_attr;
804 			inode->i_size = bin_attr->size;
805 			inode->i_fop = &bin_fops;
806 			break;
807 		case SYSFS_KOBJ_LINK:
808 			inode->i_op = &sysfs_symlink_inode_operations;
809 			break;
810 		default:
811 			BUG();
812 		}
813 	}
814 
815 	sysfs_instantiate(dentry, inode);
816 	sysfs_attach_dentry(sd, dentry);
817 
818 	mutex_unlock(&sysfs_mutex);
819 
820 	return NULL;
821 }
822 
/* inode operations assigned to SYSFS_DIR inodes in sysfs_lookup() */
const struct inode_operations sysfs_dir_inode_operations = {
	.lookup		= sysfs_lookup,
	.setattr	= sysfs_setattr,
};
827 
828 static void remove_dir(struct sysfs_dirent *sd)
829 {
830 	struct sysfs_addrm_cxt acxt;
831 
832 	sysfs_addrm_start(&acxt, sd->s_parent);
833 	sysfs_unlink_sibling(sd);
834 	sysfs_remove_one(&acxt, sd);
835 	sysfs_addrm_finish(&acxt);
836 }
837 
/* sysfs_remove_subdir - remove subdirectory @sd; wrapper around remove_dir() */
void sysfs_remove_subdir(struct sysfs_dirent *sd)
{
	remove_dir(sd);
}
842 
843 
844 static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
845 {
846 	struct sysfs_addrm_cxt acxt;
847 	struct sysfs_dirent **pos;
848 
849 	if (!dir_sd)
850 		return;
851 
852 	pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
853 	sysfs_addrm_start(&acxt, dir_sd);
854 	pos = &dir_sd->s_children;
855 	while (*pos) {
856 		struct sysfs_dirent *sd = *pos;
857 
858 		if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) {
859 			*pos = sd->s_sibling;
860 			sd->s_sibling = NULL;
861 			sysfs_remove_one(&acxt, sd);
862 		} else
863 			pos = &(*pos)->s_sibling;
864 	}
865 	sysfs_addrm_finish(&acxt);
866 
867 	remove_dir(dir_sd);
868 }
869 
870 /**
871  *	sysfs_remove_dir - remove an object's directory.
872  *	@kobj:	object.
873  *
874  *	The only thing special about this is that we remove any files in
875  *	the directory before we remove the directory, and we've inlined
876  *	what used to be sysfs_rmdir() below, instead of calling separately.
877  */
878 
879 void sysfs_remove_dir(struct kobject * kobj)
880 {
881 	struct sysfs_dirent *sd = kobj->sd;
882 
883 	spin_lock(&sysfs_assoc_lock);
884 	kobj->sd = NULL;
885 	spin_unlock(&sysfs_assoc_lock);
886 
887 	__sysfs_remove_dir(sd);
888 }
889 
890 int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd,
891 		     const char *new_name)
892 {
893 	struct sysfs_dirent *sd = kobj->sd;
894 	struct dentry *new_parent = NULL;
895 	struct dentry *old_dentry = NULL, *new_dentry = NULL;
896 	const char *dup_name = NULL;
897 	int error;
898 
899 	/* get dentries */
900 	old_dentry = sysfs_get_dentry(sd);
901 	if (IS_ERR(old_dentry)) {
902 		error = PTR_ERR(old_dentry);
903 		goto out_dput;
904 	}
905 
906 	new_parent = sysfs_get_dentry(new_parent_sd);
907 	if (IS_ERR(new_parent)) {
908 		error = PTR_ERR(new_parent);
909 		goto out_dput;
910 	}
911 
912 	/* lock new_parent and get dentry for new name */
913 	mutex_lock(&new_parent->d_inode->i_mutex);
914 
915 	new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name));
916 	if (IS_ERR(new_dentry)) {
917 		error = PTR_ERR(new_dentry);
918 		goto out_unlock;
919 	}
920 
921 	/* By allowing two different directories with the same
922 	 * d_parent we allow this routine to move between different
923 	 * shadows of the same directory
924 	 */
925 	error = -EINVAL;
926 	if (old_dentry->d_parent->d_inode != new_parent->d_inode ||
927 	    new_dentry->d_parent->d_inode != new_parent->d_inode ||
928 	    old_dentry == new_dentry)
929 		goto out_unlock;
930 
931 	error = -EEXIST;
932 	if (new_dentry->d_inode)
933 		goto out_unlock;
934 
935 	/* rename kobject and sysfs_dirent */
936 	error = -ENOMEM;
937 	new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
938 	if (!new_name)
939 		goto out_drop;
940 
941 	error = kobject_set_name(kobj, "%s", new_name);
942 	if (error)
943 		goto out_drop;
944 
945 	dup_name = sd->s_name;
946 	sd->s_name = new_name;
947 
948 	/* move under the new parent */
949 	d_add(new_dentry, NULL);
950 	d_move(sd->s_dentry, new_dentry);
951 
952 	mutex_lock(&sysfs_mutex);
953 
954 	sysfs_unlink_sibling(sd);
955 	sysfs_get(new_parent_sd);
956 	sysfs_put(sd->s_parent);
957 	sd->s_parent = new_parent_sd;
958 	sysfs_link_sibling(sd);
959 
960 	mutex_unlock(&sysfs_mutex);
961 
962 	error = 0;
963 	goto out_unlock;
964 
965  out_drop:
966 	d_drop(new_dentry);
967  out_unlock:
968 	mutex_unlock(&new_parent->d_inode->i_mutex);
969  out_dput:
970 	kfree(dup_name);
971 	dput(new_parent);
972 	dput(old_dentry);
973 	dput(new_dentry);
974 	return error;
975 }
976 
977 int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
978 {
979 	struct sysfs_dirent *sd = kobj->sd;
980 	struct sysfs_dirent *new_parent_sd;
981 	struct dentry *old_parent, *new_parent = NULL;
982 	struct dentry *old_dentry = NULL, *new_dentry = NULL;
983 	int error;
984 
985 	BUG_ON(!sd->s_parent);
986 	new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
987 
988 	/* get dentries */
989 	old_dentry = sysfs_get_dentry(sd);
990 	if (IS_ERR(old_dentry)) {
991 		error = PTR_ERR(old_dentry);
992 		goto out_dput;
993 	}
994 	old_parent = sd->s_parent->s_dentry;
995 
996 	new_parent = sysfs_get_dentry(new_parent_sd);
997 	if (IS_ERR(new_parent)) {
998 		error = PTR_ERR(new_parent);
999 		goto out_dput;
1000 	}
1001 
1002 	if (old_parent->d_inode == new_parent->d_inode) {
1003 		error = 0;
1004 		goto out_dput;	/* nothing to move */
1005 	}
1006 again:
1007 	mutex_lock(&old_parent->d_inode->i_mutex);
1008 	if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
1009 		mutex_unlock(&old_parent->d_inode->i_mutex);
1010 		goto again;
1011 	}
1012 
1013 	new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name));
1014 	if (IS_ERR(new_dentry)) {
1015 		error = PTR_ERR(new_dentry);
1016 		goto out_unlock;
1017 	} else
1018 		error = 0;
1019 	d_add(new_dentry, NULL);
1020 	d_move(sd->s_dentry, new_dentry);
1021 	dput(new_dentry);
1022 
1023 	/* Remove from old parent's list and insert into new parent's list. */
1024 	mutex_lock(&sysfs_mutex);
1025 
1026 	sysfs_unlink_sibling(sd);
1027 	sysfs_get(new_parent_sd);
1028 	sysfs_put(sd->s_parent);
1029 	sd->s_parent = new_parent_sd;
1030 	sysfs_link_sibling(sd);
1031 
1032 	mutex_unlock(&sysfs_mutex);
1033 
1034  out_unlock:
1035 	mutex_unlock(&new_parent->d_inode->i_mutex);
1036 	mutex_unlock(&old_parent->d_inode->i_mutex);
1037  out_dput:
1038 	dput(new_parent);
1039 	dput(old_dentry);
1040 	dput(new_dentry);
1041 	return error;
1042 }
1043 
1044 static int sysfs_dir_open(struct inode *inode, struct file *file)
1045 {
1046 	struct dentry * dentry = file->f_path.dentry;
1047 	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
1048 	struct sysfs_dirent * sd;
1049 
1050 	sd = sysfs_new_dirent("_DIR_", 0, 0);
1051 	if (sd) {
1052 		mutex_lock(&sysfs_mutex);
1053 		sd->s_parent = sysfs_get(parent_sd);
1054 		sysfs_link_sibling(sd);
1055 		mutex_unlock(&sysfs_mutex);
1056 	}
1057 
1058 	file->private_data = sd;
1059 	return sd ? 0 : -ENOMEM;
1060 }
1061 
1062 static int sysfs_dir_close(struct inode *inode, struct file *file)
1063 {
1064 	struct sysfs_dirent * cursor = file->private_data;
1065 
1066 	mutex_lock(&sysfs_mutex);
1067 	sysfs_unlink_sibling(cursor);
1068 	mutex_unlock(&sysfs_mutex);
1069 
1070 	release_sysfs_dirent(cursor);
1071 
1072 	return 0;
1073 }
1074 
1075 /* Relationship between s_mode and the DT_xxx types */
1076 static inline unsigned char dt_type(struct sysfs_dirent *sd)
1077 {
1078 	return (sd->s_mode >> 12) & 15;
1079 }
1080 
/*
 * sysfs_readdir - ->readdir handler for sysfs directories.
 *
 * Positions 0 and 1 emit "." and "..".  From position 2 on, the
 * children list is walked starting where the per-file cursor (stored
 * in filp->private_data) sits, or from the list head when f_pos is
 * exactly 2.  Entries of type zero are skipped.  The cursor is
 * re-inserted where the walk stopped, so a later call continues from
 * there.  Always returns 0.
 */
static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
	struct dentry *dentry = filp->f_path.dentry;
	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
	struct sysfs_dirent *cursor = filp->private_data;
	struct sysfs_dirent **pos;
	ino_t ino;
	int i = filp->f_pos;

	switch (i) {
		case 0:
			ino = parent_sd->s_ino;
			if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
				break;
			filp->f_pos++;
			i++;
			/* fallthrough */
		case 1:
			/* a dirent without parent reports itself as ".." */
			if (parent_sd->s_parent)
				ino = parent_sd->s_parent->s_ino;
			else
				ino = parent_sd->s_ino;
			if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
				break;
			filp->f_pos++;
			i++;
			/* fallthrough */
		default:
			mutex_lock(&sysfs_mutex);

			/* locate the link pointing at the cursor; this
			 * relies on the cursor being on the list
			 */
			pos = &parent_sd->s_children;
			while (*pos != cursor)
				pos = &(*pos)->s_sibling;

			/* unlink cursor */
			*pos = cursor->s_sibling;

			/* at position 2, start over from the list head */
			if (filp->f_pos == 2)
				pos = &parent_sd->s_children;

			for ( ; *pos; pos = &(*pos)->s_sibling) {
				struct sysfs_dirent *next = *pos;
				const char * name;
				int len;

				/* type zero entries are not reported */
				if (!sysfs_type(next))
					continue;

				name = next->s_name;
				len = strlen(name);
				ino = next->s_ino;

				if (filldir(dirent, name, len, filp->f_pos, ino,
						 dt_type(next)) < 0)
					break;

				filp->f_pos++;
			}

			/* put cursor back in */
			cursor->s_sibling = *pos;
			*pos = cursor;

			mutex_unlock(&sysfs_mutex);
	}
	return 0;
}
1148 
1149 static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
1150 {
1151 	struct dentry * dentry = file->f_path.dentry;
1152 
1153 	switch (origin) {
1154 		case 1:
1155 			offset += file->f_pos;
1156 		case 0:
1157 			if (offset >= 0)
1158 				break;
1159 		default:
1160 			return -EINVAL;
1161 	}
1162 	if (offset != file->f_pos) {
1163 		mutex_lock(&sysfs_mutex);
1164 
1165 		file->f_pos = offset;
1166 		if (file->f_pos >= 2) {
1167 			struct sysfs_dirent *sd = dentry->d_fsdata;
1168 			struct sysfs_dirent *cursor = file->private_data;
1169 			struct sysfs_dirent **pos;
1170 			loff_t n = file->f_pos - 2;
1171 
1172 			sysfs_unlink_sibling(cursor);
1173 
1174 			pos = &sd->s_children;
1175 			while (n && *pos) {
1176 				struct sysfs_dirent *next = *pos;
1177 				if (sysfs_type(next))
1178 					n--;
1179 				pos = &(*pos)->s_sibling;
1180 			}
1181 
1182 			cursor->s_sibling = *pos;
1183 			*pos = cursor;
1184 		}
1185 
1186 		mutex_unlock(&sysfs_mutex);
1187 	}
1188 
1189 	return offset;
1190 }
1191 
1192 
1193 /**
1194  *	sysfs_make_shadowed_dir - Setup so a directory can be shadowed
1195  *	@kobj:	object we're creating shadow of.
1196  */
1197 
1198 int sysfs_make_shadowed_dir(struct kobject *kobj,
1199 	void * (*follow_link)(struct dentry *, struct nameidata *))
1200 {
1201 	struct dentry *dentry;
1202 	struct inode *inode;
1203 	struct inode_operations *i_op;
1204 
1205 	/* get dentry for @kobj->sd, dentry of a shadowed dir is pinned */
1206 	dentry = sysfs_get_dentry(kobj->sd);
1207 	if (IS_ERR(dentry))
1208 		return PTR_ERR(dentry);
1209 
1210 	inode = dentry->d_inode;
1211 	if (inode->i_op != &sysfs_dir_inode_operations) {
1212 		dput(dentry);
1213 		return -EINVAL;
1214 	}
1215 
1216 	i_op = kmalloc(sizeof(*i_op), GFP_KERNEL);
1217 	if (!i_op)
1218 		return -ENOMEM;
1219 
1220 	memcpy(i_op, &sysfs_dir_inode_operations, sizeof(*i_op));
1221 	i_op->follow_link = follow_link;
1222 
1223 	/* Locking of inode->i_op?
1224 	 * Since setting i_op is a single word write and they
1225 	 * are atomic we should be ok here.
1226 	 */
1227 	inode->i_op = i_op;
1228 	return 0;
1229 }
1230 
/**
 *	sysfs_create_shadow_dir - create a shadow directory for an object.
 *	@kobj:	object we're creating directory for.
 *
 *	sysfs_make_shadowed_dir must already have been called on this
 *	directory.
 *
 *	A new dentry with the same name and parent as @kobj's
 *	directory dentry is allocated and bound to a new sysfs_dirent
 *	named "_SHADOW_".  The new dentry shares the inode of the
 *	original directory.  The new sysfs_dirent gets the original's
 *	parent as s_parent but is not put on the children list.
 *
 *	RETURNS:
 *	Pointer to the shadow sysfs_dirent on success, ERR_PTR() value
 *	on error (-EINVAL if the inode is not shadowed, -ENOMEM on
 *	allocation failure, or the error from sysfs_get_dentry()).
 */

struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj)
{
	struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
	struct dentry *dir, *parent, *shadow;
	struct inode *inode;
	struct sysfs_dirent *sd;
	struct sysfs_addrm_cxt acxt;

	dir = sysfs_get_dentry(kobj->sd);
	if (IS_ERR(dir)) {
		/* pass the ERR_PTR value through as the return value */
		sd = (void *)dir;
		goto out;
	}
	parent = dir->d_parent;

	inode = dir->d_inode;
	sd = ERR_PTR(-EINVAL);
	if (!sysfs_is_shadowed_inode(inode))
		goto out_dput;

	shadow = d_alloc(parent, &dir->d_name);
	if (!shadow)
		goto nomem;

	sd = sysfs_new_dirent("_SHADOW_", inode->i_mode, SYSFS_DIR);
	if (!sd)
		goto nomem;
	sd->s_elem.dir.kobj = kobj;

	sysfs_addrm_start(&acxt, parent_sd);

	/* add but don't link into children list */
	sysfs_add_one(&acxt, sd);

	/* attach and instantiate dentry */
	sysfs_attach_dentry(sd, shadow);
	d_instantiate(shadow, igrab(inode));
	inc_nlink(inode);	/* tj: synchronization? */

	sysfs_addrm_finish(&acxt);

	dget(shadow);		/* Extra count - pin the dentry in core */

	goto out_dput;

 nomem:
	/* shadow is NULL here when d_alloc() itself failed */
	dput(shadow);
	sd = ERR_PTR(-ENOMEM);
 out_dput:
	dput(dir);
 out:
	return sd;
}
1292 
/**
 *	sysfs_remove_shadow_dir - remove a shadow directory.
 *	@shadow_sd: sysfs_dirent of shadow directory
 *
 *	Remove the files in the shadow directory and then the
 *	directory itself, by way of __sysfs_remove_dir().
 */

void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd)
{
	struct sysfs_dirent *dir_sd = shadow_sd;

	__sysfs_remove_dir(dir_sd);
}
1306 
/* file operations assigned to SYSFS_DIR inodes in sysfs_lookup() */
const struct file_operations sysfs_dir_operations = {
	.open		= sysfs_dir_open,
	.release	= sysfs_dir_close,
	.llseek		= sysfs_dir_lseek,
	.read		= generic_read_dir,
	.readdir	= sysfs_readdir,
};
1314