xref: /linux/fs/sysfs/dir.c (revision a0f97e06a43cf524e616f09e6af3398e1e9c1c5b)
1 /*
2  * dir.c - Operations for sysfs directories.
3  */
4 
5 #undef DEBUG
6 
7 #include <linux/fs.h>
8 #include <linux/mount.h>
9 #include <linux/module.h>
10 #include <linux/kobject.h>
11 #include <linux/namei.h>
12 #include <linux/idr.h>
13 #include <linux/completion.h>
14 #include <asm/semaphore.h>
15 #include "sysfs.h"
16 
17 DEFINE_MUTEX(sysfs_mutex);
18 spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
19 
20 static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
21 static DEFINE_IDA(sysfs_ino_ida);
22 
23 /**
24  *	sysfs_link_sibling - link sysfs_dirent into sibling list
25  *	@sd: sysfs_dirent of interest
26  *
27  *	Link @sd into its sibling list which starts from
28  *	sd->s_parent->s_children.
29  *
30  *	Locking:
31  *	mutex_lock(sysfs_mutex)
32  */
33 void sysfs_link_sibling(struct sysfs_dirent *sd)
34 {
35 	struct sysfs_dirent *parent_sd = sd->s_parent;
36 
37 	BUG_ON(sd->s_sibling);
38 	sd->s_sibling = parent_sd->s_children;
39 	parent_sd->s_children = sd;
40 }
41 
42 /**
43  *	sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
44  *	@sd: sysfs_dirent of interest
45  *
46  *	Unlink @sd from its sibling list which starts from
47  *	sd->s_parent->s_children.
48  *
49  *	Locking:
50  *	mutex_lock(sysfs_mutex)
51  */
52 void sysfs_unlink_sibling(struct sysfs_dirent *sd)
53 {
54 	struct sysfs_dirent **pos;
55 
56 	for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) {
57 		if (*pos == sd) {
58 			*pos = sd->s_sibling;
59 			sd->s_sibling = NULL;
60 			break;
61 		}
62 	}
63 }
64 
65 /**
66  *	sysfs_get_dentry - get dentry for the given sysfs_dirent
67  *	@sd: sysfs_dirent of interest
68  *
69  *	Get dentry for @sd.  Dentry is looked up if currently not
70  *	present.  This function climbs sysfs_dirent tree till it
71  *	reaches a sysfs_dirent with valid dentry attached and descends
72  *	down from there looking up dentry for each step.
73  *
74  *	LOCKING:
75  *	Kernel thread context (may sleep)
76  *
77  *	RETURNS:
78  *	Pointer to found dentry on success, ERR_PTR() value on error.
79  */
80 struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
81 {
82 	struct sysfs_dirent *cur;
83 	struct dentry *parent_dentry, *dentry;
	/* depth: number of s_parent links followed from @sd to reach the
	 * ancestor whose dentry is used as the starting point; it is also
	 * the number of lookups still to perform.
	 */
84 	int i, depth;
85 
86 	/* Find the first parent which has valid s_dentry and get the
87 	 * dentry.
88 	 */
89 	mutex_lock(&sysfs_mutex);
	/* restart0: entered with sysfs_mutex held, sysfs_assoc_lock not held */
90  restart0:
91 	spin_lock(&sysfs_assoc_lock);
	/* restart1: entered with sysfs_mutex and sysfs_assoc_lock both held */
92  restart1:
93 	spin_lock(&dcache_lock);
94 
95 	dentry = NULL;
96 	depth = 0;
97 	cur = sd;
98 	while (!cur->s_dentry || !cur->s_dentry->d_inode) {
		/* a removed node cannot be resolved: report -ENOENT and
		 * zero depth so the lookup loop below is skipped
		 */
99 		if (cur->s_flags & SYSFS_FLAG_REMOVED) {
100 			dentry = ERR_PTR(-ENOENT);
101 			depth = 0;
102 			break;
103 		}
104 		cur = cur->s_parent;
105 		depth++;
106 	}
	/* take a reference on the starting dentry while dcache_lock is held */
107 	if (!IS_ERR(dentry))
108 		dentry = dget_locked(cur->s_dentry);
109 
110 	spin_unlock(&dcache_lock);
111 	spin_unlock(&sysfs_assoc_lock);
112 
113 	/* from the found dentry, look up depth times */
114 	while (depth--) {
115 		/* find and get depth'th ancestor */
116 		for (cur = sd, i = 0; cur && i < depth; i++)
117 			cur = cur->s_parent;
118 
119 		/* This can happen if tree structure was modified due
120 		 * to move/rename.  Restart.
121 		 */
122 		if (i != depth) {
123 			dput(dentry);
124 			goto restart0;
125 		}
126 
		/* reference on cur is held across the unlocked lookup and
		 * released by sysfs_put() on every way out of this iteration
		 */
127 		sysfs_get(cur);
128 
129 		mutex_unlock(&sysfs_mutex);
130 
131 		/* look it up */
132 		parent_dentry = dentry;
133 		dentry = lookup_one_len_kern(cur->s_name, parent_dentry,
134 					     strlen(cur->s_name));
135 		dput(parent_dentry);
136 
		/* sysfs_mutex is not held at this point, so return directly */
137 		if (IS_ERR(dentry)) {
138 			sysfs_put(cur);
139 			return dentry;
140 		}
141 
142 		mutex_lock(&sysfs_mutex);
143 		spin_lock(&sysfs_assoc_lock);
144 
145 		/* This, again, can happen if tree structure has
146 		 * changed and we looked up the wrong thing.  Restart.
147 		 */
148 		if (cur->s_dentry != dentry) {
149 			dput(dentry);
150 			sysfs_put(cur);
151 			goto restart1;
152 		}
153 
154 		spin_unlock(&sysfs_assoc_lock);
155 
156 		sysfs_put(cur);
157 	}
158 
159 	mutex_unlock(&sysfs_mutex);
160 	return dentry;
161 }
162 
163 /**
164  *	sysfs_get_active - get an active reference to sysfs_dirent
165  *	@sd: sysfs_dirent to get an active reference to
166  *
167  *	Get an active reference of @sd.  This function is noop if @sd
168  *	is NULL.
169  *
170  *	RETURNS:
171  *	Pointer to @sd on success, NULL on failure.
172  */
173 struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
174 {
175 	if (unlikely(!sd))
176 		return NULL;
177 
178 	while (1) {
179 		int v, t;
180 
181 		v = atomic_read(&sd->s_active);
182 		if (unlikely(v < 0))
183 			return NULL;
184 
185 		t = atomic_cmpxchg(&sd->s_active, v, v + 1);
186 		if (likely(t == v))
187 			return sd;
188 		if (t < 0)
189 			return NULL;
190 
191 		cpu_relax();
192 	}
193 }
194 
195 /**
196  *	sysfs_put_active - put an active reference to sysfs_dirent
197  *	@sd: sysfs_dirent to put an active reference to
198  *
199  *	Put an active reference to @sd.  This function is noop if @sd
200  *	is NULL.
201  */
202 void sysfs_put_active(struct sysfs_dirent *sd)
203 {
204 	struct completion *cmpl;
205 	int v;
206 
207 	if (unlikely(!sd))
208 		return;
209 
210 	v = atomic_dec_return(&sd->s_active);
211 	if (likely(v != SD_DEACTIVATED_BIAS))
212 		return;
213 
214 	/* atomic_dec_return() is a mb(), we'll always see the updated
215 	 * sd->s_sibling.
216 	 */
217 	cmpl = (void *)sd->s_sibling;
218 	complete(cmpl);
219 }
220 
221 /**
222  *	sysfs_get_active_two - get active references to sysfs_dirent and parent
223  *	@sd: sysfs_dirent of interest
224  *
225  *	Get active reference to @sd and its parent.  Parent's active
226  *	reference is grabbed first.  This function is noop if @sd is
227  *	NULL.
228  *
229  *	RETURNS:
230  *	Pointer to @sd on success, NULL on failure.
231  */
232 struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
233 {
234 	if (sd) {
235 		if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent)))
236 			return NULL;
237 		if (unlikely(!sysfs_get_active(sd))) {
238 			sysfs_put_active(sd->s_parent);
239 			return NULL;
240 		}
241 	}
242 	return sd;
243 }
244 
245 /**
246  *	sysfs_put_active_two - put active references to sysfs_dirent and parent
247  *	@sd: sysfs_dirent of interest
248  *
249  *	Put active references to @sd and its parent.  This function is
250  *	noop if @sd is NULL.
251  */
252 void sysfs_put_active_two(struct sysfs_dirent *sd)
253 {
254 	if (sd) {
255 		sysfs_put_active(sd);
256 		sysfs_put_active(sd->s_parent);
257 	}
258 }
259 
260 /**
261  *	sysfs_deactivate - deactivate sysfs_dirent
262  *	@sd: sysfs_dirent to deactivate
263  *
264  *	Deny new active references and drain existing ones.
265  */
266 static void sysfs_deactivate(struct sysfs_dirent *sd)
267 {
268 	DECLARE_COMPLETION_ONSTACK(wait);
269 	int v;
270 
271 	BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
272 	sd->s_sibling = (void *)&wait;
273 
274 	/* atomic_add_return() is a mb(), put_active() will always see
275 	 * the updated sd->s_sibling.
276 	 */
277 	v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
278 
279 	if (v != SD_DEACTIVATED_BIAS)
280 		wait_for_completion(&wait);
281 
282 	sd->s_sibling = NULL;
283 }
284 
285 static int sysfs_alloc_ino(ino_t *pino)
286 {
287 	int ino, rc;
288 
289  retry:
290 	spin_lock(&sysfs_ino_lock);
291 	rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
292 	spin_unlock(&sysfs_ino_lock);
293 
294 	if (rc == -EAGAIN) {
295 		if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
296 			goto retry;
297 		rc = -ENOMEM;
298 	}
299 
300 	*pino = ino;
301 	return rc;
302 }
303 
304 static void sysfs_free_ino(ino_t ino)
305 {
306 	spin_lock(&sysfs_ino_lock);
307 	ida_remove(&sysfs_ino_ida, ino);
308 	spin_unlock(&sysfs_ino_lock);
309 }
310 
311 void release_sysfs_dirent(struct sysfs_dirent * sd)
312 {
313 	struct sysfs_dirent *parent_sd;
314 
315  repeat:
316 	/* Moving/renaming is always done while holding reference.
317 	 * sd->s_parent won't change beneath us.
318 	 */
319 	parent_sd = sd->s_parent;
320 
321 	if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
322 		sysfs_put(sd->s_elem.symlink.target_sd);
323 	if (sysfs_type(sd) & SYSFS_COPY_NAME)
324 		kfree(sd->s_name);
325 	kfree(sd->s_iattr);
326 	sysfs_free_ino(sd->s_ino);
327 	kmem_cache_free(sysfs_dir_cachep, sd);
328 
329 	sd = parent_sd;
330 	if (sd && atomic_dec_and_test(&sd->s_count))
331 		goto repeat;
332 }
333 
334 static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
335 {
336 	struct sysfs_dirent * sd = dentry->d_fsdata;
337 
338 	if (sd) {
339 		/* sd->s_dentry is protected with sysfs_assoc_lock.
340 		 * This allows sysfs_drop_dentry() to dereference it.
341 		 */
342 		spin_lock(&sysfs_assoc_lock);
343 
344 		/* The dentry might have been deleted or another
345 		 * lookup could have happened updating sd->s_dentry to
346 		 * point the new dentry.  Ignore if it isn't pointing
347 		 * to this dentry.
348 		 */
349 		if (sd->s_dentry == dentry)
350 			sd->s_dentry = NULL;
351 		spin_unlock(&sysfs_assoc_lock);
352 		sysfs_put(sd);
353 	}
354 	iput(inode);
355 }
356 
/* dentry operations installed on sysfs dentries by sysfs_attach_dentry();
 * only ->d_iput is provided.
 */
357 static struct dentry_operations sysfs_dentry_ops = {
358 	.d_iput		= sysfs_d_iput,
359 };
360 
361 struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
362 {
363 	char *dup_name = NULL;
364 	struct sysfs_dirent *sd;
365 
366 	if (type & SYSFS_COPY_NAME) {
367 		name = dup_name = kstrdup(name, GFP_KERNEL);
368 		if (!name)
369 			return NULL;
370 	}
371 
372 	sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
373 	if (!sd)
374 		goto err_out1;
375 
376 	if (sysfs_alloc_ino(&sd->s_ino))
377 		goto err_out2;
378 
379 	atomic_set(&sd->s_count, 1);
380 	atomic_set(&sd->s_active, 0);
381 	atomic_set(&sd->s_event, 1);
382 
383 	sd->s_name = name;
384 	sd->s_mode = mode;
385 	sd->s_flags = type;
386 
387 	return sd;
388 
389  err_out2:
390 	kmem_cache_free(sysfs_dir_cachep, sd);
391  err_out1:
392 	kfree(dup_name);
393 	return NULL;
394 }
395 
396 /**
397  *	sysfs_attach_dentry - associate sysfs_dirent with dentry
398  *	@sd: target sysfs_dirent
399  *	@dentry: dentry to associate
400  *
401  *	Associate @sd with @dentry.  This is protected by
402  *	sysfs_assoc_lock to avoid race with sysfs_d_iput().
403  *
404  *	LOCKING:
405  *	mutex_lock(sysfs_mutex)
406  */
407 static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry)
408 {
409 	dentry->d_op = &sysfs_dentry_ops;
410 	dentry->d_fsdata = sysfs_get(sd);
411 
412 	/* protect sd->s_dentry against sysfs_d_iput */
413 	spin_lock(&sysfs_assoc_lock);
414 	sd->s_dentry = dentry;
415 	spin_unlock(&sysfs_assoc_lock);
416 
417 	d_rehash(dentry);
418 }
419 
420 static int sysfs_ilookup_test(struct inode *inode, void *arg)
421 {
422 	struct sysfs_dirent *sd = arg;
423 	return inode->i_ino == sd->s_ino;
424 }
425 
426 /**
427  *	sysfs_addrm_start - prepare for sysfs_dirent add/remove
428  *	@acxt: pointer to sysfs_addrm_cxt to be used
429  *	@parent_sd: parent sysfs_dirent
430  *
431  *	This function is called when the caller is about to add or
432  *	remove sysfs_dirent under @parent_sd.  This function acquires
433  *	sysfs_mutex, grabs inode for @parent_sd if available and lock
434  *	i_mutex of it.  @acxt is used to keep and pass context to
435  *	other addrm functions.
436  *
437  *	LOCKING:
438  *	Kernel thread context (may sleep).  sysfs_mutex is locked on
439  *	return.  i_mutex of parent inode is locked on return if
440  *	available.
441  */
442 void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
443 		       struct sysfs_dirent *parent_sd)
444 {
445 	struct inode *inode;
446 
447 	memset(acxt, 0, sizeof(*acxt));
448 	acxt->parent_sd = parent_sd;
449 
450 	/* Lookup parent inode.  inode initialization and I_NEW
451 	 * clearing are protected by sysfs_mutex.  By grabbing it and
452 	 * looking up with _nowait variant, inode state can be
453 	 * determined reliably.
454 	 */
455 	mutex_lock(&sysfs_mutex);
456 
457 	inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
458 				parent_sd);
459 
460 	if (inode && !(inode->i_state & I_NEW)) {
461 		/* parent inode available */
462 		acxt->parent_inode = inode;
463 
464 		/* sysfs_mutex is below i_mutex in lock hierarchy.
465 		 * First, trylock i_mutex.  If fails, unlock
466 		 * sysfs_mutex and lock them in order.
467 		 */
468 		if (!mutex_trylock(&inode->i_mutex)) {
469 			mutex_unlock(&sysfs_mutex);
470 			mutex_lock(&inode->i_mutex);
471 			mutex_lock(&sysfs_mutex);
472 		}
473 	} else
474 		iput(inode);
475 }
476 
477 /**
478  *	sysfs_add_one - add sysfs_dirent to parent
479  *	@acxt: addrm context to use
480  *	@sd: sysfs_dirent to be added
481  *
482  *	Get @acxt->parent_sd and set sd->s_parent to it and increment
483  *	nlink of parent inode if @sd is a directory.  @sd is NOT
484  *	linked into the children list of the parent.  The caller
485  *	should invoke sysfs_link_sibling() after this function
486  *	completes if @sd needs to be on the children list.
487  *
488  *	This function should be called between calls to
489  *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
490  *	passed the same @acxt as passed to sysfs_addrm_start().
491  *
492  *	LOCKING:
493  *	Determined by sysfs_addrm_start().
494  */
495 void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
496 {
497 	sd->s_parent = sysfs_get(acxt->parent_sd);
498 
499 	if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
500 		inc_nlink(acxt->parent_inode);
501 
502 	acxt->cnt++;
503 }
504 
505 /**
506  *	sysfs_remove_one - remove sysfs_dirent from parent
507  *	@acxt: addrm context to use
508  *	@sd: sysfs_dirent to be added
509  *
510  *	Mark @sd removed and drop nlink of parent inode if @sd is a
511  *	directory.  @sd is NOT unlinked from the children list of the
512  *	parent.  The caller is repsonsible for removing @sd from the
513  *	children list before calling this function.
514  *
515  *	This function should be called between calls to
516  *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
517  *	passed the same @acxt as passed to sysfs_addrm_start().
518  *
519  *	LOCKING:
520  *	Determined by sysfs_addrm_start().
521  */
522 void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
523 {
524 	BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED));
525 
526 	sd->s_flags |= SYSFS_FLAG_REMOVED;
527 	sd->s_sibling = acxt->removed;
528 	acxt->removed = sd;
529 
530 	if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
531 		drop_nlink(acxt->parent_inode);
532 
533 	acxt->cnt++;
534 }
535 
536 /**
537  *	sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
538  *	@sd: target sysfs_dirent
539  *
540  *	Drop dentry for @sd.  @sd must have been unlinked from its
541  *	parent on entry to this function such that it can't be looked
542  *	up anymore.
543  *
544  *	@sd->s_dentry which is protected with sysfs_assoc_lock points
545  *	to the currently associated dentry but we're not holding a
546  *	reference to it and racing with dput().  Grab dcache_lock and
547  *	verify dentry before dropping it.  If @sd->s_dentry is NULL or
548  *	dput() beats us, no need to bother.
549  */
550 static void sysfs_drop_dentry(struct sysfs_dirent *sd)
551 {
552 	struct dentry *dentry = NULL;
553 	struct inode *inode;
554 
555 	/* We're not holding a reference to ->s_dentry dentry but the
556 	 * field will stay valid as long as sysfs_assoc_lock is held.
557 	 */
558 	spin_lock(&sysfs_assoc_lock);
559 	spin_lock(&dcache_lock);
560 
561 	/* drop dentry if it's there and dput() didn't kill it yet */
562 	if (sd->s_dentry && sd->s_dentry->d_inode) {
563 		dentry = dget_locked(sd->s_dentry);
564 		spin_lock(&dentry->d_lock);
565 		__d_drop(dentry);
566 		spin_unlock(&dentry->d_lock);
567 	}
568 
569 	spin_unlock(&dcache_lock);
570 	spin_unlock(&sysfs_assoc_lock);
571 
572 	/* dentries for shadowed inodes are pinned, unpin */
573 	if (dentry && sysfs_is_shadowed_inode(dentry->d_inode))
574 		dput(dentry);
575 	dput(dentry);
576 
577 	/* adjust nlink and update timestamp */
578 	inode = ilookup(sysfs_sb, sd->s_ino);
579 	if (inode) {
580 		mutex_lock(&inode->i_mutex);
581 
582 		inode->i_ctime = CURRENT_TIME;
583 		drop_nlink(inode);
584 		if (sysfs_type(sd) == SYSFS_DIR)
585 			drop_nlink(inode);
586 
587 		mutex_unlock(&inode->i_mutex);
588 		iput(inode);
589 	}
590 }
591 
592 /**
593  *	sysfs_addrm_finish - finish up sysfs_dirent add/remove
594  *	@acxt: addrm context to finish up
595  *
596  *	Finish up sysfs_dirent add/remove.  Resources acquired by
597  *	sysfs_addrm_start() are released and removed sysfs_dirents are
598  *	cleaned up.  Timestamps on the parent inode are updated.
599  *
600  *	LOCKING:
601  *	All mutexes acquired by sysfs_addrm_start() are released.
602  *
603  *	RETURNS:
604  *	Number of added/removed sysfs_dirents since sysfs_addrm_start().
605  */
606 int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
607 {
608 	/* release resources acquired by sysfs_addrm_start() */
609 	mutex_unlock(&sysfs_mutex);
610 	if (acxt->parent_inode) {
611 		struct inode *inode = acxt->parent_inode;
612 
613 		/* if added/removed, update timestamps on the parent */
614 		if (acxt->cnt)
615 			inode->i_ctime = inode->i_mtime = CURRENT_TIME;
616 
617 		mutex_unlock(&inode->i_mutex);
618 		iput(inode);
619 	}
620 
621 	/* kill removed sysfs_dirents */
622 	while (acxt->removed) {
623 		struct sysfs_dirent *sd = acxt->removed;
624 
625 		acxt->removed = sd->s_sibling;
626 		sd->s_sibling = NULL;
627 
628 		sysfs_drop_dentry(sd);
629 		sysfs_deactivate(sd);
630 		sysfs_put(sd);
631 	}
632 
633 	return acxt->cnt;
634 }
635 
636 /**
637  *	sysfs_find_dirent - find sysfs_dirent with the given name
638  *	@parent_sd: sysfs_dirent to search under
639  *	@name: name to look for
640  *
641  *	Look for sysfs_dirent with name @name under @parent_sd.
642  *
643  *	LOCKING:
644  *	mutex_lock(sysfs_mutex)
645  *
646  *	RETURNS:
647  *	Pointer to sysfs_dirent if found, NULL if not.
648  */
649 struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
650 				       const unsigned char *name)
651 {
652 	struct sysfs_dirent *sd;
653 
654 	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling)
655 		if (sysfs_type(sd) && !strcmp(sd->s_name, name))
656 			return sd;
657 	return NULL;
658 }
659 
660 /**
661  *	sysfs_get_dirent - find and get sysfs_dirent with the given name
662  *	@parent_sd: sysfs_dirent to search under
663  *	@name: name to look for
664  *
665  *	Look for sysfs_dirent with name @name under @parent_sd and get
666  *	it if found.
667  *
668  *	LOCKING:
669  *	Kernel thread context (may sleep).  Grabs sysfs_mutex.
670  *
671  *	RETURNS:
672  *	Pointer to sysfs_dirent if found, NULL if not.
673  */
674 struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
675 				      const unsigned char *name)
676 {
677 	struct sysfs_dirent *sd;
678 
679 	mutex_lock(&sysfs_mutex);
680 	sd = sysfs_find_dirent(parent_sd, name);
681 	sysfs_get(sd);
682 	mutex_unlock(&sysfs_mutex);
683 
684 	return sd;
685 }
686 
687 static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
688 		      const char *name, struct sysfs_dirent **p_sd)
689 {
690 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
691 	struct sysfs_addrm_cxt acxt;
692 	struct sysfs_dirent *sd;
693 
694 	/* allocate */
695 	sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
696 	if (!sd)
697 		return -ENOMEM;
698 	sd->s_elem.dir.kobj = kobj;
699 
700 	/* link in */
701 	sysfs_addrm_start(&acxt, parent_sd);
702 
703 	if (!sysfs_find_dirent(parent_sd, name)) {
704 		sysfs_add_one(&acxt, sd);
705 		sysfs_link_sibling(sd);
706 	}
707 
708 	if (!sysfs_addrm_finish(&acxt)) {
709 		sysfs_put(sd);
710 		return -EEXIST;
711 	}
712 
713 	*p_sd = sd;
714 	return 0;
715 }
716 
717 int sysfs_create_subdir(struct kobject *kobj, const char *name,
718 			struct sysfs_dirent **p_sd)
719 {
720 	return create_dir(kobj, kobj->sd, name, p_sd);
721 }
722 
723 /**
724  *	sysfs_create_dir - create a directory for an object.
725  *	@kobj:		object we're creating directory for.
726  *	@shadow_parent:	parent object.
727  */
728 int sysfs_create_dir(struct kobject *kobj,
729 		     struct sysfs_dirent *shadow_parent_sd)
730 {
731 	struct sysfs_dirent *parent_sd, *sd;
732 	int error = 0;
733 
734 	BUG_ON(!kobj);
735 
736 	if (shadow_parent_sd)
737 		parent_sd = shadow_parent_sd;
738 	else if (kobj->parent)
739 		parent_sd = kobj->parent->sd;
740 	else if (sysfs_mount && sysfs_mount->mnt_sb)
741 		parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
742 	else
743 		return -EFAULT;
744 
745 	error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
746 	if (!error)
747 		kobj->sd = sd;
748 	return error;
749 }
750 
751 static int sysfs_count_nlink(struct sysfs_dirent *sd)
752 {
753 	struct sysfs_dirent *child;
754 	int nr = 0;
755 
756 	for (child = sd->s_children; child; child = child->s_sibling)
757 		if (sysfs_type(child) == SYSFS_DIR)
758 			nr++;
759 	return nr + 2;
760 }
761 
762 static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
763 				struct nameidata *nd)
764 {
765 	struct dentry *ret = NULL;
766 	struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
767 	struct sysfs_dirent * sd;
768 	struct bin_attribute *bin_attr;
769 	struct inode *inode;
770 	int found = 0;
771 
772 	mutex_lock(&sysfs_mutex);
773 
774 	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
775 		if (sysfs_type(sd) &&
776 		    !strcmp(sd->s_name, dentry->d_name.name)) {
777 			found = 1;
778 			break;
779 		}
780 	}
781 
782 	/* no such entry */
783 	if (!found)
784 		goto out_unlock;
785 
786 	/* attach dentry and inode */
787 	inode = sysfs_get_inode(sd);
788 	if (!inode) {
789 		ret = ERR_PTR(-ENOMEM);
790 		goto out_unlock;
791 	}
792 
793 	if (inode->i_state & I_NEW) {
794 		/* initialize inode according to type */
795 		switch (sysfs_type(sd)) {
796 		case SYSFS_DIR:
797 			inode->i_op = &sysfs_dir_inode_operations;
798 			inode->i_fop = &sysfs_dir_operations;
799 			inode->i_nlink = sysfs_count_nlink(sd);
800 			break;
801 		case SYSFS_KOBJ_ATTR:
802 			inode->i_size = PAGE_SIZE;
803 			inode->i_fop = &sysfs_file_operations;
804 			break;
805 		case SYSFS_KOBJ_BIN_ATTR:
806 			bin_attr = sd->s_elem.bin_attr.bin_attr;
807 			inode->i_size = bin_attr->size;
808 			inode->i_fop = &bin_fops;
809 			break;
810 		case SYSFS_KOBJ_LINK:
811 			inode->i_op = &sysfs_symlink_inode_operations;
812 			break;
813 		default:
814 			BUG();
815 		}
816 	}
817 
818 	sysfs_instantiate(dentry, inode);
819 	sysfs_attach_dentry(sd, dentry);
820 
821  out_unlock:
822 	mutex_unlock(&sysfs_mutex);
823 	return ret;
824 }
825 
/* Inode operations assigned to SYSFS_DIR inodes in sysfs_lookup(). */
826 const struct inode_operations sysfs_dir_inode_operations = {
827 	.lookup		= sysfs_lookup,
828 	.setattr	= sysfs_setattr,
829 };
830 
831 static void remove_dir(struct sysfs_dirent *sd)
832 {
833 	struct sysfs_addrm_cxt acxt;
834 
835 	sysfs_addrm_start(&acxt, sd->s_parent);
836 	sysfs_unlink_sibling(sd);
837 	sysfs_remove_one(&acxt, sd);
838 	sysfs_addrm_finish(&acxt);
839 }
840 
/* Remove subdirectory @sd; exported wrapper around remove_dir(). */
void sysfs_remove_subdir(struct sysfs_dirent *sd)
{
	remove_dir(sd);
}
845 
846 
847 static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
848 {
849 	struct sysfs_addrm_cxt acxt;
850 	struct sysfs_dirent **pos;
851 
852 	if (!dir_sd)
853 		return;
854 
855 	pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
856 	sysfs_addrm_start(&acxt, dir_sd);
857 	pos = &dir_sd->s_children;
858 	while (*pos) {
859 		struct sysfs_dirent *sd = *pos;
860 
861 		if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) {
862 			*pos = sd->s_sibling;
863 			sd->s_sibling = NULL;
864 			sysfs_remove_one(&acxt, sd);
865 		} else
866 			pos = &(*pos)->s_sibling;
867 	}
868 	sysfs_addrm_finish(&acxt);
869 
870 	remove_dir(dir_sd);
871 }
872 
873 /**
874  *	sysfs_remove_dir - remove an object's directory.
875  *	@kobj:	object.
876  *
877  *	The only thing special about this is that we remove any files in
878  *	the directory before we remove the directory, and we've inlined
879  *	what used to be sysfs_rmdir() below, instead of calling separately.
880  */
881 
882 void sysfs_remove_dir(struct kobject * kobj)
883 {
884 	struct sysfs_dirent *sd = kobj->sd;
885 
886 	spin_lock(&sysfs_assoc_lock);
887 	kobj->sd = NULL;
888 	spin_unlock(&sysfs_assoc_lock);
889 
890 	__sysfs_remove_dir(sd);
891 }
892 
893 int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd,
894 		     const char *new_name)
895 {
896 	struct sysfs_dirent *sd = kobj->sd;
897 	struct dentry *new_parent = NULL;
898 	struct dentry *old_dentry = NULL, *new_dentry = NULL;
899 	const char *dup_name = NULL;
900 	int error;
901 
902 	/* get dentries */
903 	old_dentry = sysfs_get_dentry(sd);
904 	if (IS_ERR(old_dentry)) {
905 		error = PTR_ERR(old_dentry);
906 		goto out_dput;
907 	}
908 
909 	new_parent = sysfs_get_dentry(new_parent_sd);
910 	if (IS_ERR(new_parent)) {
911 		error = PTR_ERR(new_parent);
912 		goto out_dput;
913 	}
914 
915 	/* lock new_parent and get dentry for new name */
916 	mutex_lock(&new_parent->d_inode->i_mutex);
917 
918 	new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name));
919 	if (IS_ERR(new_dentry)) {
920 		error = PTR_ERR(new_dentry);
921 		goto out_unlock;
922 	}
923 
924 	/* By allowing two different directories with the same
925 	 * d_parent we allow this routine to move between different
926 	 * shadows of the same directory
927 	 */
928 	error = -EINVAL;
929 	if (old_dentry->d_parent->d_inode != new_parent->d_inode ||
930 	    new_dentry->d_parent->d_inode != new_parent->d_inode ||
931 	    old_dentry == new_dentry)
932 		goto out_unlock;
933 
934 	error = -EEXIST;
935 	if (new_dentry->d_inode)
936 		goto out_unlock;
937 
938 	/* rename kobject and sysfs_dirent */
939 	error = -ENOMEM;
940 	new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
941 	if (!new_name)
942 		goto out_drop;
943 
944 	error = kobject_set_name(kobj, "%s", new_name);
945 	if (error)
946 		goto out_drop;
947 
948 	mutex_lock(&sysfs_mutex);
949 
950 	dup_name = sd->s_name;
951 	sd->s_name = new_name;
952 
953 	/* move under the new parent */
954 	d_add(new_dentry, NULL);
955 	d_move(sd->s_dentry, new_dentry);
956 
957 	sysfs_unlink_sibling(sd);
958 	sysfs_get(new_parent_sd);
959 	sysfs_put(sd->s_parent);
960 	sd->s_parent = new_parent_sd;
961 	sysfs_link_sibling(sd);
962 
963 	mutex_unlock(&sysfs_mutex);
964 
965 	error = 0;
966 	goto out_unlock;
967 
968  out_drop:
969 	d_drop(new_dentry);
970  out_unlock:
971 	mutex_unlock(&new_parent->d_inode->i_mutex);
972  out_dput:
973 	kfree(dup_name);
974 	dput(new_parent);
975 	dput(old_dentry);
976 	dput(new_dentry);
977 	return error;
978 }
979 
980 int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
981 {
982 	struct sysfs_dirent *sd = kobj->sd;
983 	struct sysfs_dirent *new_parent_sd;
984 	struct dentry *old_parent, *new_parent = NULL;
985 	struct dentry *old_dentry = NULL, *new_dentry = NULL;
986 	int error;
987 
988 	BUG_ON(!sd->s_parent);
989 	new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
990 
991 	/* get dentries */
992 	old_dentry = sysfs_get_dentry(sd);
993 	if (IS_ERR(old_dentry)) {
994 		error = PTR_ERR(old_dentry);
995 		goto out_dput;
996 	}
997 	old_parent = sd->s_parent->s_dentry;
998 
999 	new_parent = sysfs_get_dentry(new_parent_sd);
1000 	if (IS_ERR(new_parent)) {
1001 		error = PTR_ERR(new_parent);
1002 		goto out_dput;
1003 	}
1004 
1005 	if (old_parent->d_inode == new_parent->d_inode) {
1006 		error = 0;
1007 		goto out_dput;	/* nothing to move */
1008 	}
1009 again:
1010 	mutex_lock(&old_parent->d_inode->i_mutex);
1011 	if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
1012 		mutex_unlock(&old_parent->d_inode->i_mutex);
1013 		goto again;
1014 	}
1015 
1016 	new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name));
1017 	if (IS_ERR(new_dentry)) {
1018 		error = PTR_ERR(new_dentry);
1019 		goto out_unlock;
1020 	} else
1021 		error = 0;
1022 	d_add(new_dentry, NULL);
1023 	d_move(sd->s_dentry, new_dentry);
1024 	dput(new_dentry);
1025 
1026 	/* Remove from old parent's list and insert into new parent's list. */
1027 	mutex_lock(&sysfs_mutex);
1028 
1029 	sysfs_unlink_sibling(sd);
1030 	sysfs_get(new_parent_sd);
1031 	sysfs_put(sd->s_parent);
1032 	sd->s_parent = new_parent_sd;
1033 	sysfs_link_sibling(sd);
1034 
1035 	mutex_unlock(&sysfs_mutex);
1036 
1037  out_unlock:
1038 	mutex_unlock(&new_parent->d_inode->i_mutex);
1039 	mutex_unlock(&old_parent->d_inode->i_mutex);
1040  out_dput:
1041 	dput(new_parent);
1042 	dput(old_dentry);
1043 	dput(new_dentry);
1044 	return error;
1045 }
1046 
1047 static int sysfs_dir_open(struct inode *inode, struct file *file)
1048 {
1049 	struct dentry * dentry = file->f_path.dentry;
1050 	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
1051 	struct sysfs_dirent * sd;
1052 
1053 	sd = sysfs_new_dirent("_DIR_", 0, 0);
1054 	if (sd) {
1055 		mutex_lock(&sysfs_mutex);
1056 		sd->s_parent = sysfs_get(parent_sd);
1057 		sysfs_link_sibling(sd);
1058 		mutex_unlock(&sysfs_mutex);
1059 	}
1060 
1061 	file->private_data = sd;
1062 	return sd ? 0 : -ENOMEM;
1063 }
1064 
1065 static int sysfs_dir_close(struct inode *inode, struct file *file)
1066 {
1067 	struct sysfs_dirent * cursor = file->private_data;
1068 
1069 	mutex_lock(&sysfs_mutex);
1070 	sysfs_unlink_sibling(cursor);
1071 	mutex_unlock(&sysfs_mutex);
1072 
1073 	release_sysfs_dirent(cursor);
1074 
1075 	return 0;
1076 }
1077 
1078 /* Relationship between s_mode and the DT_xxx types */
1079 static inline unsigned char dt_type(struct sysfs_dirent *sd)
1080 {
1081 	return (sd->s_mode >> 12) & 15;
1082 }
1083 
1084 static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
1085 {
	/* Emit directory entries through @filldir: f_pos 0 is ".", f_pos 1
	 * is "..", and positions from 2 on are the children of the
	 * directory.  Always returns 0; progress is recorded in filp->f_pos.
	 */
1086 	struct dentry *dentry = filp->f_path.dentry;
1087 	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
	/* placeholder dirent linked into the children list by
	 * sysfs_dir_open(); its list position marks where to resume
	 */
1088 	struct sysfs_dirent *cursor = filp->private_data;
1089 	struct sysfs_dirent **pos;
1090 	ino_t ino;
1091 	int i = filp->f_pos;
1092 
1093 	switch (i) {
1094 		case 0:
1095 			ino = parent_sd->s_ino;
1096 			if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
1097 				break;
1098 			filp->f_pos++;
1099 			i++;
1100 			/* fallthrough */
1101 		case 1:
			/* a directory without a parent reports itself as ".." */
1102 			if (parent_sd->s_parent)
1103 				ino = parent_sd->s_parent->s_ino;
1104 			else
1105 				ino = parent_sd->s_ino;
1106 			if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
1107 				break;
1108 			filp->f_pos++;
1109 			i++;
1110 			/* fallthrough */
1111 		default:
1112 			mutex_lock(&sysfs_mutex);
1113 
			/* find the link that points at the cursor; this walk
			 * relies on the cursor being on the children list
			 */
1114 			pos = &parent_sd->s_children;
1115 			while (*pos != cursor)
1116 				pos = &(*pos)->s_sibling;
1117 
1118 			/* unlink cursor */
1119 			*pos = cursor->s_sibling;
1120 
			/* at f_pos 2 start from the head of the list instead
			 * of from where the cursor was
			 */
1121 			if (filp->f_pos == 2)
1122 				pos = &parent_sd->s_children;
1123 
1124 			for ( ; *pos; pos = &(*pos)->s_sibling) {
1125 				struct sysfs_dirent *next = *pos;
1126 				const char * name;
1127 				int len;
1128 
				/* type 0 entries (e.g. the placeholders made
				 * by sysfs_dir_open()) are not reported
				 */
1129 				if (!sysfs_type(next))
1130 					continue;
1131 
1132 				name = next->s_name;
1133 				len = strlen(name);
1134 				ino = next->s_ino;
1135 
				/* stop on filldir failure; pos still refers to
				 * the entry that was not emitted
				 */
1136 				if (filldir(dirent, name, len, filp->f_pos, ino,
1137 						 dt_type(next)) < 0)
1138 					break;
1139 
1140 				filp->f_pos++;
1141 			}
1142 
			/* relink the cursor in front of the first entry that
			 * was not emitted, or at the tail if all were
			 */
1143 			/* put cursor back in */
1144 			cursor->s_sibling = *pos;
1145 			*pos = cursor;
1146 
1147 			mutex_unlock(&sysfs_mutex);
1148 	}
1149 	return 0;
1150 }
1151 
1152 static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
1153 {
1154 	struct dentry * dentry = file->f_path.dentry;
1155 
1156 	switch (origin) {
1157 		case 1:
1158 			offset += file->f_pos;
1159 		case 0:
1160 			if (offset >= 0)
1161 				break;
1162 		default:
1163 			return -EINVAL;
1164 	}
1165 	if (offset != file->f_pos) {
1166 		mutex_lock(&sysfs_mutex);
1167 
1168 		file->f_pos = offset;
1169 		if (file->f_pos >= 2) {
1170 			struct sysfs_dirent *sd = dentry->d_fsdata;
1171 			struct sysfs_dirent *cursor = file->private_data;
1172 			struct sysfs_dirent **pos;
1173 			loff_t n = file->f_pos - 2;
1174 
1175 			sysfs_unlink_sibling(cursor);
1176 
1177 			pos = &sd->s_children;
1178 			while (n && *pos) {
1179 				struct sysfs_dirent *next = *pos;
1180 				if (sysfs_type(next))
1181 					n--;
1182 				pos = &(*pos)->s_sibling;
1183 			}
1184 
1185 			cursor->s_sibling = *pos;
1186 			*pos = cursor;
1187 		}
1188 
1189 		mutex_unlock(&sysfs_mutex);
1190 	}
1191 
1192 	return offset;
1193 }
1194 
1195 
1196 /**
1197  *	sysfs_make_shadowed_dir - Setup so a directory can be shadowed
1198  *	@kobj:	object we're creating shadow of.
1199  */
1200 
1201 int sysfs_make_shadowed_dir(struct kobject *kobj,
1202 	void * (*follow_link)(struct dentry *, struct nameidata *))
1203 {
1204 	struct dentry *dentry;
1205 	struct inode *inode;
1206 	struct inode_operations *i_op;
1207 
1208 	/* get dentry for @kobj->sd, dentry of a shadowed dir is pinned */
1209 	dentry = sysfs_get_dentry(kobj->sd);
1210 	if (IS_ERR(dentry))
1211 		return PTR_ERR(dentry);
1212 
1213 	inode = dentry->d_inode;
1214 	if (inode->i_op != &sysfs_dir_inode_operations) {
1215 		dput(dentry);
1216 		return -EINVAL;
1217 	}
1218 
1219 	i_op = kmalloc(sizeof(*i_op), GFP_KERNEL);
1220 	if (!i_op)
1221 		return -ENOMEM;
1222 
1223 	memcpy(i_op, &sysfs_dir_inode_operations, sizeof(*i_op));
1224 	i_op->follow_link = follow_link;
1225 
1226 	/* Locking of inode->i_op?
1227 	 * Since setting i_op is a single word write and they
1228 	 * are atomic we should be ok here.
1229 	 */
1230 	inode->i_op = i_op;
1231 	return 0;
1232 }
1233 
1234 /**
1235  *	sysfs_create_shadow_dir - create a shadow directory for an object.
1236  *	@kobj:	object we're creating directory for.
1237  *
1238  *	sysfs_make_shadowed_dir must already have been called on this
1239  *	directory.
1240  */
1241 
1242 struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj)
1243 {
1244 	struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
1245 	struct dentry *dir, *parent, *shadow;
1246 	struct inode *inode;
1247 	struct sysfs_dirent *sd;
1248 	struct sysfs_addrm_cxt acxt;
1249 
1250 	dir = sysfs_get_dentry(kobj->sd);
1251 	if (IS_ERR(dir)) {
1252 		sd = (void *)dir;
1253 		goto out;
1254 	}
1255 	parent = dir->d_parent;
1256 
1257 	inode = dir->d_inode;
1258 	sd = ERR_PTR(-EINVAL);
1259 	if (!sysfs_is_shadowed_inode(inode))
1260 		goto out_dput;
1261 
1262 	shadow = d_alloc(parent, &dir->d_name);
1263 	if (!shadow)
1264 		goto nomem;
1265 
1266 	sd = sysfs_new_dirent("_SHADOW_", inode->i_mode, SYSFS_DIR);
1267 	if (!sd)
1268 		goto nomem;
1269 	sd->s_elem.dir.kobj = kobj;
1270 
1271 	sysfs_addrm_start(&acxt, parent_sd);
1272 
1273 	/* add but don't link into children list */
1274 	sysfs_add_one(&acxt, sd);
1275 
1276 	/* attach and instantiate dentry */
1277 	sysfs_attach_dentry(sd, shadow);
1278 	d_instantiate(shadow, igrab(inode));
1279 	inc_nlink(inode);	/* tj: synchronization? */
1280 
1281 	sysfs_addrm_finish(&acxt);
1282 
1283 	dget(shadow);		/* Extra count - pin the dentry in core */
1284 
1285 	goto out_dput;
1286 
1287  nomem:
1288 	dput(shadow);
1289 	sd = ERR_PTR(-ENOMEM);
1290  out_dput:
1291 	dput(dir);
1292  out:
1293 	return sd;
1294 }
1295 
/**
 *	sysfs_remove_shadow_dir - remove a shadow directory.
 *	@shadow_sd: sysfs_dirent of the shadow directory to remove
 *
 *	Thin wrapper: the whole removal is delegated to
 *	__sysfs_remove_dir(), which is handed @shadow_sd unchanged.
 */

void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd)
{
	__sysfs_remove_dir(shadow_sd);
}
1309 
1310 const struct file_operations sysfs_dir_operations = {
1311 	.open		= sysfs_dir_open,
1312 	.release	= sysfs_dir_close,
1313 	.llseek		= sysfs_dir_lseek,
1314 	.read		= generic_read_dir,
1315 	.readdir	= sysfs_readdir,
1316 };
1317