xref: /linux/fs/sysfs/dir.c (revision 8b4a40809e5330c9da5d20107d693d92d73b31dc)
1 /*
2  * dir.c - Operations for sysfs directories.
3  */
4 
5 #undef DEBUG
6 
7 #include <linux/fs.h>
8 #include <linux/mount.h>
9 #include <linux/module.h>
10 #include <linux/kobject.h>
11 #include <linux/namei.h>
12 #include <linux/idr.h>
13 #include <linux/completion.h>
14 #include <asm/semaphore.h>
15 #include "sysfs.h"
16 
17 DEFINE_MUTEX(sysfs_mutex);
18 spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
19 
20 static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
21 static DEFINE_IDA(sysfs_ino_ida);
22 
23 /**
24  *	sysfs_link_sibling - link sysfs_dirent into sibling list
25  *	@sd: sysfs_dirent of interest
26  *
27  *	Link @sd into its sibling list which starts from
28  *	sd->s_parent->s_children.
29  *
30  *	Locking:
31  *	mutex_lock(sysfs_mutex)
32  */
33 void sysfs_link_sibling(struct sysfs_dirent *sd)
34 {
35 	struct sysfs_dirent *parent_sd = sd->s_parent;
36 
37 	BUG_ON(sd->s_sibling);
38 	sd->s_sibling = parent_sd->s_children;
39 	parent_sd->s_children = sd;
40 }
41 
42 /**
43  *	sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
44  *	@sd: sysfs_dirent of interest
45  *
46  *	Unlink @sd from its sibling list which starts from
47  *	sd->s_parent->s_children.
48  *
49  *	Locking:
50  *	mutex_lock(sysfs_mutex)
51  */
52 void sysfs_unlink_sibling(struct sysfs_dirent *sd)
53 {
54 	struct sysfs_dirent **pos;
55 
56 	for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) {
57 		if (*pos == sd) {
58 			*pos = sd->s_sibling;
59 			sd->s_sibling = NULL;
60 			break;
61 		}
62 	}
63 }
64 
65 /**
66  *	sysfs_get_dentry - get dentry for the given sysfs_dirent
67  *	@sd: sysfs_dirent of interest
68  *
69  *	Get dentry for @sd.  Dentry is looked up if currently not
70  *	present.  This function climbs sysfs_dirent tree till it
71  *	reaches a sysfs_dirent with valid dentry attached and descends
72  *	down from there looking up dentry for each step.
73  *
74  *	LOCKING:
75  *	Kernel thread context (may sleep)
76  *
77  *	RETURNS:
78  *	Pointer to found dentry on success, ERR_PTR() value on error.
79  */
80 struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
81 {
82 	struct sysfs_dirent *cur;
83 	struct dentry *parent_dentry, *dentry;
84 	int i, depth;
85 
86 	/* Find the first parent which has valid s_dentry and get the
87 	 * dentry.
88 	 */
89 	mutex_lock(&sysfs_mutex);
90  restart0:
91 	spin_lock(&sysfs_assoc_lock);
92  restart1:
93 	spin_lock(&dcache_lock);
94 
95 	dentry = NULL;
96 	depth = 0;
97 	cur = sd;
98 	while (!cur->s_dentry || !cur->s_dentry->d_inode) {
99 		if (cur->s_flags & SYSFS_FLAG_REMOVED) {
100 			dentry = ERR_PTR(-ENOENT);
101 			depth = 0;
102 			break;
103 		}
104 		cur = cur->s_parent;
105 		depth++;
106 	}
107 	if (!IS_ERR(dentry))
108 		dentry = dget_locked(cur->s_dentry);
109 
110 	spin_unlock(&dcache_lock);
111 	spin_unlock(&sysfs_assoc_lock);
112 
113 	/* from the found dentry, look up depth times */
114 	while (depth--) {
115 		/* find and get depth'th ancestor */
116 		for (cur = sd, i = 0; cur && i < depth; i++)
117 			cur = cur->s_parent;
118 
119 		/* This can happen if tree structure was modified due
120 		 * to move/rename.  Restart.
121 		 */
122 		if (i != depth) {
123 			dput(dentry);
124 			goto restart0;
125 		}
126 
127 		sysfs_get(cur);
128 
129 		mutex_unlock(&sysfs_mutex);
130 
131 		/* look it up */
132 		parent_dentry = dentry;
133 		dentry = lookup_one_len_kern(cur->s_name, parent_dentry,
134 					     strlen(cur->s_name));
135 		dput(parent_dentry);
136 
137 		if (IS_ERR(dentry)) {
138 			sysfs_put(cur);
139 			return dentry;
140 		}
141 
142 		mutex_lock(&sysfs_mutex);
143 		spin_lock(&sysfs_assoc_lock);
144 
145 		/* This, again, can happen if tree structure has
146 		 * changed and we looked up the wrong thing.  Restart.
147 		 */
148 		if (cur->s_dentry != dentry) {
149 			dput(dentry);
150 			sysfs_put(cur);
151 			goto restart1;
152 		}
153 
154 		spin_unlock(&sysfs_assoc_lock);
155 
156 		sysfs_put(cur);
157 	}
158 
159 	mutex_unlock(&sysfs_mutex);
160 	return dentry;
161 }
162 
163 /**
164  *	sysfs_get_active - get an active reference to sysfs_dirent
165  *	@sd: sysfs_dirent to get an active reference to
166  *
167  *	Get an active reference of @sd.  This function is noop if @sd
168  *	is NULL.
169  *
170  *	RETURNS:
171  *	Pointer to @sd on success, NULL on failure.
172  */
173 struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
174 {
175 	if (unlikely(!sd))
176 		return NULL;
177 
178 	while (1) {
179 		int v, t;
180 
181 		v = atomic_read(&sd->s_active);
182 		if (unlikely(v < 0))
183 			return NULL;
184 
185 		t = atomic_cmpxchg(&sd->s_active, v, v + 1);
186 		if (likely(t == v))
187 			return sd;
188 		if (t < 0)
189 			return NULL;
190 
191 		cpu_relax();
192 	}
193 }
194 
195 /**
196  *	sysfs_put_active - put an active reference to sysfs_dirent
197  *	@sd: sysfs_dirent to put an active reference to
198  *
199  *	Put an active reference to @sd.  This function is noop if @sd
200  *	is NULL.
201  */
202 void sysfs_put_active(struct sysfs_dirent *sd)
203 {
204 	struct completion *cmpl;
205 	int v;
206 
207 	if (unlikely(!sd))
208 		return;
209 
210 	v = atomic_dec_return(&sd->s_active);
211 	if (likely(v != SD_DEACTIVATED_BIAS))
212 		return;
213 
214 	/* atomic_dec_return() is a mb(), we'll always see the updated
215 	 * sd->s_sibling.
216 	 */
217 	cmpl = (void *)sd->s_sibling;
218 	complete(cmpl);
219 }
220 
221 /**
222  *	sysfs_get_active_two - get active references to sysfs_dirent and parent
223  *	@sd: sysfs_dirent of interest
224  *
225  *	Get active reference to @sd and its parent.  Parent's active
226  *	reference is grabbed first.  This function is noop if @sd is
227  *	NULL.
228  *
229  *	RETURNS:
230  *	Pointer to @sd on success, NULL on failure.
231  */
232 struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
233 {
234 	if (sd) {
235 		if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent)))
236 			return NULL;
237 		if (unlikely(!sysfs_get_active(sd))) {
238 			sysfs_put_active(sd->s_parent);
239 			return NULL;
240 		}
241 	}
242 	return sd;
243 }
244 
245 /**
246  *	sysfs_put_active_two - put active references to sysfs_dirent and parent
247  *	@sd: sysfs_dirent of interest
248  *
249  *	Put active references to @sd and its parent.  This function is
250  *	noop if @sd is NULL.
251  */
252 void sysfs_put_active_two(struct sysfs_dirent *sd)
253 {
254 	if (sd) {
255 		sysfs_put_active(sd);
256 		sysfs_put_active(sd->s_parent);
257 	}
258 }
259 
260 /**
261  *	sysfs_deactivate - deactivate sysfs_dirent
262  *	@sd: sysfs_dirent to deactivate
263  *
264  *	Deny new active references and drain existing ones.
265  */
266 static void sysfs_deactivate(struct sysfs_dirent *sd)
267 {
268 	DECLARE_COMPLETION_ONSTACK(wait);
269 	int v;
270 
271 	BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
272 	sd->s_sibling = (void *)&wait;
273 
274 	/* atomic_add_return() is a mb(), put_active() will always see
275 	 * the updated sd->s_sibling.
276 	 */
277 	v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
278 
279 	if (v != SD_DEACTIVATED_BIAS)
280 		wait_for_completion(&wait);
281 
282 	sd->s_sibling = NULL;
283 }
284 
285 static int sysfs_alloc_ino(ino_t *pino)
286 {
287 	int ino, rc;
288 
289  retry:
290 	spin_lock(&sysfs_ino_lock);
291 	rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
292 	spin_unlock(&sysfs_ino_lock);
293 
294 	if (rc == -EAGAIN) {
295 		if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
296 			goto retry;
297 		rc = -ENOMEM;
298 	}
299 
300 	*pino = ino;
301 	return rc;
302 }
303 
304 static void sysfs_free_ino(ino_t ino)
305 {
306 	spin_lock(&sysfs_ino_lock);
307 	ida_remove(&sysfs_ino_ida, ino);
308 	spin_unlock(&sysfs_ino_lock);
309 }
310 
311 void release_sysfs_dirent(struct sysfs_dirent * sd)
312 {
313 	struct sysfs_dirent *parent_sd;
314 
315  repeat:
316 	/* Moving/renaming is always done while holding reference.
317 	 * sd->s_parent won't change beneath us.
318 	 */
319 	parent_sd = sd->s_parent;
320 
321 	if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
322 		sysfs_put(sd->s_elem.symlink.target_sd);
323 	if (sysfs_type(sd) & SYSFS_COPY_NAME)
324 		kfree(sd->s_name);
325 	kfree(sd->s_iattr);
326 	sysfs_free_ino(sd->s_ino);
327 	kmem_cache_free(sysfs_dir_cachep, sd);
328 
329 	sd = parent_sd;
330 	if (sd && atomic_dec_and_test(&sd->s_count))
331 		goto repeat;
332 }
333 
334 static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
335 {
336 	struct sysfs_dirent * sd = dentry->d_fsdata;
337 
338 	if (sd) {
339 		/* sd->s_dentry is protected with sysfs_assoc_lock.
340 		 * This allows sysfs_drop_dentry() to dereference it.
341 		 */
342 		spin_lock(&sysfs_assoc_lock);
343 
344 		/* The dentry might have been deleted or another
345 		 * lookup could have happened updating sd->s_dentry to
346 		 * point the new dentry.  Ignore if it isn't pointing
347 		 * to this dentry.
348 		 */
349 		if (sd->s_dentry == dentry)
350 			sd->s_dentry = NULL;
351 		spin_unlock(&sysfs_assoc_lock);
352 		sysfs_put(sd);
353 	}
354 	iput(inode);
355 }
356 
357 static struct dentry_operations sysfs_dentry_ops = {
358 	.d_iput		= sysfs_d_iput,
359 };
360 
361 struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
362 {
363 	char *dup_name = NULL;
364 	struct sysfs_dirent *sd = NULL;
365 
366 	if (type & SYSFS_COPY_NAME) {
367 		name = dup_name = kstrdup(name, GFP_KERNEL);
368 		if (!name)
369 			goto err_out;
370 	}
371 
372 	sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
373 	if (!sd)
374 		goto err_out;
375 
376 	if (sysfs_alloc_ino(&sd->s_ino))
377 		goto err_out;
378 
379 	atomic_set(&sd->s_count, 1);
380 	atomic_set(&sd->s_active, 0);
381 	atomic_set(&sd->s_event, 1);
382 
383 	sd->s_name = name;
384 	sd->s_mode = mode;
385 	sd->s_flags = type;
386 
387 	return sd;
388 
389  err_out:
390 	kfree(dup_name);
391 	kmem_cache_free(sysfs_dir_cachep, sd);
392 	return NULL;
393 }
394 
395 /**
396  *	sysfs_attach_dentry - associate sysfs_dirent with dentry
397  *	@sd: target sysfs_dirent
398  *	@dentry: dentry to associate
399  *
400  *	Associate @sd with @dentry.  This is protected by
401  *	sysfs_assoc_lock to avoid race with sysfs_d_iput().
402  *
403  *	LOCKING:
404  *	mutex_lock(sysfs_mutex)
405  */
406 static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry)
407 {
408 	dentry->d_op = &sysfs_dentry_ops;
409 	dentry->d_fsdata = sysfs_get(sd);
410 
411 	/* protect sd->s_dentry against sysfs_d_iput */
412 	spin_lock(&sysfs_assoc_lock);
413 	sd->s_dentry = dentry;
414 	spin_unlock(&sysfs_assoc_lock);
415 
416 	d_rehash(dentry);
417 }
418 
419 static int sysfs_ilookup_test(struct inode *inode, void *arg)
420 {
421 	struct sysfs_dirent *sd = arg;
422 	return inode->i_ino == sd->s_ino;
423 }
424 
425 /**
426  *	sysfs_addrm_start - prepare for sysfs_dirent add/remove
427  *	@acxt: pointer to sysfs_addrm_cxt to be used
428  *	@parent_sd: parent sysfs_dirent
429  *
430  *	This function is called when the caller is about to add or
431  *	remove sysfs_dirent under @parent_sd.  This function acquires
432  *	sysfs_mutex, grabs inode for @parent_sd if available and lock
433  *	i_mutex of it.  @acxt is used to keep and pass context to
434  *	other addrm functions.
435  *
436  *	LOCKING:
437  *	Kernel thread context (may sleep).  sysfs_mutex is locked on
438  *	return.  i_mutex of parent inode is locked on return if
439  *	available.
440  */
441 void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
442 		       struct sysfs_dirent *parent_sd)
443 {
444 	struct inode *inode;
445 
446 	memset(acxt, 0, sizeof(*acxt));
447 	acxt->parent_sd = parent_sd;
448 
449 	/* Lookup parent inode.  inode initialization and I_NEW
450 	 * clearing are protected by sysfs_mutex.  By grabbing it and
451 	 * looking up with _nowait variant, inode state can be
452 	 * determined reliably.
453 	 */
454 	mutex_lock(&sysfs_mutex);
455 
456 	inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
457 				parent_sd);
458 
459 	if (inode && !(inode->i_state & I_NEW)) {
460 		/* parent inode available */
461 		acxt->parent_inode = inode;
462 
463 		/* sysfs_mutex is below i_mutex in lock hierarchy.
464 		 * First, trylock i_mutex.  If fails, unlock
465 		 * sysfs_mutex and lock them in order.
466 		 */
467 		if (!mutex_trylock(&inode->i_mutex)) {
468 			mutex_unlock(&sysfs_mutex);
469 			mutex_lock(&inode->i_mutex);
470 			mutex_lock(&sysfs_mutex);
471 		}
472 	} else
473 		iput(inode);
474 }
475 
476 /**
477  *	sysfs_add_one - add sysfs_dirent to parent
478  *	@acxt: addrm context to use
479  *	@sd: sysfs_dirent to be added
480  *
481  *	Get @acxt->parent_sd and set sd->s_parent to it and increment
482  *	nlink of parent inode if @sd is a directory.  @sd is NOT
483  *	linked into the children list of the parent.  The caller
484  *	should invoke sysfs_link_sibling() after this function
485  *	completes if @sd needs to be on the children list.
486  *
487  *	This function should be called between calls to
488  *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
489  *	passed the same @acxt as passed to sysfs_addrm_start().
490  *
491  *	LOCKING:
492  *	Determined by sysfs_addrm_start().
493  */
494 void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
495 {
496 	sd->s_parent = sysfs_get(acxt->parent_sd);
497 
498 	if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
499 		inc_nlink(acxt->parent_inode);
500 
501 	acxt->cnt++;
502 }
503 
504 /**
505  *	sysfs_remove_one - remove sysfs_dirent from parent
506  *	@acxt: addrm context to use
507  *	@sd: sysfs_dirent to be added
508  *
509  *	Mark @sd removed and drop nlink of parent inode if @sd is a
510  *	directory.  @sd is NOT unlinked from the children list of the
511  *	parent.  The caller is repsonsible for removing @sd from the
512  *	children list before calling this function.
513  *
514  *	This function should be called between calls to
515  *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
516  *	passed the same @acxt as passed to sysfs_addrm_start().
517  *
518  *	LOCKING:
519  *	Determined by sysfs_addrm_start().
520  */
521 void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
522 {
523 	BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED));
524 
525 	sd->s_flags |= SYSFS_FLAG_REMOVED;
526 	sd->s_sibling = acxt->removed;
527 	acxt->removed = sd;
528 
529 	if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
530 		drop_nlink(acxt->parent_inode);
531 
532 	acxt->cnt++;
533 }
534 
535 /**
536  *	sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
537  *	@sd: target sysfs_dirent
538  *
539  *	Drop dentry for @sd.  @sd must have been unlinked from its
540  *	parent on entry to this function such that it can't be looked
541  *	up anymore.
542  *
543  *	@sd->s_dentry which is protected with sysfs_assoc_lock points
544  *	to the currently associated dentry but we're not holding a
545  *	reference to it and racing with dput().  Grab dcache_lock and
546  *	verify dentry before dropping it.  If @sd->s_dentry is NULL or
547  *	dput() beats us, no need to bother.
548  */
549 static void sysfs_drop_dentry(struct sysfs_dirent *sd)
550 {
551 	struct dentry *dentry = NULL;
552 	struct inode *inode;
553 
554 	/* We're not holding a reference to ->s_dentry dentry but the
555 	 * field will stay valid as long as sysfs_assoc_lock is held.
556 	 */
557 	spin_lock(&sysfs_assoc_lock);
558 	spin_lock(&dcache_lock);
559 
560 	/* drop dentry if it's there and dput() didn't kill it yet */
561 	if (sd->s_dentry && sd->s_dentry->d_inode) {
562 		dentry = dget_locked(sd->s_dentry);
563 		spin_lock(&dentry->d_lock);
564 		__d_drop(dentry);
565 		spin_unlock(&dentry->d_lock);
566 	}
567 
568 	spin_unlock(&dcache_lock);
569 	spin_unlock(&sysfs_assoc_lock);
570 
571 	/* dentries for shadowed inodes are pinned, unpin */
572 	if (dentry && sysfs_is_shadowed_inode(dentry->d_inode))
573 		dput(dentry);
574 	dput(dentry);
575 
576 	/* adjust nlink and update timestamp */
577 	inode = ilookup(sysfs_sb, sd->s_ino);
578 	if (inode) {
579 		mutex_lock(&inode->i_mutex);
580 
581 		inode->i_ctime = CURRENT_TIME;
582 		drop_nlink(inode);
583 		if (sysfs_type(sd) == SYSFS_DIR)
584 			drop_nlink(inode);
585 
586 		mutex_unlock(&inode->i_mutex);
587 		iput(inode);
588 	}
589 }
590 
591 /**
592  *	sysfs_addrm_finish - finish up sysfs_dirent add/remove
593  *	@acxt: addrm context to finish up
594  *
595  *	Finish up sysfs_dirent add/remove.  Resources acquired by
596  *	sysfs_addrm_start() are released and removed sysfs_dirents are
597  *	cleaned up.  Timestamps on the parent inode are updated.
598  *
599  *	LOCKING:
600  *	All mutexes acquired by sysfs_addrm_start() are released.
601  *
602  *	RETURNS:
603  *	Number of added/removed sysfs_dirents since sysfs_addrm_start().
604  */
605 int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
606 {
607 	/* release resources acquired by sysfs_addrm_start() */
608 	mutex_unlock(&sysfs_mutex);
609 	if (acxt->parent_inode) {
610 		struct inode *inode = acxt->parent_inode;
611 
612 		/* if added/removed, update timestamps on the parent */
613 		if (acxt->cnt)
614 			inode->i_ctime = inode->i_mtime = CURRENT_TIME;
615 
616 		mutex_unlock(&inode->i_mutex);
617 		iput(inode);
618 	}
619 
620 	/* kill removed sysfs_dirents */
621 	while (acxt->removed) {
622 		struct sysfs_dirent *sd = acxt->removed;
623 
624 		acxt->removed = sd->s_sibling;
625 		sd->s_sibling = NULL;
626 
627 		sysfs_drop_dentry(sd);
628 		sysfs_deactivate(sd);
629 		sysfs_put(sd);
630 	}
631 
632 	return acxt->cnt;
633 }
634 
635 /**
636  *	sysfs_find_dirent - find sysfs_dirent with the given name
637  *	@parent_sd: sysfs_dirent to search under
638  *	@name: name to look for
639  *
640  *	Look for sysfs_dirent with name @name under @parent_sd.
641  *
642  *	LOCKING:
643  *	mutex_lock(sysfs_mutex)
644  *
645  *	RETURNS:
646  *	Pointer to sysfs_dirent if found, NULL if not.
647  */
648 struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
649 				       const unsigned char *name)
650 {
651 	struct sysfs_dirent *sd;
652 
653 	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling)
654 		if (sysfs_type(sd) && !strcmp(sd->s_name, name))
655 			return sd;
656 	return NULL;
657 }
658 
659 /**
660  *	sysfs_get_dirent - find and get sysfs_dirent with the given name
661  *	@parent_sd: sysfs_dirent to search under
662  *	@name: name to look for
663  *
664  *	Look for sysfs_dirent with name @name under @parent_sd and get
665  *	it if found.
666  *
667  *	LOCKING:
668  *	Kernel thread context (may sleep).  Grabs sysfs_mutex.
669  *
670  *	RETURNS:
671  *	Pointer to sysfs_dirent if found, NULL if not.
672  */
673 struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
674 				      const unsigned char *name)
675 {
676 	struct sysfs_dirent *sd;
677 
678 	mutex_lock(&sysfs_mutex);
679 	sd = sysfs_find_dirent(parent_sd, name);
680 	sysfs_get(sd);
681 	mutex_unlock(&sysfs_mutex);
682 
683 	return sd;
684 }
685 
686 static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
687 		      const char *name, struct sysfs_dirent **p_sd)
688 {
689 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
690 	struct sysfs_addrm_cxt acxt;
691 	struct sysfs_dirent *sd;
692 
693 	/* allocate */
694 	sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
695 	if (!sd)
696 		return -ENOMEM;
697 	sd->s_elem.dir.kobj = kobj;
698 
699 	/* link in */
700 	sysfs_addrm_start(&acxt, parent_sd);
701 	if (!sysfs_find_dirent(parent_sd, name)) {
702 		sysfs_add_one(&acxt, sd);
703 		sysfs_link_sibling(sd);
704 	}
705 	if (sysfs_addrm_finish(&acxt)) {
706 		*p_sd = sd;
707 		return 0;
708 	}
709 
710 	sysfs_put(sd);
711 	return -EEXIST;
712 }
713 
714 int sysfs_create_subdir(struct kobject *kobj, const char *name,
715 			struct sysfs_dirent **p_sd)
716 {
717 	return create_dir(kobj, kobj->sd, name, p_sd);
718 }
719 
720 /**
721  *	sysfs_create_dir - create a directory for an object.
722  *	@kobj:		object we're creating directory for.
723  *	@shadow_parent:	parent object.
724  */
725 int sysfs_create_dir(struct kobject *kobj,
726 		     struct sysfs_dirent *shadow_parent_sd)
727 {
728 	struct sysfs_dirent *parent_sd, *sd;
729 	int error = 0;
730 
731 	BUG_ON(!kobj);
732 
733 	if (shadow_parent_sd)
734 		parent_sd = shadow_parent_sd;
735 	else if (kobj->parent)
736 		parent_sd = kobj->parent->sd;
737 	else if (sysfs_mount && sysfs_mount->mnt_sb)
738 		parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
739 	else
740 		return -EFAULT;
741 
742 	error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
743 	if (!error)
744 		kobj->sd = sd;
745 	return error;
746 }
747 
748 static int sysfs_count_nlink(struct sysfs_dirent *sd)
749 {
750 	struct sysfs_dirent *child;
751 	int nr = 0;
752 
753 	for (child = sd->s_children; child; child = child->s_sibling)
754 		if (sysfs_type(child) == SYSFS_DIR)
755 			nr++;
756 	return nr + 2;
757 }
758 
759 static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
760 				struct nameidata *nd)
761 {
762 	struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
763 	struct sysfs_dirent * sd;
764 	struct bin_attribute *bin_attr;
765 	struct inode *inode;
766 	int found = 0;
767 
768 	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
769 		if (sysfs_type(sd) &&
770 		    !strcmp(sd->s_name, dentry->d_name.name)) {
771 			found = 1;
772 			break;
773 		}
774 	}
775 
776 	/* no such entry */
777 	if (!found)
778 		return NULL;
779 
780 	/* attach dentry and inode */
781 	inode = sysfs_get_inode(sd);
782 	if (!inode)
783 		return ERR_PTR(-ENOMEM);
784 
785 	mutex_lock(&sysfs_mutex);
786 
787 	if (inode->i_state & I_NEW) {
788 		/* initialize inode according to type */
789 		switch (sysfs_type(sd)) {
790 		case SYSFS_DIR:
791 			inode->i_op = &sysfs_dir_inode_operations;
792 			inode->i_fop = &sysfs_dir_operations;
793 			inode->i_nlink = sysfs_count_nlink(sd);
794 			break;
795 		case SYSFS_KOBJ_ATTR:
796 			inode->i_size = PAGE_SIZE;
797 			inode->i_fop = &sysfs_file_operations;
798 			break;
799 		case SYSFS_KOBJ_BIN_ATTR:
800 			bin_attr = sd->s_elem.bin_attr.bin_attr;
801 			inode->i_size = bin_attr->size;
802 			inode->i_fop = &bin_fops;
803 			break;
804 		case SYSFS_KOBJ_LINK:
805 			inode->i_op = &sysfs_symlink_inode_operations;
806 			break;
807 		default:
808 			BUG();
809 		}
810 	}
811 
812 	sysfs_instantiate(dentry, inode);
813 	sysfs_attach_dentry(sd, dentry);
814 
815 	mutex_unlock(&sysfs_mutex);
816 
817 	return NULL;
818 }
819 
820 const struct inode_operations sysfs_dir_inode_operations = {
821 	.lookup		= sysfs_lookup,
822 	.setattr	= sysfs_setattr,
823 };
824 
825 static void remove_dir(struct sysfs_dirent *sd)
826 {
827 	struct sysfs_addrm_cxt acxt;
828 
829 	sysfs_addrm_start(&acxt, sd->s_parent);
830 	sysfs_unlink_sibling(sd);
831 	sysfs_remove_one(&acxt, sd);
832 	sysfs_addrm_finish(&acxt);
833 }
834 
/**
 *	sysfs_remove_subdir - remove a subdirectory
 *	@sd: sysfs_dirent of the subdirectory
 *
 *	Exported wrapper around remove_dir().
 */
void sysfs_remove_subdir(struct sysfs_dirent *sd)
{
	remove_dir(sd);
}
839 
840 
841 static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
842 {
843 	struct sysfs_addrm_cxt acxt;
844 	struct sysfs_dirent **pos;
845 
846 	if (!dir_sd)
847 		return;
848 
849 	pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
850 	sysfs_addrm_start(&acxt, dir_sd);
851 	pos = &dir_sd->s_children;
852 	while (*pos) {
853 		struct sysfs_dirent *sd = *pos;
854 
855 		if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) {
856 			*pos = sd->s_sibling;
857 			sd->s_sibling = NULL;
858 			sysfs_remove_one(&acxt, sd);
859 		} else
860 			pos = &(*pos)->s_sibling;
861 	}
862 	sysfs_addrm_finish(&acxt);
863 
864 	remove_dir(dir_sd);
865 }
866 
867 /**
868  *	sysfs_remove_dir - remove an object's directory.
869  *	@kobj:	object.
870  *
871  *	The only thing special about this is that we remove any files in
872  *	the directory before we remove the directory, and we've inlined
873  *	what used to be sysfs_rmdir() below, instead of calling separately.
874  */
875 
876 void sysfs_remove_dir(struct kobject * kobj)
877 {
878 	struct sysfs_dirent *sd = kobj->sd;
879 
880 	spin_lock(&sysfs_assoc_lock);
881 	kobj->sd = NULL;
882 	spin_unlock(&sysfs_assoc_lock);
883 
884 	__sysfs_remove_dir(sd);
885 }
886 
887 int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd,
888 		     const char *new_name)
889 {
890 	struct sysfs_dirent *sd = kobj->sd;
891 	struct dentry *new_parent = NULL;
892 	struct dentry *old_dentry = NULL, *new_dentry = NULL;
893 	const char *dup_name = NULL;
894 	int error;
895 
896 	/* get dentries */
897 	old_dentry = sysfs_get_dentry(sd);
898 	if (IS_ERR(old_dentry)) {
899 		error = PTR_ERR(old_dentry);
900 		goto out_dput;
901 	}
902 
903 	new_parent = sysfs_get_dentry(new_parent_sd);
904 	if (IS_ERR(new_parent)) {
905 		error = PTR_ERR(new_parent);
906 		goto out_dput;
907 	}
908 
909 	/* lock new_parent and get dentry for new name */
910 	mutex_lock(&new_parent->d_inode->i_mutex);
911 
912 	new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name));
913 	if (IS_ERR(new_dentry)) {
914 		error = PTR_ERR(new_dentry);
915 		goto out_unlock;
916 	}
917 
918 	/* By allowing two different directories with the same
919 	 * d_parent we allow this routine to move between different
920 	 * shadows of the same directory
921 	 */
922 	error = -EINVAL;
923 	if (old_dentry->d_parent->d_inode != new_parent->d_inode ||
924 	    new_dentry->d_parent->d_inode != new_parent->d_inode ||
925 	    old_dentry == new_dentry)
926 		goto out_unlock;
927 
928 	error = -EEXIST;
929 	if (new_dentry->d_inode)
930 		goto out_unlock;
931 
932 	/* rename kobject and sysfs_dirent */
933 	error = -ENOMEM;
934 	new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
935 	if (!new_name)
936 		goto out_drop;
937 
938 	error = kobject_set_name(kobj, "%s", new_name);
939 	if (error)
940 		goto out_drop;
941 
942 	dup_name = sd->s_name;
943 	sd->s_name = new_name;
944 
945 	/* move under the new parent */
946 	d_add(new_dentry, NULL);
947 	d_move(sd->s_dentry, new_dentry);
948 
949 	mutex_lock(&sysfs_mutex);
950 
951 	sysfs_unlink_sibling(sd);
952 	sysfs_get(new_parent_sd);
953 	sysfs_put(sd->s_parent);
954 	sd->s_parent = new_parent_sd;
955 	sysfs_link_sibling(sd);
956 
957 	mutex_unlock(&sysfs_mutex);
958 
959 	error = 0;
960 	goto out_unlock;
961 
962  out_drop:
963 	d_drop(new_dentry);
964  out_unlock:
965 	mutex_unlock(&new_parent->d_inode->i_mutex);
966  out_dput:
967 	kfree(dup_name);
968 	dput(new_parent);
969 	dput(old_dentry);
970 	dput(new_dentry);
971 	return error;
972 }
973 
974 int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
975 {
976 	struct sysfs_dirent *sd = kobj->sd;
977 	struct sysfs_dirent *new_parent_sd;
978 	struct dentry *old_parent, *new_parent = NULL;
979 	struct dentry *old_dentry = NULL, *new_dentry = NULL;
980 	int error;
981 
982 	BUG_ON(!sd->s_parent);
983 	new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
984 
985 	/* get dentries */
986 	old_dentry = sysfs_get_dentry(sd);
987 	if (IS_ERR(old_dentry)) {
988 		error = PTR_ERR(old_dentry);
989 		goto out_dput;
990 	}
991 	old_parent = sd->s_parent->s_dentry;
992 
993 	new_parent = sysfs_get_dentry(new_parent_sd);
994 	if (IS_ERR(new_parent)) {
995 		error = PTR_ERR(new_parent);
996 		goto out_dput;
997 	}
998 
999 	if (old_parent->d_inode == new_parent->d_inode) {
1000 		error = 0;
1001 		goto out_dput;	/* nothing to move */
1002 	}
1003 again:
1004 	mutex_lock(&old_parent->d_inode->i_mutex);
1005 	if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
1006 		mutex_unlock(&old_parent->d_inode->i_mutex);
1007 		goto again;
1008 	}
1009 
1010 	new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name));
1011 	if (IS_ERR(new_dentry)) {
1012 		error = PTR_ERR(new_dentry);
1013 		goto out_unlock;
1014 	} else
1015 		error = 0;
1016 	d_add(new_dentry, NULL);
1017 	d_move(sd->s_dentry, new_dentry);
1018 	dput(new_dentry);
1019 
1020 	/* Remove from old parent's list and insert into new parent's list. */
1021 	mutex_lock(&sysfs_mutex);
1022 
1023 	sysfs_unlink_sibling(sd);
1024 	sysfs_get(new_parent_sd);
1025 	sysfs_put(sd->s_parent);
1026 	sd->s_parent = new_parent_sd;
1027 	sysfs_link_sibling(sd);
1028 
1029 	mutex_unlock(&sysfs_mutex);
1030 
1031  out_unlock:
1032 	mutex_unlock(&new_parent->d_inode->i_mutex);
1033 	mutex_unlock(&old_parent->d_inode->i_mutex);
1034  out_dput:
1035 	dput(new_parent);
1036 	dput(old_dentry);
1037 	dput(new_dentry);
1038 	return error;
1039 }
1040 
1041 static int sysfs_dir_open(struct inode *inode, struct file *file)
1042 {
1043 	struct dentry * dentry = file->f_path.dentry;
1044 	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
1045 	struct sysfs_dirent * sd;
1046 
1047 	sd = sysfs_new_dirent("_DIR_", 0, 0);
1048 	if (sd) {
1049 		mutex_lock(&sysfs_mutex);
1050 		sd->s_parent = sysfs_get(parent_sd);
1051 		sysfs_link_sibling(sd);
1052 		mutex_unlock(&sysfs_mutex);
1053 	}
1054 
1055 	file->private_data = sd;
1056 	return sd ? 0 : -ENOMEM;
1057 }
1058 
1059 static int sysfs_dir_close(struct inode *inode, struct file *file)
1060 {
1061 	struct sysfs_dirent * cursor = file->private_data;
1062 
1063 	mutex_lock(&sysfs_mutex);
1064 	sysfs_unlink_sibling(cursor);
1065 	mutex_unlock(&sysfs_mutex);
1066 
1067 	release_sysfs_dirent(cursor);
1068 
1069 	return 0;
1070 }
1071 
1072 /* Relationship between s_mode and the DT_xxx types */
1073 static inline unsigned char dt_type(struct sysfs_dirent *sd)
1074 {
1075 	return (sd->s_mode >> 12) & 15;
1076 }
1077 
1078 static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
1079 {
1080 	struct dentry *dentry = filp->f_path.dentry;
1081 	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
1082 	struct sysfs_dirent *cursor = filp->private_data;
1083 	struct sysfs_dirent **pos;
1084 	ino_t ino;
1085 	int i = filp->f_pos;
1086 
1087 	switch (i) {
1088 		case 0:
1089 			ino = parent_sd->s_ino;
1090 			if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
1091 				break;
1092 			filp->f_pos++;
1093 			i++;
1094 			/* fallthrough */
1095 		case 1:
1096 			if (parent_sd->s_parent)
1097 				ino = parent_sd->s_parent->s_ino;
1098 			else
1099 				ino = parent_sd->s_ino;
1100 			if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
1101 				break;
1102 			filp->f_pos++;
1103 			i++;
1104 			/* fallthrough */
1105 		default:
1106 			mutex_lock(&sysfs_mutex);
1107 
1108 			pos = &parent_sd->s_children;
1109 			while (*pos != cursor)
1110 				pos = &(*pos)->s_sibling;
1111 
1112 			/* unlink cursor */
1113 			*pos = cursor->s_sibling;
1114 
1115 			if (filp->f_pos == 2)
1116 				pos = &parent_sd->s_children;
1117 
1118 			for ( ; *pos; pos = &(*pos)->s_sibling) {
1119 				struct sysfs_dirent *next = *pos;
1120 				const char * name;
1121 				int len;
1122 
1123 				if (!sysfs_type(next))
1124 					continue;
1125 
1126 				name = next->s_name;
1127 				len = strlen(name);
1128 				ino = next->s_ino;
1129 
1130 				if (filldir(dirent, name, len, filp->f_pos, ino,
1131 						 dt_type(next)) < 0)
1132 					break;
1133 
1134 				filp->f_pos++;
1135 			}
1136 
1137 			/* put cursor back in */
1138 			cursor->s_sibling = *pos;
1139 			*pos = cursor;
1140 
1141 			mutex_unlock(&sysfs_mutex);
1142 	}
1143 	return 0;
1144 }
1145 
1146 static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
1147 {
1148 	struct dentry * dentry = file->f_path.dentry;
1149 
1150 	switch (origin) {
1151 		case 1:
1152 			offset += file->f_pos;
1153 		case 0:
1154 			if (offset >= 0)
1155 				break;
1156 		default:
1157 			return -EINVAL;
1158 	}
1159 	if (offset != file->f_pos) {
1160 		mutex_lock(&sysfs_mutex);
1161 
1162 		file->f_pos = offset;
1163 		if (file->f_pos >= 2) {
1164 			struct sysfs_dirent *sd = dentry->d_fsdata;
1165 			struct sysfs_dirent *cursor = file->private_data;
1166 			struct sysfs_dirent **pos;
1167 			loff_t n = file->f_pos - 2;
1168 
1169 			sysfs_unlink_sibling(cursor);
1170 
1171 			pos = &sd->s_children;
1172 			while (n && *pos) {
1173 				struct sysfs_dirent *next = *pos;
1174 				if (sysfs_type(next))
1175 					n--;
1176 				pos = &(*pos)->s_sibling;
1177 			}
1178 
1179 			cursor->s_sibling = *pos;
1180 			*pos = cursor;
1181 		}
1182 
1183 		mutex_unlock(&sysfs_mutex);
1184 	}
1185 
1186 	return offset;
1187 }
1188 
1189 
1190 /**
1191  *	sysfs_make_shadowed_dir - Setup so a directory can be shadowed
1192  *	@kobj:	object we're creating shadow of.
1193  */
1194 
1195 int sysfs_make_shadowed_dir(struct kobject *kobj,
1196 	void * (*follow_link)(struct dentry *, struct nameidata *))
1197 {
1198 	struct dentry *dentry;
1199 	struct inode *inode;
1200 	struct inode_operations *i_op;
1201 
1202 	/* get dentry for @kobj->sd, dentry of a shadowed dir is pinned */
1203 	dentry = sysfs_get_dentry(kobj->sd);
1204 	if (IS_ERR(dentry))
1205 		return PTR_ERR(dentry);
1206 
1207 	inode = dentry->d_inode;
1208 	if (inode->i_op != &sysfs_dir_inode_operations) {
1209 		dput(dentry);
1210 		return -EINVAL;
1211 	}
1212 
1213 	i_op = kmalloc(sizeof(*i_op), GFP_KERNEL);
1214 	if (!i_op)
1215 		return -ENOMEM;
1216 
1217 	memcpy(i_op, &sysfs_dir_inode_operations, sizeof(*i_op));
1218 	i_op->follow_link = follow_link;
1219 
1220 	/* Locking of inode->i_op?
1221 	 * Since setting i_op is a single word write and they
1222 	 * are atomic we should be ok here.
1223 	 */
1224 	inode->i_op = i_op;
1225 	return 0;
1226 }
1227 
1228 /**
1229  *	sysfs_create_shadow_dir - create a shadow directory for an object.
1230  *	@kobj:	object we're creating directory for.
1231  *
1232  *	sysfs_make_shadowed_dir must already have been called on this
1233  *	directory.
1234  */
1235 
1236 struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj)
1237 {
1238 	struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
1239 	struct dentry *dir, *parent, *shadow;
1240 	struct inode *inode;
1241 	struct sysfs_dirent *sd;
1242 	struct sysfs_addrm_cxt acxt;
1243 
1244 	dir = sysfs_get_dentry(kobj->sd);
1245 	if (IS_ERR(dir)) {
1246 		sd = (void *)dir;
1247 		goto out;
1248 	}
1249 	parent = dir->d_parent;
1250 
1251 	inode = dir->d_inode;
1252 	sd = ERR_PTR(-EINVAL);
1253 	if (!sysfs_is_shadowed_inode(inode))
1254 		goto out_dput;
1255 
1256 	shadow = d_alloc(parent, &dir->d_name);
1257 	if (!shadow)
1258 		goto nomem;
1259 
1260 	sd = sysfs_new_dirent("_SHADOW_", inode->i_mode, SYSFS_DIR);
1261 	if (!sd)
1262 		goto nomem;
1263 	sd->s_elem.dir.kobj = kobj;
1264 
1265 	sysfs_addrm_start(&acxt, parent_sd);
1266 
1267 	/* add but don't link into children list */
1268 	sysfs_add_one(&acxt, sd);
1269 
1270 	/* attach and instantiate dentry */
1271 	sysfs_attach_dentry(sd, shadow);
1272 	d_instantiate(shadow, igrab(inode));
1273 	inc_nlink(inode);	/* tj: synchronization? */
1274 
1275 	sysfs_addrm_finish(&acxt);
1276 
1277 	dget(shadow);		/* Extra count - pin the dentry in core */
1278 
1279 	goto out_dput;
1280 
1281  nomem:
1282 	dput(shadow);
1283 	sd = ERR_PTR(-ENOMEM);
1284  out_dput:
1285 	dput(dir);
1286  out:
1287 	return sd;
1288 }
1289 
/**
 *	sysfs_remove_shadow_dir - remove a shadow directory.
 *	@shadow_sd: sysfs_dirent of shadow directory
 *
 *	Exported wrapper around __sysfs_remove_dir(): the
 *	non-directory entries in the shadow directory are removed
 *	first, then the directory itself.  NULL @shadow_sd is ignored.
 */

void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd)
{
	__sysfs_remove_dir(shadow_sd);
}
1303 
1304 const struct file_operations sysfs_dir_operations = {
1305 	.open		= sysfs_dir_open,
1306 	.release	= sysfs_dir_close,
1307 	.llseek		= sysfs_dir_lseek,
1308 	.read		= generic_read_dir,
1309 	.readdir	= sysfs_readdir,
1310 };
1311