xref: /freebsd/sys/fs/devfs/devfs_devs.c (revision b2db760808f74bb53c232900091c9da801ebbfcc)
1 /*-
2  * Copyright (c) 2000,2004
3  *	Poul-Henning Kamp.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Neither the name of the University nor the names of its contributors
11  *    may be used to endorse or promote products derived from this software
12  *    without specific prior written permission.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vfsops.c 1.36
27  *
28  * $FreeBSD$
29  */
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/conf.h>
34 #include <sys/dirent.h>
35 #include <sys/kernel.h>
36 #include <sys/limits.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/proc.h>
40 #include <sys/sx.h>
41 #include <sys/sysctl.h>
42 #include <sys/vnode.h>
43 
44 #include <sys/kdb.h>
45 
46 #include <fs/devfs/devfs.h>
47 #include <fs/devfs/devfs_int.h>
48 
49 #include <security/mac/mac_framework.h>
50 
51 /*
52  * The one true (but secret) list of active devices in the system.
53  * Locked by dev_lock()/devmtx
54  */
55 struct cdev_priv_list cdevp_list = TAILQ_HEAD_INITIALIZER(cdevp_list);
56 
57 struct unrhdr *devfs_inos;
58 
59 
60 static MALLOC_DEFINE(M_DEVFS2, "DEVFS2", "DEVFS data 2");
61 static MALLOC_DEFINE(M_DEVFS3, "DEVFS3", "DEVFS data 3");
62 static MALLOC_DEFINE(M_CDEVP, "DEVFS1", "DEVFS cdev_priv storage");
63 
64 static SYSCTL_NODE(_vfs, OID_AUTO, devfs, CTLFLAG_RW, 0, "DEVFS filesystem");
65 
66 static unsigned devfs_generation;
67 SYSCTL_UINT(_vfs_devfs, OID_AUTO, generation, CTLFLAG_RD,
68 	&devfs_generation, 0, "DEVFS generation number");
69 
70 unsigned devfs_rule_depth = 1;
71 SYSCTL_UINT(_vfs_devfs, OID_AUTO, rule_depth, CTLFLAG_RW,
72 	&devfs_rule_depth, 0, "Max depth of ruleset include");
73 
74 /*
75  * Helper sysctl for devname(3).  We're given a dev_t and return the
76  * name, if any, registered by the device driver.
77  */
78 static int
79 sysctl_devname(SYSCTL_HANDLER_ARGS)
80 {
81 	int error;
82 	dev_t ud;
83 	struct cdev_priv *cdp;
84 	struct cdev *dev;
85 
86 	error = SYSCTL_IN(req, &ud, sizeof (ud));
87 	if (error)
88 		return (error);
89 	if (ud == NODEV)
90 		return (EINVAL);
91 	dev = NULL;
92 	dev_lock();
93 	TAILQ_FOREACH(cdp, &cdevp_list, cdp_list)
94 		if (cdp->cdp_inode == ud) {
95 			dev = &cdp->cdp_c;
96 			dev_refl(dev);
97 			break;
98 		}
99 	dev_unlock();
100 	if (dev == NULL)
101 		return (ENOENT);
102 	error = SYSCTL_OUT(req, dev->si_name, strlen(dev->si_name) + 1);
103 	dev_rel(dev);
104 	return (error);
105 }
106 
107 SYSCTL_PROC(_kern, OID_AUTO, devname,
108     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_ANYBODY|CTLFLAG_MPSAFE,
109     NULL, 0, sysctl_devname, "", "devname(3) handler");
110 
111 SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev, CTLFLAG_RD,
112     0, sizeof(struct cdev), "sizeof(struct cdev)");
113 
114 SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev_priv, CTLFLAG_RD,
115     0, sizeof(struct cdev_priv), "sizeof(struct cdev_priv)");
116 
117 struct cdev *
118 devfs_alloc(int flags)
119 {
120 	struct cdev_priv *cdp;
121 	struct cdev *cdev;
122 	struct timespec ts;
123 
124 	cdp = malloc(sizeof *cdp, M_CDEVP, M_USE_RESERVE | M_ZERO |
125 	    ((flags & MAKEDEV_NOWAIT) ? M_NOWAIT : M_WAITOK));
126 	if (cdp == NULL)
127 		return (NULL);
128 
129 	cdp->cdp_dirents = &cdp->cdp_dirent0;
130 	cdp->cdp_dirent0 = NULL;
131 	cdp->cdp_maxdirent = 0;
132 	cdp->cdp_inode = 0;
133 
134 	cdev = &cdp->cdp_c;
135 
136 	cdev->si_name = cdev->__si_namebuf;
137 	LIST_INIT(&cdev->si_children);
138 	vfs_timestamp(&ts);
139 	cdev->si_atime = cdev->si_mtime = cdev->si_ctime = ts;
140 	cdev->si_cred = NULL;
141 
142 	return (cdev);
143 }
144 
145 void
146 devfs_free(struct cdev *cdev)
147 {
148 	struct cdev_priv *cdp;
149 
150 	cdp = cdev2priv(cdev);
151 	if (cdev->si_cred != NULL)
152 		crfree(cdev->si_cred);
153 	if (cdp->cdp_inode > 0)
154 		free_unr(devfs_inos, cdp->cdp_inode);
155 	if (cdp->cdp_maxdirent > 0)
156 		free(cdp->cdp_dirents, M_DEVFS2);
157 	free(cdp, M_CDEVP);
158 }
159 
160 struct devfs_dirent *
161 devfs_find(struct devfs_dirent *dd, const char *name, int namelen, int type)
162 {
163 	struct devfs_dirent *de;
164 
165 	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
166 		if (namelen != de->de_dirent->d_namlen)
167 			continue;
168 		if (type != 0 && type != de->de_dirent->d_type)
169 			continue;
170 		if (bcmp(name, de->de_dirent->d_name, namelen) != 0)
171 			continue;
172 		break;
173 	}
174 	return (de);
175 }
176 
177 struct devfs_dirent *
178 devfs_newdirent(char *name, int namelen)
179 {
180 	int i;
181 	struct devfs_dirent *de;
182 	struct dirent d;
183 
184 	d.d_namlen = namelen;
185 	i = sizeof (*de) + GENERIC_DIRSIZ(&d);
186 	de = malloc(i, M_DEVFS3, M_WAITOK | M_ZERO);
187 	de->de_dirent = (struct dirent *)(de + 1);
188 	de->de_dirent->d_namlen = namelen;
189 	de->de_dirent->d_reclen = GENERIC_DIRSIZ(&d);
190 	bcopy(name, de->de_dirent->d_name, namelen);
191 	de->de_dirent->d_name[namelen] = '\0';
192 	vfs_timestamp(&de->de_ctime);
193 	de->de_mtime = de->de_atime = de->de_ctime;
194 	de->de_links = 1;
195 	de->de_holdcnt = 1;
196 #ifdef MAC
197 	mac_devfs_init(de);
198 #endif
199 	return (de);
200 }
201 
202 struct devfs_dirent *
203 devfs_parent_dirent(struct devfs_dirent *de)
204 {
205 
206 	if (de->de_dirent->d_type != DT_DIR)
207 		return (de->de_dir);
208 
209 	if (de->de_flags & (DE_DOT | DE_DOTDOT))
210 		return (NULL);
211 
212 	de = TAILQ_FIRST(&de->de_dlist);	/* "." */
213 	if (de == NULL)
214 		return (NULL);
215 	de = TAILQ_NEXT(de, de_list);		/* ".." */
216 	if (de == NULL)
217 		return (NULL);
218 
219 	return (de->de_dir);
220 }
221 
222 struct devfs_dirent *
223 devfs_vmkdir(struct devfs_mount *dmp, char *name, int namelen, struct devfs_dirent *dotdot, u_int inode)
224 {
225 	struct devfs_dirent *dd;
226 	struct devfs_dirent *de;
227 
228 	/* Create the new directory */
229 	dd = devfs_newdirent(name, namelen);
230 	TAILQ_INIT(&dd->de_dlist);
231 	dd->de_dirent->d_type = DT_DIR;
232 	dd->de_mode = 0555;
233 	dd->de_links = 2;
234 	dd->de_dir = dd;
235 	if (inode != 0)
236 		dd->de_inode = inode;
237 	else
238 		dd->de_inode = alloc_unr(devfs_inos);
239 
240 	/*
241 	 * "." and ".." are always the two first entries in the
242 	 * de_dlist list.
243 	 *
244 	 * Create the "." entry in the new directory.
245 	 */
246 	de = devfs_newdirent(".", 1);
247 	de->de_dirent->d_type = DT_DIR;
248 	de->de_flags |= DE_DOT;
249 	TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list);
250 	de->de_dir = dd;
251 
252 	/* Create the ".." entry in the new directory. */
253 	de = devfs_newdirent("..", 2);
254 	de->de_dirent->d_type = DT_DIR;
255 	de->de_flags |= DE_DOTDOT;
256 	TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list);
257 	if (dotdot == NULL) {
258 		de->de_dir = dd;
259 	} else {
260 		de->de_dir = dotdot;
261 		TAILQ_INSERT_TAIL(&dotdot->de_dlist, dd, de_list);
262 		dotdot->de_links++;
263 	}
264 
265 #ifdef MAC
266 	mac_devfs_create_directory(dmp->dm_mount, name, namelen, dd);
267 #endif
268 	return (dd);
269 }
270 
271 void
272 devfs_dirent_free(struct devfs_dirent *de)
273 {
274 	free(de, M_DEVFS3);
275 }
276 
277 /*
278  * The caller needs to hold the dm for the duration of the call since
279  * dm->dm_lock may be temporary dropped.
280  */
281 void
282 devfs_delete(struct devfs_mount *dm, struct devfs_dirent *de, int vp_locked)
283 {
284 	struct vnode *vp;
285 
286 	KASSERT((de->de_flags & DE_DOOMED) == 0,
287 		("devfs_delete doomed dirent"));
288 	de->de_flags |= DE_DOOMED;
289 	mtx_lock(&devfs_de_interlock);
290 	vp = de->de_vnode;
291 	if (vp != NULL) {
292 		VI_LOCK(vp);
293 		mtx_unlock(&devfs_de_interlock);
294 		vholdl(vp);
295 		sx_unlock(&dm->dm_lock);
296 		if (!vp_locked)
297 			vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY);
298 		else
299 			VI_UNLOCK(vp);
300 		vgone(vp);
301 		if (!vp_locked)
302 			VOP_UNLOCK(vp, 0);
303 		vdrop(vp);
304 		sx_xlock(&dm->dm_lock);
305 	} else
306 		mtx_unlock(&devfs_de_interlock);
307 	if (de->de_symlink) {
308 		free(de->de_symlink, M_DEVFS);
309 		de->de_symlink = NULL;
310 	}
311 #ifdef MAC
312 	mac_devfs_destroy(de);
313 #endif
314 	if (de->de_inode > DEVFS_ROOTINO) {
315 		free_unr(devfs_inos, de->de_inode);
316 		de->de_inode = 0;
317 	}
318 	if (DEVFS_DE_DROP(de))
319 		devfs_dirent_free(de);
320 }
321 
322 /*
323  * Called on unmount.
324  * Recursively removes the entire tree.
325  * The caller needs to hold the dm for the duration of the call.
326  */
327 
328 static void
329 devfs_purge(struct devfs_mount *dm, struct devfs_dirent *dd)
330 {
331 	struct devfs_dirent *de;
332 
333 	sx_assert(&dm->dm_lock, SX_XLOCKED);
334 	for (;;) {
335 		de = TAILQ_FIRST(&dd->de_dlist);
336 		if (de == NULL)
337 			break;
338 		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
339 		if (de->de_flags & (DE_DOT|DE_DOTDOT))
340 			devfs_delete(dm, de, 0);
341 		else if (de->de_dirent->d_type == DT_DIR)
342 			devfs_purge(dm, de);
343 		else
344 			devfs_delete(dm, de, 0);
345 	}
346 	devfs_delete(dm, dd, 0);
347 }
348 
349 /*
350  * Each cdev_priv has an array of pointers to devfs_dirent which is indexed
351  * by the mount points dm_idx.
352  * This function extends the array when necessary, taking into account that
353  * the default array is 1 element and not malloc'ed.
354  */
355 static void
356 devfs_metoo(struct cdev_priv *cdp, struct devfs_mount *dm)
357 {
358 	struct devfs_dirent **dep;
359 	int siz;
360 
361 	siz = (dm->dm_idx + 1) * sizeof *dep;
362 	dep = malloc(siz, M_DEVFS2, M_WAITOK | M_ZERO);
363 	dev_lock();
364 	if (dm->dm_idx <= cdp->cdp_maxdirent) {
365 		/* We got raced */
366 		dev_unlock();
367 		free(dep, M_DEVFS2);
368 		return;
369 	}
370 	memcpy(dep, cdp->cdp_dirents, (cdp->cdp_maxdirent + 1) * sizeof *dep);
371 	if (cdp->cdp_maxdirent > 0)
372 		free(cdp->cdp_dirents, M_DEVFS2);
373 	cdp->cdp_dirents = dep;
374 	/*
375 	 * XXX: if malloc told us how much we actually got this could
376 	 * XXX: be optimized.
377 	 */
378 	cdp->cdp_maxdirent = dm->dm_idx;
379 	dev_unlock();
380 }
381 
382 /*
383  * The caller needs to hold the dm for the duration of the call.
384  */
385 static int
386 devfs_populate_loop(struct devfs_mount *dm, int cleanup)
387 {
388 	struct cdev_priv *cdp;
389 	struct devfs_dirent *de;
390 	struct devfs_dirent *dd;
391 	struct cdev *pdev;
392 	int de_flags, j;
393 	char *q, *s;
394 
395 	sx_assert(&dm->dm_lock, SX_XLOCKED);
396 	dev_lock();
397 	TAILQ_FOREACH(cdp, &cdevp_list, cdp_list) {
398 
399 		KASSERT(cdp->cdp_dirents != NULL, ("NULL cdp_dirents"));
400 
401 		/*
402 		 * If we are unmounting, or the device has been destroyed,
403 		 * clean up our dirent.
404 		 */
405 		if ((cleanup || !(cdp->cdp_flags & CDP_ACTIVE)) &&
406 		    dm->dm_idx <= cdp->cdp_maxdirent &&
407 		    cdp->cdp_dirents[dm->dm_idx] != NULL) {
408 			de = cdp->cdp_dirents[dm->dm_idx];
409 			cdp->cdp_dirents[dm->dm_idx] = NULL;
410 			KASSERT(cdp == de->de_cdp,
411 			    ("%s %d %s %p %p", __func__, __LINE__,
412 			    cdp->cdp_c.si_name, cdp, de->de_cdp));
413 			KASSERT(de->de_dir != NULL, ("Null de->de_dir"));
414 			dev_unlock();
415 
416 			TAILQ_REMOVE(&de->de_dir->de_dlist, de, de_list);
417 			de->de_cdp = NULL;
418 			de->de_inode = 0;
419 			devfs_delete(dm, de, 0);
420 			dev_lock();
421 			cdp->cdp_inuse--;
422 			dev_unlock();
423 			return (1);
424 		}
425 		/*
426 	 	 * GC any lingering devices
427 		 */
428 		if (!(cdp->cdp_flags & CDP_ACTIVE)) {
429 			if (cdp->cdp_inuse > 0)
430 				continue;
431 			TAILQ_REMOVE(&cdevp_list, cdp, cdp_list);
432 			dev_unlock();
433 			dev_rel(&cdp->cdp_c);
434 			return (1);
435 		}
436 		/*
437 		 * Don't create any new dirents if we are unmounting
438 		 */
439 		if (cleanup)
440 			continue;
441 		KASSERT((cdp->cdp_flags & CDP_ACTIVE), ("Bogons, I tell ya'!"));
442 
443 		if (dm->dm_idx <= cdp->cdp_maxdirent &&
444 		    cdp->cdp_dirents[dm->dm_idx] != NULL) {
445 			de = cdp->cdp_dirents[dm->dm_idx];
446 			KASSERT(cdp == de->de_cdp, ("inconsistent cdp"));
447 			continue;
448 		}
449 
450 
451 		cdp->cdp_inuse++;
452 		dev_unlock();
453 
454 		if (dm->dm_idx > cdp->cdp_maxdirent)
455 		        devfs_metoo(cdp, dm);
456 
457 		dd = dm->dm_rootdir;
458 		s = cdp->cdp_c.si_name;
459 		for (;;) {
460 			for (q = s; *q != '/' && *q != '\0'; q++)
461 				continue;
462 			if (*q != '/')
463 				break;
464 			de = devfs_find(dd, s, q - s, 0);
465 			if (de == NULL)
466 				de = devfs_vmkdir(dm, s, q - s, dd, 0);
467 			else if (de->de_dirent->d_type == DT_LNK) {
468 				de = devfs_find(dd, s, q - s, DT_DIR);
469 				if (de == NULL)
470 					de = devfs_vmkdir(dm, s, q - s, dd, 0);
471 				de->de_flags |= DE_COVERED;
472 			}
473 			s = q + 1;
474 			dd = de;
475 			KASSERT(dd->de_dirent->d_type == DT_DIR &&
476 			    (dd->de_flags & (DE_DOT | DE_DOTDOT)) == 0,
477 			    ("%s: invalid directory (si_name=%s)",
478 			    __func__, cdp->cdp_c.si_name));
479 
480 		}
481 		de_flags = 0;
482 		de = devfs_find(dd, s, q - s, DT_LNK);
483 		if (de != NULL)
484 			de_flags |= DE_COVERED;
485 
486 		de = devfs_newdirent(s, q - s);
487 		if (cdp->cdp_c.si_flags & SI_ALIAS) {
488 			de->de_uid = 0;
489 			de->de_gid = 0;
490 			de->de_mode = 0755;
491 			de->de_dirent->d_type = DT_LNK;
492 			pdev = cdp->cdp_c.si_parent;
493 			j = strlen(pdev->si_name) + 1;
494 			de->de_symlink = malloc(j, M_DEVFS, M_WAITOK);
495 			bcopy(pdev->si_name, de->de_symlink, j);
496 		} else {
497 			de->de_uid = cdp->cdp_c.si_uid;
498 			de->de_gid = cdp->cdp_c.si_gid;
499 			de->de_mode = cdp->cdp_c.si_mode;
500 			de->de_dirent->d_type = DT_CHR;
501 		}
502 		de->de_flags |= de_flags;
503 		de->de_inode = cdp->cdp_inode;
504 		de->de_cdp = cdp;
505 #ifdef MAC
506 		mac_devfs_create_device(cdp->cdp_c.si_cred, dm->dm_mount,
507 		    &cdp->cdp_c, de);
508 #endif
509 		de->de_dir = dd;
510 		TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list);
511 		devfs_rules_apply(dm, de);
512 		dev_lock();
513 		/* XXX: could check that cdp is still active here */
514 		KASSERT(cdp->cdp_dirents[dm->dm_idx] == NULL,
515 		    ("%s %d\n", __func__, __LINE__));
516 		cdp->cdp_dirents[dm->dm_idx] = de;
517 		KASSERT(de->de_cdp != (void *)0xdeadc0de,
518 		    ("%s %d\n", __func__, __LINE__));
519 		dev_unlock();
520 		return (1);
521 	}
522 	dev_unlock();
523 	return (0);
524 }
525 
526 /*
527  * The caller needs to hold the dm for the duration of the call.
528  */
529 void
530 devfs_populate(struct devfs_mount *dm)
531 {
532 
533 	sx_assert(&dm->dm_lock, SX_XLOCKED);
534 	if (dm->dm_generation == devfs_generation)
535 		return;
536 	while (devfs_populate_loop(dm, 0))
537 		continue;
538 	dm->dm_generation = devfs_generation;
539 }
540 
541 /*
542  * The caller needs to hold the dm for the duration of the call.
543  */
544 void
545 devfs_cleanup(struct devfs_mount *dm)
546 {
547 
548 	sx_assert(&dm->dm_lock, SX_XLOCKED);
549 	while (devfs_populate_loop(dm, 1))
550 		continue;
551 	devfs_purge(dm, dm->dm_rootdir);
552 }
553 
554 /*
555  * devfs_create() and devfs_destroy() are called from kern_conf.c and
556  * in both cases the devlock() mutex is held, so no further locking
557  * is necesary and no sleeping allowed.
558  */
559 
560 void
561 devfs_create(struct cdev *dev)
562 {
563 	struct cdev_priv *cdp;
564 
565 	mtx_assert(&devmtx, MA_OWNED);
566 	cdp = cdev2priv(dev);
567 	cdp->cdp_flags |= CDP_ACTIVE;
568 	cdp->cdp_inode = alloc_unrl(devfs_inos);
569 	dev_refl(dev);
570 	TAILQ_INSERT_TAIL(&cdevp_list, cdp, cdp_list);
571 	devfs_generation++;
572 }
573 
574 void
575 devfs_destroy(struct cdev *dev)
576 {
577 	struct cdev_priv *cdp;
578 
579 	mtx_assert(&devmtx, MA_OWNED);
580 	cdp = cdev2priv(dev);
581 	cdp->cdp_flags &= ~CDP_ACTIVE;
582 	devfs_generation++;
583 }
584 
585 static void
586 devfs_devs_init(void *junk __unused)
587 {
588 
589 	devfs_inos = new_unrhdr(DEVFS_ROOTINO + 1, INT_MAX, &devmtx);
590 }
591 
592 SYSINIT(devfs_devs, SI_SUB_DEVFS, SI_ORDER_FIRST, devfs_devs_init, NULL);
593