xref: /titanic_52/usr/src/uts/common/fs/devfs/devfs_vfsops.c (revision 1ae0874509b6811fdde1dfd46f0d93fd09867a3f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * This is the device filesystem.
31  *
32  * It is a combination of a namer to drive autoconfiguration,
33  * plus the access methods for the device drivers of the system.
34  *
35  * The prototype is fairly dependent on specfs for the latter part
36  * of its implementation, though a final version would integrate the two.
37  */
38 #include <sys/types.h>
39 #include <sys/param.h>
40 #include <sys/sysmacros.h>
41 #include <sys/systm.h>
42 #include <sys/kmem.h>
43 #include <sys/time.h>
44 #include <sys/pathname.h>
45 #include <sys/vfs.h>
46 #include <sys/vnode.h>
47 #include <sys/stat.h>
48 #include <sys/uio.h>
49 #include <sys/stat.h>
50 #include <sys/errno.h>
51 #include <sys/cmn_err.h>
52 #include <sys/cred.h>
53 #include <sys/statvfs.h>
54 #include <sys/mount.h>
55 #include <sys/debug.h>
56 #include <sys/modctl.h>
57 #include <fs/fs_subr.h>
58 #include <sys/fs/dv_node.h>
59 #include <sys/fs/snode.h>
60 #include <sys/sunndi.h>
61 #include <sys/policy.h>
62 
/*
 * Forward declarations for the devfs vfs operations and for the
 * per-fstype initialization routine referenced by devfs_vfssw.
 */
static int devfs_mount(struct vfs *vfsp, struct vnode *mvp,
    struct mounta *uap, struct cred *cr);
static int devfs_unmount(struct vfs *vfsp, int flag, struct cred *cr);
static int devfs_root(struct vfs *vfsp, struct vnode **vpp);
static int devfs_statvfs(struct vfs *vfsp, struct statvfs64 *sbp);
static int devfs_mountroot(struct vfs *vfsp, enum whymountroot why);

static int devfsinit(int fstype, char *name);
74 
/*
 * Filesystem switch definition for devfs; it is referenced by the
 * modlfs linkage structure in this file.
 */
static vfsdef_t devfs_vfssw = {
	VFSDEF_VERSION,
	"devfs",	/* type name string */
	devfsinit,	/* init routine */
	0,		/* flags */
	NULL		/* mount options table prototype */
};
82 
83 static kmutex_t devfs_lock;	/* protects global data */
84 static int devfstype;		/* fstype */
85 static dev_t devfsdev;		/* the fictious 'device' we live on */
86 static struct devfs_data *devfs_mntinfo;	/* linked list of instances */
87 
88 /*
89  * Module linkage information
90  */
91 static struct modlfs modlfs = {
92 	&mod_fsops, "devices filesystem %I%", &devfs_vfssw
93 };
94 
95 static struct modlinkage modlinkage = {
96 	MODREV_1, (void *)&modlfs, NULL
97 };
98 
99 int
100 _init(void)
101 {
102 	int e;
103 
104 	mutex_init(&devfs_lock, "devfs lock", MUTEX_DEFAULT, NULL);
105 	dv_node_cache_init();
106 	if ((e = mod_install(&modlinkage)) != 0) {
107 		dv_node_cache_fini();
108 		mutex_destroy(&devfs_lock);
109 		return (e);
110 	}
111 	dcmn_err(("devfs loaded\n"));
112 	return (0);
113 }
114 
/*
 * Module unload entry point.  Unconditionally answers EBUSY; no
 * teardown of the lock or the dv_node cache is attempted here.
 */
int
_fini(void)
{
	return (EBUSY);
}
120 
121 int
122 _info(struct modinfo *modinfop)
123 {
124 	return (mod_info(&modlinkage, modinfop));
125 }
126 
127 /*ARGSUSED1*/
128 static int
129 devfsinit(int fstype, char *name)
130 {
131 	static const fs_operation_def_t devfs_vfsops_template[] = {
132 		VFSNAME_MOUNT, devfs_mount,
133 		VFSNAME_UNMOUNT, devfs_unmount,
134 		VFSNAME_ROOT, devfs_root,
135 		VFSNAME_STATVFS, devfs_statvfs,
136 		VFSNAME_SYNC, (fs_generic_func_p) fs_sync,
137 		VFSNAME_MOUNTROOT, devfs_mountroot,
138 		NULL, NULL
139 	};
140 	int error;
141 	int dev;
142 	extern major_t getudev(void);	/* gack - what a function */
143 
144 	devfstype = fstype;
145 	/*
146 	 * Associate VFS ops vector with this fstype
147 	 */
148 	error = vfs_setfsops(fstype, devfs_vfsops_template, NULL);
149 	if (error != 0) {
150 		cmn_err(CE_WARN, "devfsinit: bad vfs ops template");
151 		return (error);
152 	}
153 
154 	error = vn_make_ops("dev fs", dv_vnodeops_template, &dv_vnodeops);
155 	if (error != 0) {
156 		(void) vfs_freevfsops_by_type(fstype);
157 		cmn_err(CE_WARN, "devfsinit: bad vnode ops template");
158 		return (error);
159 	}
160 
161 	/*
162 	 * Invent a dev_t (sigh).
163 	 */
164 	if ((dev = getudev()) == (major_t)-1) {
165 		cmn_err(CE_NOTE, "%s: can't get unique dev", devfs_vfssw.name);
166 		dev = 0;
167 	}
168 	devfsdev = makedevice(dev, 0);
169 
170 	return (0);
171 }
172 
173 /*
174  * The name of the mount point and the name of the attribute
175  * filesystem are passed down from userland for now.
176  */
177 static int
178 devfs_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
179     struct cred *cr)
180 {
181 	struct devfs_data *devfs_data;
182 	struct vnode *avp;
183 	struct dv_node *dv;
184 	struct vattr va;
185 
186 	dcmn_err(("devfs_mount\n"));
187 
188 	if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
189 		return (EPERM);
190 
191 	/*
192 	 * check that the mount point is sane
193 	 */
194 	if (mvp->v_type != VDIR)
195 		return (ENOTDIR);
196 
197 	ASSERT(uap->flags & MS_SYSSPACE);
198 	/*
199 	 * Devfs can only be mounted from kernel during boot.
200 	 * avp is the existing /devices, the same as the mount point.
201 	 */
202 	avp = mvp;
203 
204 	/*
205 	 * Create and initialize the vfs-private data.
206 	 * This includes a hand-crafted root vnode (we build
207 	 * this here mostly so that traverse() doesn't sleep
208 	 * in VFS_ROOT()).
209 	 */
210 	mutex_enter(&devfs_lock);
211 	ASSERT(devfs_mntinfo == NULL);
212 	dv = dv_mkroot(vfsp, devfsdev);
213 	dv->dv_attrvp = avp;		/* attribute root vp */
214 
215 	ASSERT(dv == dv->dv_dotdot);
216 
217 	devfs_data = kmem_zalloc(sizeof (struct devfs_data), KM_SLEEP);
218 	devfs_data->devfs_vfsp = vfsp;
219 	devfs_data->devfs_root = dv;
220 
221 	vfsp->vfs_data = (caddr_t)devfs_data;
222 	vfsp->vfs_fstype = devfstype;
223 	vfsp->vfs_dev = devfsdev;
224 	vfsp->vfs_bsize = DEV_BSIZE;
225 	vfsp->vfs_mtime = ddi_get_time();
226 	vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devfstype);
227 
228 	/* We're there. */
229 	devfs_mntinfo = devfs_data;
230 	mutex_exit(&devfs_lock);
231 
232 	va.va_mask = AT_ATIME|AT_MTIME;
233 	gethrestime(&va.va_atime);
234 	gethrestime(&va.va_mtime);
235 	(void) VOP_SETATTR(DVTOV(dv), &va, 0, cr, NULL);
236 	return (0);
237 }
238 
239 
240 /*
241  * We never unmount devfs in a real production system.
242  */
243 /*ARGSUSED*/
244 static int
245 devfs_unmount(struct vfs *vfsp, int flag, struct cred *cr)
246 {
247 	return (EBUSY);
248 }
249 
250 /*
251  * return root vnode for given vfs
252  */
253 static int
254 devfs_root(struct vfs *vfsp, struct vnode **vpp)
255 {
256 	dcmn_err(("devfs_root\n"));
257 	*vpp = DVTOV(VFSTODVFS(vfsp)->devfs_root);
258 	VN_HOLD(*vpp);
259 	return (0);
260 }
261 
262 /*
263  * return 'generic superblock' information to userland.
264  *
265  * not much that we can usefully admit to here
266  */
267 static int
268 devfs_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
269 {
270 	extern kmem_cache_t *dv_node_cache;
271 
272 	dev32_t d32;
273 
274 	dcmn_err(("devfs_statvfs\n"));
275 	bzero(sbp, sizeof (*sbp));
276 	sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize;
277 	/*
278 	 * We could compute the number of devfsnodes here .. but since
279 	 * it's dynamic anyway, it's not clear how useful this is.
280 	 */
281 	sbp->f_files = kmem_cache_stat(dv_node_cache, "alloc");
282 
283 	/* no illusions that free/avail files is relevant to devfs */
284 	sbp->f_ffree = 0;
285 	sbp->f_favail = 0;
286 
287 	/* no illusions that blocks are relevant to devfs */
288 	sbp->f_bfree = 0;
289 	sbp->f_bavail = 0;
290 	sbp->f_blocks = 0;
291 
292 	(void) cmpldev(&d32, vfsp->vfs_dev);
293 	sbp->f_fsid = d32;
294 	(void) strcpy(sbp->f_basetype, vfssw[devfstype].vsw_name);
295 	sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
296 	sbp->f_namemax = MAXNAMELEN - 1;
297 	(void) strcpy(sbp->f_fstr, "devices");
298 
299 	return (0);
300 }
301 
302 /*
303  * devfs always mount after root is mounted, so this should never
304  * be invoked.
305  */
306 /*ARGSUSED*/
307 static int
308 devfs_mountroot(struct vfs *vfsp, enum whymountroot why)
309 {
310 	dcmn_err(("devfs_mountroot\n"));
311 
312 	return (EINVAL);
313 }
314 
315 struct dv_node *
316 devfs_dip_to_dvnode(dev_info_t *dip)
317 {
318 	char *dirpath;
319 	struct vnode *dirvp;
320 
321 	ASSERT(dip != NULL);
322 
323 	/* no-op if devfs not mounted yet */
324 	if (devfs_mntinfo == NULL)
325 		return (NULL);
326 
327 	/*
328 	 * The lookupname below only looks up cached dv_nodes
329 	 * because devfs_clean_key is set in thread specific data.
330 	 */
331 	dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
332 	(void) ddi_pathname(dip, dirpath);
333 	if (devfs_lookupname(dirpath, NULLVPP, &dirvp)) {
334 		dcmn_err(("directory %s not found\n", dirpath));
335 		kmem_free(dirpath, MAXPATHLEN);
336 		return (NULL);
337 	}
338 
339 	kmem_free(dirpath, MAXPATHLEN);
340 	return (VTODV(dirvp));
341 }
342 
343 /*
344  * devfs_clean()
345  *
346  * Destroy unreferenced dv_node's and detach devices.
347  * Returns 0 on success, error if failed to unconfigure node.
348  *
349  * devfs caches unreferenced dv_node to speed by the performance
350  * of ls, find, etc. devfs_clean() is invoked to cleanup cached
351  * dv_nodes to reclaim memory as well as to facilitate device
352  * removal (dv_node reference devinfo nodes, which prevents driver
353  * detach).
354  *
355  * If a shell parks in a /devices directory, the dv_node will be
356  * held, preventing the corresponding device to be detached.
357  * This would be a denial of service against DR. To prevent this,
358  * DR code calls devfs_clean() with the DV_CLEAN_FORCE flag.
359  * The dv_cleandir() implementation does the right thing to ensure
360  * successful DR.
361  */
362 int
363 devfs_clean(dev_info_t *dip, char *devnm, uint_t flags)
364 {
365 	struct dv_node *dvp;
366 
367 	dcmn_err(("devfs_unconfigure: dip = 0x%p, flags = 0x%x",
368 		(void *)dip, flags));
369 
370 	/* avoid recursion back into the device tree */
371 	(void) tsd_set(devfs_clean_key, (void *)1);
372 	dvp = devfs_dip_to_dvnode(dip);
373 	(void) tsd_set(devfs_clean_key, NULL);
374 	if (dvp == NULL)
375 		return (0);
376 
377 	if (dv_cleandir(dvp, devnm, flags) != 0) {
378 		VN_RELE(DVTOV(dvp));
379 		return (EBUSY);
380 	}
381 	VN_RELE(DVTOV(dvp));
382 	return (0);
383 }
384 
385 /*
386  * lookup a devfs relative pathname, returning held vnodes for the final
387  * component and the containing directory (if requested).
388  *
389  * NOTE: We can't use lookupname because this would use the current
390  *	processes credentials (CRED) in the call lookuppnvp instead
391  *	of kcred.  It also does not give you the flexibility so
392  * 	specify the directory to start the resolution in (devicesdir).
393  */
394 int
395 devfs_lookupname(
396 	char	*pathname,		/* user pathname */
397 	vnode_t **dirvpp,		/* ret for ptr to parent dir vnode */
398 	vnode_t **compvpp)		/* ret for ptr to component vnode */
399 {
400 	struct pathname	pn;
401 	int		error;
402 
403 	ASSERT(devicesdir);		/* devfs must be initialized */
404 	ASSERT(pathname);		/* must have some path */
405 
406 	if (error = pn_get(pathname, UIO_SYSSPACE, &pn))
407 		return (error);
408 
409 	/* make the path relative to /devices. */
410 	pn_skipslash(&pn);
411 	if (pn_pathleft(&pn) == 0) {
412 		/* all we had was "\0" or "/" (which skipslash skiped) */
413 		if (dirvpp)
414 			*dirvpp = NULL;
415 		if (compvpp) {
416 			VN_HOLD(devicesdir);
417 			*compvpp = devicesdir;
418 		}
419 	} else {
420 		/*
421 		 * Use devfs lookup to resolve pathname to the vnode for
422 		 * the device via relative lookup in devfs. Extra holds for
423 		 * using devicesdir as directory we are searching and for
424 		 * being our root without being == rootdir.
425 		 */
426 		VN_HOLD(devicesdir);
427 		VN_HOLD(devicesdir);
428 		error = lookuppnvp(&pn, NULL, FOLLOW, dirvpp, compvpp,
429 		    devicesdir, devicesdir, kcred);
430 	}
431 	pn_free(&pn);
432 
433 	return (error);
434 }
435 
436 /*
437  * Given a devfs path (without the /devices prefix), walk
438  * the dv_node sub-tree rooted at the path.
439  */
440 int
441 devfs_walk(
442 	char		*path,
443 	void		(*callback)(struct dv_node *, void *),
444 	void		*arg)
445 {
446 	char *dirpath, *devnm;
447 	struct vnode	*dirvp;
448 
449 	ASSERT(path && callback);
450 
451 	if (*path != '/' || devfs_mntinfo == NULL)
452 		return (ENXIO);
453 
454 	dcmn_err(("devfs_walk: path = %s", path));
455 
456 	dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
457 
458 	(void) snprintf(dirpath, MAXPATHLEN, "/devices%s", path);
459 
460 	devnm = strrchr(dirpath, '/');
461 
462 	ASSERT(devnm);
463 
464 	*devnm++ = '\0';
465 
466 	if (lookupname(dirpath, UIO_SYSSPACE, 0, NULL, &dirvp)) {
467 		dcmn_err(("directory %s not found\n", dirpath));
468 		kmem_free(dirpath, MAXPATHLEN);
469 		return (ENXIO);
470 	}
471 
472 	/*
473 	 * if path == "/", visit the root dv_node
474 	 */
475 	if (*devnm == '\0') {
476 		callback(VTODV(dirvp), arg);
477 		devnm = NULL;
478 	}
479 
480 	dv_walk(VTODV(dirvp), devnm, callback, arg);
481 
482 	VN_RELE(dirvp);
483 
484 	kmem_free(dirpath, MAXPATHLEN);
485 
486 	return (0);
487 }
488 
489 int
490 devfs_devpolicy(vnode_t *vp, devplcy_t **dpp)
491 {
492 	struct vnode *rvp;
493 	struct dv_node *dvp;
494 	int rval = -1;
495 
496 	/* fail if devfs not mounted yet */
497 	if (devfs_mntinfo == NULL)
498 		return (rval);
499 
500 	if (VOP_REALVP(vp, &rvp) == 0 && vn_matchops(rvp, dv_vnodeops)) {
501 		dvp = VTODV(rvp);
502 		rw_enter(&dvp->dv_contents, RW_READER);
503 		if (dvp->dv_priv) {
504 			dphold(dvp->dv_priv);
505 			*dpp = dvp->dv_priv;
506 			rval = 0;
507 		}
508 		rw_exit(&dvp->dv_contents);
509 	}
510 	return (rval);
511 }
512