xref: /titanic_51/usr/src/uts/common/fs/devfs/devfs_vfsops.c (revision 261a51afbf7133d9f7c89f1388050677f56b7d1a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * This is the device filesystem.
30  *
31  * It is a combination of a namer to drive autoconfiguration,
32  * plus the access methods for the device drivers of the system.
33  *
34  * The prototype is fairly dependent on specfs for the latter part
35  * of its implementation, though a final version would integrate the two.
36  */
37 #include <sys/types.h>
38 #include <sys/param.h>
39 #include <sys/sysmacros.h>
40 #include <sys/systm.h>
41 #include <sys/kmem.h>
42 #include <sys/time.h>
43 #include <sys/pathname.h>
44 #include <sys/vfs.h>
45 #include <sys/vfs_opreg.h>
46 #include <sys/vnode.h>
47 #include <sys/stat.h>
48 #include <sys/uio.h>
49 #include <sys/stat.h>
50 #include <sys/errno.h>
51 #include <sys/cmn_err.h>
52 #include <sys/cred.h>
53 #include <sys/statvfs.h>
54 #include <sys/mount.h>
55 #include <sys/debug.h>
56 #include <sys/modctl.h>
57 #include <fs/fs_subr.h>
58 #include <sys/fs/dv_node.h>
59 #include <sys/fs/snode.h>
60 #include <sys/sunndi.h>
61 #include <sys/policy.h>
62 #include <sys/sunmdi.h>
63 
/*
 * Forward declarations for the devfs VFS entry points and for the
 * filesystem-type init hook.  They are referenced from tables in this
 * file before their definitions appear.
 */
static int devfs_mount(struct vfs *vfsp, struct vnode *mvp,
    struct mounta *uap, struct cred *cr);
static int devfs_unmount(struct vfs *vfsp, int flag, struct cred *cr);
static int devfs_root(struct vfs *vfsp, struct vnode **vpp);
static int devfs_statvfs(struct vfs *vfsp, struct statvfs64 *sbp);
static int devfs_mountroot(struct vfs *vfsp, enum whymountroot why);

static int devfsinit(int fstype, char *name);
75 
76 static vfsdef_t devfs_vfssw = {
77 	VFSDEF_VERSION,
78 	"devfs",	/* type name string */
79 	devfsinit,	/* init routine */
80 	0,		/* flags */
81 	NULL		/* mount options table prototype */
82 };
83 
84 static kmutex_t devfs_lock;	/* protects global data */
85 static int devfstype;		/* fstype */
86 static dev_t devfsdev;		/* the fictious 'device' we live on */
87 static struct devfs_data *devfs_mntinfo;	/* linked list of instances */
88 
89 /*
90  * Module linkage information
91  */
92 static struct modlfs modlfs = {
93 	&mod_fsops, "devices filesystem %I%", &devfs_vfssw
94 };
95 
96 static struct modlinkage modlinkage = {
97 	MODREV_1, (void *)&modlfs, NULL
98 };
99 
100 int
101 _init(void)
102 {
103 	int e;
104 
105 	mutex_init(&devfs_lock, "devfs lock", MUTEX_DEFAULT, NULL);
106 	dv_node_cache_init();
107 	if ((e = mod_install(&modlinkage)) != 0) {
108 		dv_node_cache_fini();
109 		mutex_destroy(&devfs_lock);
110 		return (e);
111 	}
112 	dcmn_err(("devfs loaded\n"));
113 	return (0);
114 }
115 
116 int
117 _fini(void)
118 {
119 	return (EBUSY);
120 }
121 
122 int
123 _info(struct modinfo *modinfop)
124 {
125 	return (mod_info(&modlinkage, modinfop));
126 }
127 
128 /*ARGSUSED1*/
129 static int
130 devfsinit(int fstype, char *name)
131 {
132 	static const fs_operation_def_t devfs_vfsops_template[] = {
133 		VFSNAME_MOUNT,		{ .vfs_mount = devfs_mount },
134 		VFSNAME_UNMOUNT,	{ .vfs_unmount = devfs_unmount },
135 		VFSNAME_ROOT,		{ .vfs_root = devfs_root },
136 		VFSNAME_STATVFS,	{ .vfs_statvfs = devfs_statvfs },
137 		VFSNAME_SYNC,		{ .vfs_sync = fs_sync },
138 		VFSNAME_MOUNTROOT,	{ .vfs_mountroot = devfs_mountroot },
139 		NULL,			NULL
140 	};
141 	int error;
142 	int dev;
143 	extern major_t getudev(void);	/* gack - what a function */
144 
145 	devfstype = fstype;
146 	/*
147 	 * Associate VFS ops vector with this fstype
148 	 */
149 	error = vfs_setfsops(fstype, devfs_vfsops_template, NULL);
150 	if (error != 0) {
151 		cmn_err(CE_WARN, "devfsinit: bad vfs ops template");
152 		return (error);
153 	}
154 
155 	error = vn_make_ops("dev fs", dv_vnodeops_template, &dv_vnodeops);
156 	if (error != 0) {
157 		(void) vfs_freevfsops_by_type(fstype);
158 		cmn_err(CE_WARN, "devfsinit: bad vnode ops template");
159 		return (error);
160 	}
161 
162 	/*
163 	 * Invent a dev_t (sigh).
164 	 */
165 	if ((dev = getudev()) == (major_t)-1) {
166 		cmn_err(CE_NOTE, "%s: can't get unique dev", devfs_vfssw.name);
167 		dev = 0;
168 	}
169 	devfsdev = makedevice(dev, 0);
170 
171 	return (0);
172 }
173 
174 /*
175  * The name of the mount point and the name of the attribute
176  * filesystem are passed down from userland for now.
177  */
178 static int
179 devfs_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
180     struct cred *cr)
181 {
182 	struct devfs_data *devfs_data;
183 	struct vnode *avp;
184 	struct dv_node *dv;
185 	struct vattr va;
186 
187 	dcmn_err(("devfs_mount\n"));
188 
189 	if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
190 		return (EPERM);
191 
192 	/*
193 	 * check that the mount point is sane
194 	 */
195 	if (mvp->v_type != VDIR)
196 		return (ENOTDIR);
197 
198 	ASSERT(uap->flags & MS_SYSSPACE);
199 	/*
200 	 * Devfs can only be mounted from kernel during boot.
201 	 * avp is the existing /devices, the same as the mount point.
202 	 */
203 	avp = mvp;
204 
205 	/*
206 	 * Create and initialize the vfs-private data.
207 	 * This includes a hand-crafted root vnode (we build
208 	 * this here mostly so that traverse() doesn't sleep
209 	 * in VFS_ROOT()).
210 	 */
211 	mutex_enter(&devfs_lock);
212 	ASSERT(devfs_mntinfo == NULL);
213 	dv = dv_mkroot(vfsp, devfsdev);
214 	dv->dv_attrvp = avp;		/* attribute root vp */
215 
216 	ASSERT(dv == dv->dv_dotdot);
217 
218 	devfs_data = kmem_zalloc(sizeof (struct devfs_data), KM_SLEEP);
219 	devfs_data->devfs_vfsp = vfsp;
220 	devfs_data->devfs_root = dv;
221 
222 	vfsp->vfs_data = (caddr_t)devfs_data;
223 	vfsp->vfs_fstype = devfstype;
224 	vfsp->vfs_dev = devfsdev;
225 	vfsp->vfs_bsize = DEV_BSIZE;
226 	vfsp->vfs_mtime = ddi_get_time();
227 	vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devfstype);
228 
229 	/* We're there. */
230 	devfs_mntinfo = devfs_data;
231 	mutex_exit(&devfs_lock);
232 
233 	va.va_mask = AT_ATIME|AT_MTIME;
234 	gethrestime(&va.va_atime);
235 	gethrestime(&va.va_mtime);
236 	(void) VOP_SETATTR(DVTOV(dv), &va, 0, cr, NULL);
237 	return (0);
238 }
239 
240 
241 /*
242  * We never unmount devfs in a real production system.
243  */
244 /*ARGSUSED*/
245 static int
246 devfs_unmount(struct vfs *vfsp, int flag, struct cred *cr)
247 {
248 	return (EBUSY);
249 }
250 
251 /*
252  * return root vnode for given vfs
253  */
254 static int
255 devfs_root(struct vfs *vfsp, struct vnode **vpp)
256 {
257 	dcmn_err(("devfs_root\n"));
258 	*vpp = DVTOV(VFSTODVFS(vfsp)->devfs_root);
259 	VN_HOLD(*vpp);
260 	return (0);
261 }
262 
263 /*
264  * return 'generic superblock' information to userland.
265  *
266  * not much that we can usefully admit to here
267  */
268 static int
269 devfs_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
270 {
271 	extern kmem_cache_t *dv_node_cache;
272 
273 	dev32_t d32;
274 
275 	dcmn_err(("devfs_statvfs\n"));
276 	bzero(sbp, sizeof (*sbp));
277 	sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize;
278 	/*
279 	 * We could compute the number of devfsnodes here .. but since
280 	 * it's dynamic anyway, it's not clear how useful this is.
281 	 */
282 	sbp->f_files = kmem_cache_stat(dv_node_cache, "alloc");
283 
284 	/* no illusions that free/avail files is relevant to devfs */
285 	sbp->f_ffree = 0;
286 	sbp->f_favail = 0;
287 
288 	/* no illusions that blocks are relevant to devfs */
289 	sbp->f_bfree = 0;
290 	sbp->f_bavail = 0;
291 	sbp->f_blocks = 0;
292 
293 	(void) cmpldev(&d32, vfsp->vfs_dev);
294 	sbp->f_fsid = d32;
295 	(void) strcpy(sbp->f_basetype, vfssw[devfstype].vsw_name);
296 	sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
297 	sbp->f_namemax = MAXNAMELEN - 1;
298 	(void) strcpy(sbp->f_fstr, "devices");
299 
300 	return (0);
301 }
302 
303 /*
304  * devfs always mount after root is mounted, so this should never
305  * be invoked.
306  */
307 /*ARGSUSED*/
308 static int
309 devfs_mountroot(struct vfs *vfsp, enum whymountroot why)
310 {
311 	dcmn_err(("devfs_mountroot\n"));
312 
313 	return (EINVAL);
314 }
315 
316 struct dv_node *
317 devfs_dip_to_dvnode(dev_info_t *dip)
318 {
319 	char *dirpath;
320 	struct vnode *dirvp;
321 
322 	ASSERT(dip != NULL);
323 
324 	/* no-op if devfs not mounted yet */
325 	if (devfs_mntinfo == NULL)
326 		return (NULL);
327 
328 	/*
329 	 * The lookupname below only looks up cached dv_nodes
330 	 * because devfs_clean_key is set in thread specific data.
331 	 */
332 	dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
333 	(void) ddi_pathname(dip, dirpath);
334 	if (devfs_lookupname(dirpath, NULLVPP, &dirvp)) {
335 		dcmn_err(("directory %s not found\n", dirpath));
336 		kmem_free(dirpath, MAXPATHLEN);
337 		return (NULL);
338 	}
339 
340 	kmem_free(dirpath, MAXPATHLEN);
341 	return (VTODV(dirvp));
342 }
343 
344 /*
345  * If DV_CLEAN_FORCE devfs_clean is issued with a dip that is not the root
346  * and not a vHCI we also need to clean any vHCI branches because they
347  * may contain pHCI nodes. A detach_node() of a pHCI will fail if its
348  * mdi_devi_offline() fails, and the mdi_devi_offline() of the last
349  * pHCI will fail unless an ndi_devi_offline() of the Client nodes under
350  * the vHCI is successful - which requires a clean vHCI branch to removed
351  * the devi_refs associated with devfs vnodes.
352  */
353 static int
354 devfs_clean_vhci(dev_info_t *dip, void *args)
355 {
356 	struct dv_node	*dvp;
357 	uint_t		flags = (uint_t)(uintptr_t)args;
358 
359 	(void) tsd_set(devfs_clean_key, (void *)1);
360 	dvp = devfs_dip_to_dvnode(dip);
361 	(void) tsd_set(devfs_clean_key, NULL);
362 	if (dvp) {
363 		(void) dv_cleandir(dvp, NULL, flags);
364 		VN_RELE(DVTOV(dvp));
365 	}
366 	return (DDI_WALK_CONTINUE);
367 }
368 
369 /*
370  * devfs_clean()
371  *
372  * Destroy unreferenced dv_node's and detach devices.
373  * Returns 0 on success, error if failed to unconfigure node.
374  *
375  * devfs caches unreferenced dv_node to speed by the performance
376  * of ls, find, etc. devfs_clean() is invoked to cleanup cached
377  * dv_nodes to reclaim memory as well as to facilitate device
378  * removal (dv_node reference devinfo nodes, which prevents driver
379  * detach).
380  *
381  * If a shell parks in a /devices directory, the dv_node will be
382  * held, preventing the corresponding device to be detached.
383  * This would be a denial of service against DR. To prevent this,
384  * DR code calls devfs_clean() with the DV_CLEAN_FORCE flag.
385  * The dv_cleandir() implementation does the right thing to ensure
386  * successful DR.
387  */
388 int
389 devfs_clean(dev_info_t *dip, char *devnm, uint_t flags)
390 {
391 	struct dv_node		*dvp;
392 	int			rval = 0;
393 
394 	dcmn_err(("devfs_unconfigure: dip = 0x%p, flags = 0x%x",
395 		(void *)dip, flags));
396 
397 	/* avoid recursion back into the device tree */
398 	(void) tsd_set(devfs_clean_key, (void *)1);
399 	dvp = devfs_dip_to_dvnode(dip);
400 	(void) tsd_set(devfs_clean_key, NULL);
401 	if (dvp == NULL)
402 		return (0);
403 
404 	if (dv_cleandir(dvp, devnm, flags) != 0)
405 		rval = EBUSY;
406 	VN_RELE(DVTOV(dvp));
407 
408 	/*
409 	 * If we are doing a DV_CLEAN_FORCE, and we did not start at the
410 	 * root, and we did not start at a vHCI node then clean vHCI
411 	 * branches too.  Failure to clean vHCI branch does not cause EBUSY.
412 	 *
413 	 * Also, to accommodate nexus callers that clean 'self' to DR 'child'
414 	 * (like pcihp) we clean vHCIs even when dv_cleandir() of dip branch
415 	 * above fails - this prevents a busy DR 'child' sibling from causing
416 	 * the DR of 'child' to fail because a vHCI branch was not cleaned.
417 	 */
418 	if ((flags & DV_CLEAN_FORCE) && (dip != ddi_root_node()) &&
419 	    (mdi_component_is_vhci(dip, NULL) != MDI_SUCCESS)) {
420 		/*
421 		 * NOTE: for backport the following is recommended
422 		 * 	(void) devfs_clean_vhci(scsi_vhci_dip,
423 		 *	    (void *)(uintptr_t)flags);
424 		 */
425 		mdi_walk_vhcis(devfs_clean_vhci, (void *)(uintptr_t)flags);
426 	}
427 
428 	return (rval);
429 }
430 
431 /*
432  * lookup a devfs relative pathname, returning held vnodes for the final
433  * component and the containing directory (if requested).
434  *
435  * NOTE: We can't use lookupname because this would use the current
436  *	processes credentials (CRED) in the call lookuppnvp instead
437  *	of kcred.  It also does not give you the flexibility so
438  * 	specify the directory to start the resolution in (devicesdir).
439  */
440 int
441 devfs_lookupname(
442 	char	*pathname,		/* user pathname */
443 	vnode_t **dirvpp,		/* ret for ptr to parent dir vnode */
444 	vnode_t **compvpp)		/* ret for ptr to component vnode */
445 {
446 	struct pathname	pn;
447 	int		error;
448 
449 	ASSERT(devicesdir);		/* devfs must be initialized */
450 	ASSERT(pathname);		/* must have some path */
451 
452 	if (error = pn_get(pathname, UIO_SYSSPACE, &pn))
453 		return (error);
454 
455 	/* make the path relative to /devices. */
456 	pn_skipslash(&pn);
457 	if (pn_pathleft(&pn) == 0) {
458 		/* all we had was "\0" or "/" (which skipslash skiped) */
459 		if (dirvpp)
460 			*dirvpp = NULL;
461 		if (compvpp) {
462 			VN_HOLD(devicesdir);
463 			*compvpp = devicesdir;
464 		}
465 	} else {
466 		/*
467 		 * Use devfs lookup to resolve pathname to the vnode for
468 		 * the device via relative lookup in devfs. Extra holds for
469 		 * using devicesdir as directory we are searching and for
470 		 * being our root without being == rootdir.
471 		 */
472 		VN_HOLD(devicesdir);
473 		VN_HOLD(devicesdir);
474 		error = lookuppnvp(&pn, NULL, FOLLOW, dirvpp, compvpp,
475 		    devicesdir, devicesdir, kcred);
476 	}
477 	pn_free(&pn);
478 
479 	return (error);
480 }
481 
482 /*
483  * Given a devfs path (without the /devices prefix), walk
484  * the dv_node sub-tree rooted at the path.
485  */
486 int
487 devfs_walk(
488 	char		*path,
489 	void		(*callback)(struct dv_node *, void *),
490 	void		*arg)
491 {
492 	char *dirpath, *devnm;
493 	struct vnode	*dirvp;
494 
495 	ASSERT(path && callback);
496 
497 	if (*path != '/' || devfs_mntinfo == NULL)
498 		return (ENXIO);
499 
500 	dcmn_err(("devfs_walk: path = %s", path));
501 
502 	dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
503 
504 	(void) snprintf(dirpath, MAXPATHLEN, "/devices%s", path);
505 
506 	devnm = strrchr(dirpath, '/');
507 
508 	ASSERT(devnm);
509 
510 	*devnm++ = '\0';
511 
512 	if (lookupname(dirpath, UIO_SYSSPACE, 0, NULL, &dirvp)) {
513 		dcmn_err(("directory %s not found\n", dirpath));
514 		kmem_free(dirpath, MAXPATHLEN);
515 		return (ENXIO);
516 	}
517 
518 	/*
519 	 * if path == "/", visit the root dv_node
520 	 */
521 	if (*devnm == '\0') {
522 		callback(VTODV(dirvp), arg);
523 		devnm = NULL;
524 	}
525 
526 	dv_walk(VTODV(dirvp), devnm, callback, arg);
527 
528 	VN_RELE(dirvp);
529 
530 	kmem_free(dirpath, MAXPATHLEN);
531 
532 	return (0);
533 }
534 
535 int
536 devfs_devpolicy(vnode_t *vp, devplcy_t **dpp)
537 {
538 	struct vnode *rvp;
539 	struct dv_node *dvp;
540 	int rval = -1;
541 
542 	/* fail if devfs not mounted yet */
543 	if (devfs_mntinfo == NULL)
544 		return (rval);
545 
546 	if (VOP_REALVP(vp, &rvp) == 0 && vn_matchops(rvp, dv_vnodeops)) {
547 		dvp = VTODV(rvp);
548 		rw_enter(&dvp->dv_contents, RW_READER);
549 		if (dvp->dv_priv) {
550 			dphold(dvp->dv_priv);
551 			*dpp = dvp->dv_priv;
552 			rval = 0;
553 		}
554 		rw_exit(&dvp->dv_contents);
555 	}
556 	return (rval);
557 }
558