xref: /titanic_50/usr/src/uts/common/fs/devfs/devfs_vfsops.c (revision 70a587dd392ff1dbaa2875c6c33921f08ea85273)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * This is the device filesystem.
30  *
31  * It is a combination of a namer to drive autoconfiguration,
32  * plus the access methods for the device drivers of the system.
33  *
34  * The prototype is fairly dependent on specfs for the latter part
35  * of its implementation, though a final version would integrate the two.
36  */
37 #include <sys/types.h>
38 #include <sys/param.h>
39 #include <sys/sysmacros.h>
40 #include <sys/systm.h>
41 #include <sys/kmem.h>
42 #include <sys/time.h>
43 #include <sys/pathname.h>
44 #include <sys/vfs.h>
45 #include <sys/vnode.h>
46 #include <sys/stat.h>
47 #include <sys/uio.h>
48 #include <sys/stat.h>
49 #include <sys/errno.h>
50 #include <sys/cmn_err.h>
51 #include <sys/cred.h>
52 #include <sys/statvfs.h>
53 #include <sys/mount.h>
54 #include <sys/debug.h>
55 #include <sys/modctl.h>
56 #include <fs/fs_subr.h>
57 #include <sys/fs/dv_node.h>
58 #include <sys/fs/snode.h>
59 #include <sys/sunndi.h>
60 #include <sys/policy.h>
61 #include <sys/sunmdi.h>
62 
/*
 * Forward declarations for the devfs vfs operations installed by
 * devfsinit(), and for devfsinit() itself.
 */
static int devfs_mount(struct vfs *vfsp, struct vnode *mvp,
    struct mounta *uap, struct cred *cr);
static int devfs_unmount(struct vfs *vfsp, int flag, struct cred *cr);
static int devfs_root(struct vfs *vfsp, struct vnode **vpp);
static int devfs_statvfs(struct vfs *vfsp, struct statvfs64 *sbp);
static int devfs_mountroot(struct vfs *vfsp, enum whymountroot why);

static int devfsinit(int fstype, char *name);
74 
75 static vfsdef_t devfs_vfssw = {
76 	VFSDEF_VERSION,
77 	"devfs",	/* type name string */
78 	devfsinit,	/* init routine */
79 	0,		/* flags */
80 	NULL		/* mount options table prototype */
81 };
82 
83 static kmutex_t devfs_lock;	/* protects global data */
84 static int devfstype;		/* fstype */
85 static dev_t devfsdev;		/* the fictious 'device' we live on */
86 static struct devfs_data *devfs_mntinfo;	/* linked list of instances */
87 
88 /*
89  * Module linkage information
90  */
91 static struct modlfs modlfs = {
92 	&mod_fsops, "devices filesystem %I%", &devfs_vfssw
93 };
94 
95 static struct modlinkage modlinkage = {
96 	MODREV_1, (void *)&modlfs, NULL
97 };
98 
99 int
100 _init(void)
101 {
102 	int e;
103 
104 	mutex_init(&devfs_lock, "devfs lock", MUTEX_DEFAULT, NULL);
105 	dv_node_cache_init();
106 	if ((e = mod_install(&modlinkage)) != 0) {
107 		dv_node_cache_fini();
108 		mutex_destroy(&devfs_lock);
109 		return (e);
110 	}
111 	dcmn_err(("devfs loaded\n"));
112 	return (0);
113 }
114 
115 int
116 _fini(void)
117 {
118 	return (EBUSY);
119 }
120 
121 int
122 _info(struct modinfo *modinfop)
123 {
124 	return (mod_info(&modlinkage, modinfop));
125 }
126 
127 /*ARGSUSED1*/
128 static int
129 devfsinit(int fstype, char *name)
130 {
131 	static const fs_operation_def_t devfs_vfsops_template[] = {
132 		VFSNAME_MOUNT, devfs_mount,
133 		VFSNAME_UNMOUNT, devfs_unmount,
134 		VFSNAME_ROOT, devfs_root,
135 		VFSNAME_STATVFS, devfs_statvfs,
136 		VFSNAME_SYNC, (fs_generic_func_p) fs_sync,
137 		VFSNAME_MOUNTROOT, devfs_mountroot,
138 		NULL, NULL
139 	};
140 	int error;
141 	int dev;
142 	extern major_t getudev(void);	/* gack - what a function */
143 
144 	devfstype = fstype;
145 	/*
146 	 * Associate VFS ops vector with this fstype
147 	 */
148 	error = vfs_setfsops(fstype, devfs_vfsops_template, NULL);
149 	if (error != 0) {
150 		cmn_err(CE_WARN, "devfsinit: bad vfs ops template");
151 		return (error);
152 	}
153 
154 	error = vn_make_ops("dev fs", dv_vnodeops_template, &dv_vnodeops);
155 	if (error != 0) {
156 		(void) vfs_freevfsops_by_type(fstype);
157 		cmn_err(CE_WARN, "devfsinit: bad vnode ops template");
158 		return (error);
159 	}
160 
161 	/*
162 	 * Invent a dev_t (sigh).
163 	 */
164 	if ((dev = getudev()) == (major_t)-1) {
165 		cmn_err(CE_NOTE, "%s: can't get unique dev", devfs_vfssw.name);
166 		dev = 0;
167 	}
168 	devfsdev = makedevice(dev, 0);
169 
170 	return (0);
171 }
172 
173 /*
174  * The name of the mount point and the name of the attribute
175  * filesystem are passed down from userland for now.
176  */
177 static int
178 devfs_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
179     struct cred *cr)
180 {
181 	struct devfs_data *devfs_data;
182 	struct vnode *avp;
183 	struct dv_node *dv;
184 	struct vattr va;
185 
186 	dcmn_err(("devfs_mount\n"));
187 
188 	if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
189 		return (EPERM);
190 
191 	/*
192 	 * check that the mount point is sane
193 	 */
194 	if (mvp->v_type != VDIR)
195 		return (ENOTDIR);
196 
197 	ASSERT(uap->flags & MS_SYSSPACE);
198 	/*
199 	 * Devfs can only be mounted from kernel during boot.
200 	 * avp is the existing /devices, the same as the mount point.
201 	 */
202 	avp = mvp;
203 
204 	/*
205 	 * Create and initialize the vfs-private data.
206 	 * This includes a hand-crafted root vnode (we build
207 	 * this here mostly so that traverse() doesn't sleep
208 	 * in VFS_ROOT()).
209 	 */
210 	mutex_enter(&devfs_lock);
211 	ASSERT(devfs_mntinfo == NULL);
212 	dv = dv_mkroot(vfsp, devfsdev);
213 	dv->dv_attrvp = avp;		/* attribute root vp */
214 
215 	ASSERT(dv == dv->dv_dotdot);
216 
217 	devfs_data = kmem_zalloc(sizeof (struct devfs_data), KM_SLEEP);
218 	devfs_data->devfs_vfsp = vfsp;
219 	devfs_data->devfs_root = dv;
220 
221 	vfsp->vfs_data = (caddr_t)devfs_data;
222 	vfsp->vfs_fstype = devfstype;
223 	vfsp->vfs_dev = devfsdev;
224 	vfsp->vfs_bsize = DEV_BSIZE;
225 	vfsp->vfs_mtime = ddi_get_time();
226 	vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devfstype);
227 
228 	/* We're there. */
229 	devfs_mntinfo = devfs_data;
230 	mutex_exit(&devfs_lock);
231 
232 	va.va_mask = AT_ATIME|AT_MTIME;
233 	gethrestime(&va.va_atime);
234 	gethrestime(&va.va_mtime);
235 	(void) VOP_SETATTR(DVTOV(dv), &va, 0, cr, NULL);
236 	return (0);
237 }
238 
239 
240 /*
241  * We never unmount devfs in a real production system.
242  */
243 /*ARGSUSED*/
244 static int
245 devfs_unmount(struct vfs *vfsp, int flag, struct cred *cr)
246 {
247 	return (EBUSY);
248 }
249 
250 /*
251  * return root vnode for given vfs
252  */
253 static int
254 devfs_root(struct vfs *vfsp, struct vnode **vpp)
255 {
256 	dcmn_err(("devfs_root\n"));
257 	*vpp = DVTOV(VFSTODVFS(vfsp)->devfs_root);
258 	VN_HOLD(*vpp);
259 	return (0);
260 }
261 
262 /*
263  * return 'generic superblock' information to userland.
264  *
265  * not much that we can usefully admit to here
266  */
267 static int
268 devfs_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
269 {
270 	extern kmem_cache_t *dv_node_cache;
271 
272 	dev32_t d32;
273 
274 	dcmn_err(("devfs_statvfs\n"));
275 	bzero(sbp, sizeof (*sbp));
276 	sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize;
277 	/*
278 	 * We could compute the number of devfsnodes here .. but since
279 	 * it's dynamic anyway, it's not clear how useful this is.
280 	 */
281 	sbp->f_files = kmem_cache_stat(dv_node_cache, "alloc");
282 
283 	/* no illusions that free/avail files is relevant to devfs */
284 	sbp->f_ffree = 0;
285 	sbp->f_favail = 0;
286 
287 	/* no illusions that blocks are relevant to devfs */
288 	sbp->f_bfree = 0;
289 	sbp->f_bavail = 0;
290 	sbp->f_blocks = 0;
291 
292 	(void) cmpldev(&d32, vfsp->vfs_dev);
293 	sbp->f_fsid = d32;
294 	(void) strcpy(sbp->f_basetype, vfssw[devfstype].vsw_name);
295 	sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
296 	sbp->f_namemax = MAXNAMELEN - 1;
297 	(void) strcpy(sbp->f_fstr, "devices");
298 
299 	return (0);
300 }
301 
302 /*
303  * devfs always mount after root is mounted, so this should never
304  * be invoked.
305  */
306 /*ARGSUSED*/
307 static int
308 devfs_mountroot(struct vfs *vfsp, enum whymountroot why)
309 {
310 	dcmn_err(("devfs_mountroot\n"));
311 
312 	return (EINVAL);
313 }
314 
315 struct dv_node *
316 devfs_dip_to_dvnode(dev_info_t *dip)
317 {
318 	char *dirpath;
319 	struct vnode *dirvp;
320 
321 	ASSERT(dip != NULL);
322 
323 	/* no-op if devfs not mounted yet */
324 	if (devfs_mntinfo == NULL)
325 		return (NULL);
326 
327 	/*
328 	 * The lookupname below only looks up cached dv_nodes
329 	 * because devfs_clean_key is set in thread specific data.
330 	 */
331 	dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
332 	(void) ddi_pathname(dip, dirpath);
333 	if (devfs_lookupname(dirpath, NULLVPP, &dirvp)) {
334 		dcmn_err(("directory %s not found\n", dirpath));
335 		kmem_free(dirpath, MAXPATHLEN);
336 		return (NULL);
337 	}
338 
339 	kmem_free(dirpath, MAXPATHLEN);
340 	return (VTODV(dirvp));
341 }
342 
343 /*
344  * If DV_CLEAN_FORCE devfs_clean is issued with a dip that is not the root
345  * and not a vHCI we also need to clean any vHCI branches because they
346  * may contain pHCI nodes. A detach_node() of a pHCI will fail if its
347  * mdi_devi_offline() fails, and the mdi_devi_offline() of the last
348  * pHCI will fail unless an ndi_devi_offline() of the Client nodes under
349  * the vHCI is successful - which requires a clean vHCI branch to removed
350  * the devi_refs associated with devfs vnodes.
351  */
352 static int
353 devfs_clean_vhci(dev_info_t *dip, void *args)
354 {
355 	struct dv_node	*dvp;
356 	uint_t		flags = (uint_t)(uintptr_t)args;
357 
358 	(void) tsd_set(devfs_clean_key, (void *)1);
359 	dvp = devfs_dip_to_dvnode(dip);
360 	(void) tsd_set(devfs_clean_key, NULL);
361 	if (dvp) {
362 		(void) dv_cleandir(dvp, NULL, flags);
363 		VN_RELE(DVTOV(dvp));
364 	}
365 	return (DDI_WALK_CONTINUE);
366 }
367 
368 /*
369  * devfs_clean()
370  *
371  * Destroy unreferenced dv_node's and detach devices.
372  * Returns 0 on success, error if failed to unconfigure node.
373  *
374  * devfs caches unreferenced dv_node to speed by the performance
375  * of ls, find, etc. devfs_clean() is invoked to cleanup cached
376  * dv_nodes to reclaim memory as well as to facilitate device
377  * removal (dv_node reference devinfo nodes, which prevents driver
378  * detach).
379  *
380  * If a shell parks in a /devices directory, the dv_node will be
381  * held, preventing the corresponding device to be detached.
382  * This would be a denial of service against DR. To prevent this,
383  * DR code calls devfs_clean() with the DV_CLEAN_FORCE flag.
384  * The dv_cleandir() implementation does the right thing to ensure
385  * successful DR.
386  */
387 int
388 devfs_clean(dev_info_t *dip, char *devnm, uint_t flags)
389 {
390 	struct dv_node		*dvp;
391 	int			rval = 0;
392 
393 	dcmn_err(("devfs_unconfigure: dip = 0x%p, flags = 0x%x",
394 		(void *)dip, flags));
395 
396 	/* avoid recursion back into the device tree */
397 	(void) tsd_set(devfs_clean_key, (void *)1);
398 	dvp = devfs_dip_to_dvnode(dip);
399 	(void) tsd_set(devfs_clean_key, NULL);
400 	if (dvp == NULL)
401 		return (0);
402 
403 	if (dv_cleandir(dvp, devnm, flags) != 0)
404 		rval = EBUSY;
405 	VN_RELE(DVTOV(dvp));
406 
407 	/*
408 	 * If we are doing a DV_CLEAN_FORCE, and we did not start at the
409 	 * root, and we did not start at a vHCI node then clean vHCI
410 	 * branches too.  Failure to clean vHCI branch does not cause EBUSY.
411 	 *
412 	 * Also, to accommodate nexus callers that clean 'self' to DR 'child'
413 	 * (like pcihp) we clean vHCIs even when dv_cleandir() of dip branch
414 	 * above fails - this prevents a busy DR 'child' sibling from causing
415 	 * the DR of 'child' to fail because a vHCI branch was not cleaned.
416 	 */
417 	if ((flags & DV_CLEAN_FORCE) && (dip != ddi_root_node()) &&
418 	    (mdi_component_is_vhci(dip, NULL) != MDI_SUCCESS)) {
419 		/*
420 		 * NOTE: for backport the following is recommended
421 		 * 	(void) devfs_clean_vhci(scsi_vhci_dip,
422 		 *	    (void *)(uintptr_t)flags);
423 		 */
424 		mdi_walk_vhcis(devfs_clean_vhci, (void *)(uintptr_t)flags);
425 	}
426 
427 	return (rval);
428 }
429 
430 /*
431  * lookup a devfs relative pathname, returning held vnodes for the final
432  * component and the containing directory (if requested).
433  *
434  * NOTE: We can't use lookupname because this would use the current
435  *	processes credentials (CRED) in the call lookuppnvp instead
436  *	of kcred.  It also does not give you the flexibility so
437  * 	specify the directory to start the resolution in (devicesdir).
438  */
439 int
440 devfs_lookupname(
441 	char	*pathname,		/* user pathname */
442 	vnode_t **dirvpp,		/* ret for ptr to parent dir vnode */
443 	vnode_t **compvpp)		/* ret for ptr to component vnode */
444 {
445 	struct pathname	pn;
446 	int		error;
447 
448 	ASSERT(devicesdir);		/* devfs must be initialized */
449 	ASSERT(pathname);		/* must have some path */
450 
451 	if (error = pn_get(pathname, UIO_SYSSPACE, &pn))
452 		return (error);
453 
454 	/* make the path relative to /devices. */
455 	pn_skipslash(&pn);
456 	if (pn_pathleft(&pn) == 0) {
457 		/* all we had was "\0" or "/" (which skipslash skiped) */
458 		if (dirvpp)
459 			*dirvpp = NULL;
460 		if (compvpp) {
461 			VN_HOLD(devicesdir);
462 			*compvpp = devicesdir;
463 		}
464 	} else {
465 		/*
466 		 * Use devfs lookup to resolve pathname to the vnode for
467 		 * the device via relative lookup in devfs. Extra holds for
468 		 * using devicesdir as directory we are searching and for
469 		 * being our root without being == rootdir.
470 		 */
471 		VN_HOLD(devicesdir);
472 		VN_HOLD(devicesdir);
473 		error = lookuppnvp(&pn, NULL, FOLLOW, dirvpp, compvpp,
474 		    devicesdir, devicesdir, kcred);
475 	}
476 	pn_free(&pn);
477 
478 	return (error);
479 }
480 
481 /*
482  * Given a devfs path (without the /devices prefix), walk
483  * the dv_node sub-tree rooted at the path.
484  */
485 int
486 devfs_walk(
487 	char		*path,
488 	void		(*callback)(struct dv_node *, void *),
489 	void		*arg)
490 {
491 	char *dirpath, *devnm;
492 	struct vnode	*dirvp;
493 
494 	ASSERT(path && callback);
495 
496 	if (*path != '/' || devfs_mntinfo == NULL)
497 		return (ENXIO);
498 
499 	dcmn_err(("devfs_walk: path = %s", path));
500 
501 	dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
502 
503 	(void) snprintf(dirpath, MAXPATHLEN, "/devices%s", path);
504 
505 	devnm = strrchr(dirpath, '/');
506 
507 	ASSERT(devnm);
508 
509 	*devnm++ = '\0';
510 
511 	if (lookupname(dirpath, UIO_SYSSPACE, 0, NULL, &dirvp)) {
512 		dcmn_err(("directory %s not found\n", dirpath));
513 		kmem_free(dirpath, MAXPATHLEN);
514 		return (ENXIO);
515 	}
516 
517 	/*
518 	 * if path == "/", visit the root dv_node
519 	 */
520 	if (*devnm == '\0') {
521 		callback(VTODV(dirvp), arg);
522 		devnm = NULL;
523 	}
524 
525 	dv_walk(VTODV(dirvp), devnm, callback, arg);
526 
527 	VN_RELE(dirvp);
528 
529 	kmem_free(dirpath, MAXPATHLEN);
530 
531 	return (0);
532 }
533 
534 int
535 devfs_devpolicy(vnode_t *vp, devplcy_t **dpp)
536 {
537 	struct vnode *rvp;
538 	struct dv_node *dvp;
539 	int rval = -1;
540 
541 	/* fail if devfs not mounted yet */
542 	if (devfs_mntinfo == NULL)
543 		return (rval);
544 
545 	if (VOP_REALVP(vp, &rvp) == 0 && vn_matchops(rvp, dv_vnodeops)) {
546 		dvp = VTODV(rvp);
547 		rw_enter(&dvp->dv_contents, RW_READER);
548 		if (dvp->dv_priv) {
549 			dphold(dvp->dv_priv);
550 			*dpp = dvp->dv_priv;
551 			rval = 0;
552 		}
553 		rw_exit(&dvp->dv_contents);
554 	}
555 	return (rval);
556 }
557