xref: /titanic_44/usr/src/uts/common/fs/dev/sdev_vfsops.c (revision 2a9459bdd821c1cf59590a7a9069ac9c591e8a6b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * This is the /dev (hence, the sdev_ prefix) filesystem.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/sysmacros.h>
35 #include <sys/systm.h>
36 #include <sys/kmem.h>
37 #include <sys/time.h>
38 #include <sys/pathname.h>
39 #include <sys/vfs.h>
40 #include <sys/vfs_opreg.h>
41 #include <sys/vnode.h>
42 #include <sys/file.h>
43 #include <sys/stat.h>
44 #include <sys/uio.h>
45 #include <sys/stat.h>
46 #include <sys/errno.h>
47 #include <sys/cmn_err.h>
48 #include <sys/cred.h>
49 #include <sys/statvfs.h>
50 #include <sys/policy.h>
51 #include <sys/mount.h>
52 #include <sys/debug.h>
53 #include <sys/modctl.h>
54 #include <sys/mkdev.h>
55 #include <fs/fs_subr.h>
56 #include <sys/fs/sdev_impl.h>
57 #include <sys/fs/sdev_node.h>
58 #include <sys/fs/snode.h>
59 #include <sys/fs/dv_node.h>
60 #include <sys/sunndi.h>
61 #include <sys/mntent.h>
62 
63 /*
64  * /dev vfs operations.
65  */
66 
67 /*
68  * globals
69  */
/*
 * sdev_origins is set by sdev_insert_mntinfo() to the first instance
 * placed on the mount list; sdev_mount() uses its root node as the
 * origin of every non-global instance.
 */
struct sdev_data *sdev_origins; /* mount info for origins under /dev */
/*
 * sdev_lock is held in this file while the instance list, sdev_origins
 * and the minor number counter are examined or modified.
 */
kmutex_t sdev_lock; /* used for mount/unmount/rename synchronization */
72 
73 /*
74  * static
75  */
76 static major_t devmajor;	/* the fictitious major we live on */
77 static major_t devminor;	/* the fictitious minor of this instance */
78 static struct sdev_data *sdev_mntinfo = NULL;	/* linked list of instances */
79 
80 /* LINTED E_STATIC_UNUSED */		/* useful for debugging */
81 static struct vnode *sdev_stale_attrvp; /* stale root attrvp after remount */
82 
/*
 * Forward declarations: the four vfs entry points registered by
 * devinit(), the mount-list helper, and devinit() itself (referenced by
 * sdev_vfssw below).
 */
static int sdev_mount(struct vfs *, struct vnode *, struct mounta *,
    struct cred *);
static int sdev_unmount(struct vfs *, int, struct cred *);
static int sdev_root(struct vfs *, struct vnode **);
static int sdev_statvfs(struct vfs *, struct statvfs64 *);
static void sdev_insert_mntinfo(struct sdev_data *);
static int devinit(int, char *);
90 
/*
 * Filesystem definition for the "dev" type.  It is handed to the module
 * framework through modlfs below and names devinit() as the routine that
 * sets the filesystem type up.
 */
static vfsdef_t sdev_vfssw = {
	VFSDEF_VERSION,
	"dev",		/* type name string */
	devinit,	/* init routine */
	VSW_CANREMOUNT,	/* flags */
	NULL		/* mount options table prototype */
};
98 
99 
100 /*
101  * Module linkage information
102  */
/* filesystem module description: ops vector, description string, vfsdef */
static struct modlfs modlfs = {
	&mod_fsops, "/dev filesystem %I%", &sdev_vfssw
};

/* linkage passed to mod_install() in _init() and mod_info() in _info() */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlfs, NULL
};
110 
111 int
112 _init(void)
113 {
114 	int e;
115 
116 	mutex_init(&sdev_lock, NULL, MUTEX_DEFAULT, NULL);
117 	sdev_node_cache_init();
118 	sdev_devfsadm_lockinit();
119 	if ((e = mod_install(&modlinkage)) != 0) {
120 		sdev_devfsadm_lockdestroy();
121 		sdev_node_cache_fini();
122 		mutex_destroy(&sdev_lock);
123 		return (e);
124 	}
125 	return (0);
126 }
127 
/*
 * Module unload entry point.
 *
 * The dev module remains loaded for the global /dev instance, so every
 * unload request is refused with EBUSY.
 */
int
_fini(void)
{
	return (EBUSY);
}
136 
/*
 * Module information entry point: passes this module's linkage and the
 * caller's modinfo buffer to mod_info() and returns its result.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
142 
143 /*ARGSUSED*/
144 static int
145 devinit(int fstype, char *name)
146 {
147 	static const fs_operation_def_t dev_vfsops_tbl[] = {
148 		VFSNAME_MOUNT,		{ .vfs_mount = sdev_mount },
149 		VFSNAME_UNMOUNT,	{ .vfs_unmount = sdev_unmount },
150 		VFSNAME_ROOT, 		{ .vfs_root = sdev_root },
151 		VFSNAME_STATVFS,	{ .vfs_statvfs = sdev_statvfs },
152 		NULL,			NULL
153 	};
154 
155 	int	error;
156 	extern major_t getudev(void);
157 
158 	devtype = fstype;
159 
160 	error = vfs_setfsops(fstype, dev_vfsops_tbl, NULL);
161 	if (error != 0) {
162 		cmn_err(CE_WARN, "devinit: bad vfs ops tbl");
163 		return (error);
164 	}
165 
166 	error = vn_make_ops("dev", sdev_vnodeops_tbl, &sdev_vnodeops);
167 	if (error != 0) {
168 		(void) vfs_freevfsops_by_type(fstype);
169 		cmn_err(CE_WARN, "devinit: bad vnode ops tbl");
170 		return (error);
171 	}
172 
173 	if ((devmajor = getudev()) == (major_t)-1) {
174 		cmn_err(CE_WARN, "%s: can't get unique dev", sdev_vfssw.name);
175 		return (1);
176 	}
177 
178 	/* initialize negative cache */
179 	sdev_ncache_init();
180 
181 	return (0);
182 }
183 
184 /*
185  * Both mount point and backing store directory name are
186  * passed in from userland
187  */
188 static int
189 sdev_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
190     struct cred *cr)
191 {
192 	struct sdev_data *sdev_data;
193 	struct vnode *avp;
194 	struct sdev_node *dv;
195 	struct sdev_mountargs *args = NULL;
196 	int	error = 0;
197 	dev_t	devdev;
198 
199 	/*
200 	 * security check
201 	 */
202 	if ((secpolicy_fs_mount(cr, mvp, vfsp) != 0) ||
203 	    (secpolicy_sys_devices(cr) != 0))
204 		return (EPERM);
205 
206 	/*
207 	 * Sanity check the mount point
208 	 */
209 	if (mvp->v_type != VDIR)
210 		return (ENOTDIR);
211 
212 	/*
213 	 * Sanity Check for overlay mount.
214 	 */
215 	mutex_enter(&mvp->v_lock);
216 	if ((uap->flags & MS_OVERLAY) == 0 &&
217 	    (uap->flags & MS_REMOUNT) == 0 &&
218 	    (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
219 		mutex_exit(&mvp->v_lock);
220 		return (EBUSY);
221 	}
222 	mutex_exit(&mvp->v_lock);
223 
224 	args = kmem_zalloc(sizeof (*args), KM_SLEEP);
225 
226 	if ((uap->flags & MS_DATA) &&
227 	    (uap->datalen != 0 && uap->dataptr != NULL)) {
228 		/* copy in the arguments */
229 		if (error = sdev_copyin_mountargs(uap, args))
230 			goto cleanup;
231 	}
232 
233 	/*
234 	 * Sanity check the backing store
235 	 */
236 	if (args->sdev_attrdir) {
237 		/* user supplied an attribute store */
238 		if (error = lookupname((char *)(uintptr_t)args->sdev_attrdir,
239 		    UIO_USERSPACE, FOLLOW, NULLVPP, &avp)) {
240 			cmn_err(CE_NOTE, "/dev fs: lookup on attribute "
241 			    "directory %s failed",
242 			    (char *)(uintptr_t)args->sdev_attrdir);
243 			goto cleanup;
244 		}
245 
246 		if (avp->v_type != VDIR) {
247 			VN_RELE(avp);
248 			error = ENOTDIR;
249 			goto cleanup;
250 		}
251 	} else {
252 		/* use mountp as the attribute store */
253 		avp = mvp;
254 		VN_HOLD(avp);
255 	}
256 
257 	mutex_enter(&sdev_lock);
258 
259 	/*
260 	 * handling installation
261 	 */
262 	if (uap->flags & MS_REMOUNT) {
263 		sdev_data = (struct sdev_data *)vfsp->vfs_data;
264 		ASSERT(sdev_data);
265 
266 		dv = sdev_data->sdev_root;
267 		ASSERT(dv == dv->sdev_dotdot);
268 
269 		/*
270 		 * mark all existing sdev_nodes (except root node) stale
271 		 */
272 		sdev_stale(dv);
273 
274 		/* Reset previous mountargs */
275 		if (sdev_data->sdev_mountargs) {
276 			kmem_free(sdev_data->sdev_mountargs,
277 			    sizeof (struct sdev_mountargs));
278 		}
279 		sdev_data->sdev_mountargs = args;
280 		args = NULL;		/* so it won't be freed below */
281 
282 		sdev_stale_attrvp = dv->sdev_attrvp;
283 		dv->sdev_attrvp = avp;
284 		vfsp->vfs_mtime = ddi_get_time();
285 
286 		mutex_exit(&sdev_lock);
287 		goto cleanup;				/* we're done */
288 	}
289 
290 	/*
291 	 * Create and initialize the vfs-private data.
292 	 */
293 	devdev = makedevice(devmajor, devminor);
294 	while (vfs_devismounted(devdev)) {
295 		devminor = (devminor + 1) & MAXMIN32;
296 
297 		/*
298 		 * All the minor numbers are used up.
299 		 */
300 		if (devminor == 0) {
301 			mutex_exit(&sdev_lock);
302 			VN_RELE(avp);
303 			error = ENODEV;
304 			goto cleanup;
305 		}
306 
307 		devdev = makedevice(devmajor, devminor);
308 	}
309 
310 	dv = sdev_mkroot(vfsp, devdev, mvp, avp, cr);
311 	sdev_data = kmem_zalloc(sizeof (struct sdev_data), KM_SLEEP);
312 	vfsp->vfs_dev = devdev;
313 	vfsp->vfs_data = (caddr_t)sdev_data;
314 	vfsp->vfs_fstype = devtype;
315 	vfsp->vfs_bsize = DEV_BSIZE;
316 	vfsp->vfs_mtime = ddi_get_time();
317 	vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devtype);
318 
319 	ASSERT(dv == dv->sdev_dotdot);
320 
321 	sdev_data->sdev_vfsp = vfsp;
322 	sdev_data->sdev_root = dv;
323 	sdev_data->sdev_mountargs = args;
324 
325 	/* get acl flavor from attribute dir */
326 	if (VOP_PATHCONF(avp, _PC_ACL_ENABLED, &sdev_data->sdev_acl_flavor,
327 	    kcred, NULL) != 0 || sdev_data->sdev_acl_flavor == 0)
328 		sdev_data->sdev_acl_flavor = _ACL_ACLENT_ENABLED;
329 
330 	args = NULL;			/* so it won't be freed below */
331 	sdev_insert_mntinfo(sdev_data);
332 	mutex_exit(&sdev_lock);
333 
334 	if (!SDEV_IS_GLOBAL(dv)) {
335 		ASSERT(sdev_origins);
336 		dv->sdev_flags &= ~SDEV_GLOBAL;
337 		dv->sdev_origin = sdev_origins->sdev_root;
338 	} else {
339 		sdev_ncache_setup();
340 		rw_enter(&dv->sdev_contents, RW_WRITER);
341 		sdev_filldir_dynamic(dv);
342 		rw_exit(&dv->sdev_contents);
343 	}
344 
345 	sdev_update_timestamps(dv->sdev_attrvp,
346 		cr, AT_CTIME|AT_MTIME|AT_ATIME);
347 
348 cleanup:
349 	if (args)
350 		kmem_free(args, sizeof (*args));
351 	return (error);
352 }
353 
354 /*
355  * unmounting the non-global /dev instances, e.g. when deleting a Kevlar zone.
356  */
357 static int
358 sdev_unmount(struct vfs *vfsp, int flag, struct cred *cr)
359 {
360 	struct sdev_node *dv;
361 	int error;
362 	struct sdev_data *sdev_data, *prev, *next;
363 
364 	/*
365 	 * enforce the security policies
366 	 */
367 	if ((secpolicy_fs_unmount(cr, vfsp) != 0) ||
368 	    (secpolicy_sys_devices(cr) != 0))
369 		return (EPERM);
370 
371 	if (flag & MS_FORCE)
372 		return (ENOTSUP);
373 
374 	mutex_enter(&sdev_lock);
375 	dv = VFSTOSDEVFS(vfsp)->sdev_root;
376 	ASSERT(dv == dv->sdev_dotdot);
377 	if (SDEVTOV(dv)->v_count > 1) {
378 		mutex_exit(&sdev_lock);
379 		return (EBUSY);
380 	}
381 
382 	/*
383 	 * global instance remains mounted
384 	 */
385 	if (SDEV_IS_GLOBAL(dv)) {
386 		mutex_exit(&sdev_lock);
387 		return (EBUSY);
388 	}
389 	mutex_exit(&sdev_lock);
390 
391 	/* verify the v_count */
392 	if ((error = sdev_cleandir(dv, NULL, 0)) != 0) {
393 		return (error);
394 	}
395 	ASSERT(SDEVTOV(dv)->v_count == 1);
396 
397 	/* release hold on root node and destroy it */
398 	SDEV_RELE(dv);
399 	dv->sdev_nlink -= 2;
400 	sdev_nodedestroy(dv, 0);
401 
402 	sdev_data = (struct sdev_data *)vfsp->vfs_data;
403 	vfsp->vfs_data = (caddr_t)0;
404 
405 	/*
406 	 * XXX separate it into sdev_delete_mntinfo() if useful
407 	 */
408 	mutex_enter(&sdev_lock);
409 	prev = sdev_data->sdev_prev;
410 	next = sdev_data->sdev_next;
411 	if (prev)
412 		prev->sdev_next = next;
413 	else
414 		sdev_mntinfo = next;
415 	if (next)
416 		next->sdev_prev = prev;
417 	mutex_exit(&sdev_lock);
418 
419 	if (sdev_data->sdev_mountargs) {
420 		kmem_free(sdev_data->sdev_mountargs,
421 		    sizeof (struct sdev_mountargs));
422 	}
423 	kmem_free(sdev_data, sizeof (struct sdev_data));
424 	return (0);
425 }
426 
427 /*
428  * return root vnode for given vfs
429  */
430 static int
431 sdev_root(struct vfs *vfsp, struct vnode **vpp)
432 {
433 	*vpp = SDEVTOV(VFSTOSDEVFS(vfsp)->sdev_root);
434 	VN_HOLD(*vpp);
435 	return (0);
436 }
437 
438 /*
439  * return 'generic superblock' information to userland.
440  *
441  * not much that we can usefully admit to here
442  */
443 static int
444 sdev_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
445 {
446 	dev32_t d32;
447 
448 	bzero(sbp, sizeof (*sbp));
449 	sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize;
450 	sbp->f_files = kmem_cache_stat(sdev_node_cache, "alloc");
451 
452 	/* no illusions that free/avail files is relevant to dev */
453 	sbp->f_ffree = 0;
454 	sbp->f_favail = 0;
455 
456 	/* no illusions that blocks are relevant to devfs */
457 	sbp->f_bfree = 0;
458 	sbp->f_bavail = 0;
459 	sbp->f_blocks = 0;
460 
461 	(void) cmpldev(&d32, vfsp->vfs_dev);
462 	sbp->f_fsid = d32;
463 	(void) strcpy(sbp->f_basetype, vfssw[devtype].vsw_name);
464 	sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
465 	sbp->f_namemax = MAXNAMELEN - 1;
466 	(void) strcpy(sbp->f_fstr, "dev");
467 
468 	return (0);
469 }
470 
471 int
472 sdev_module_register(char *mod_name, struct devname_ops *dev_ops)
473 {
474 	struct devname_nsmap *map = NULL;
475 
476 	if (strcmp(mod_name, DEVNAME_NSCONFIG) == 0) {
477 		devname_ns_ops = dev_ops;
478 		return (0);
479 	}
480 
481 	map = sdev_get_nsmap_by_module(mod_name);
482 	if (map == NULL)
483 		return (EFAULT);
484 
485 	rw_enter(&map->dir_lock, RW_WRITER);
486 	map->dir_ops = dev_ops;
487 	rw_exit(&map->dir_lock);
488 	return (0);
489 }
490 
491 static void
492 sdev_insert_mntinfo(struct sdev_data *data)
493 {
494 	ASSERT(mutex_owned(&sdev_lock));
495 	data->sdev_next = sdev_mntinfo;
496 	data->sdev_prev = NULL;
497 	if (sdev_mntinfo) {
498 		sdev_mntinfo->sdev_prev = data;
499 	} else {
500 		sdev_origins = data;
501 	}
502 	sdev_mntinfo = data;
503 }
504 
505 struct sdev_data *
506 sdev_find_mntinfo(char *mntpt)
507 {
508 	struct sdev_data *mntinfo;
509 
510 	mutex_enter(&sdev_lock);
511 	mntinfo = sdev_mntinfo;
512 	while (mntinfo) {
513 		if (strcmp(mntpt, mntinfo->sdev_root->sdev_name) == 0) {
514 			SDEVTOV(mntinfo->sdev_root)->v_count++;
515 			break;
516 		}
517 		mntinfo = mntinfo->sdev_next;
518 	}
519 	mutex_exit(&sdev_lock);
520 	return (mntinfo);
521 }
522 
523 void
524 sdev_mntinfo_rele(struct sdev_data *mntinfo)
525 {
526 	mutex_enter(&sdev_lock);
527 	SDEVTOV(mntinfo->sdev_root)->v_count--;
528 	mutex_exit(&sdev_lock);
529 }
530