xref: /titanic_52/usr/src/uts/common/fs/dev/sdev_vfsops.c (revision 2fb4439d628ad2df0775287be1abd1ed95e7d267)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright 2015 Joyent, Inc.  All rights reserved.
25  */
26 
27 /*
28  * This is the /dev (hence, the sdev_ prefix) filesystem.
29  */
30 
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/sysmacros.h>
34 #include <sys/systm.h>
35 #include <sys/kmem.h>
36 #include <sys/time.h>
37 #include <sys/pathname.h>
38 #include <sys/vfs.h>
39 #include <sys/vfs_opreg.h>
40 #include <sys/vnode.h>
41 #include <sys/file.h>
42 #include <sys/stat.h>
43 #include <sys/uio.h>
44 #include <sys/stat.h>
45 #include <sys/errno.h>
46 #include <sys/cmn_err.h>
47 #include <sys/cred.h>
48 #include <sys/statvfs.h>
49 #include <sys/policy.h>
50 #include <sys/mount.h>
51 #include <sys/debug.h>
52 #include <sys/modctl.h>
53 #include <sys/mkdev.h>
54 #include <fs/fs_subr.h>
55 #include <sys/fs/sdev_impl.h>
56 #include <sys/fs/snode.h>
57 #include <sys/fs/dv_node.h>
58 #include <sys/sunndi.h>
59 #include <sys/mntent.h>
60 #include <sys/disp.h>
61 
62 /*
63  * /dev vfs operations.
64  */
65 
66 /*
67  * globals
68  */
69 struct sdev_data *sdev_origins; /* mount info for origins under /dev */
70 kmutex_t sdev_lock; /* used for mount/unmount/rename synchronization */
71 taskq_t *sdev_taskq = NULL;
72 
73 /*
74  * static
75  */
76 static major_t devmajor;	/* the fictitious major we live on */
77 static major_t devminor;	/* the fictitious minor of this instance */
78 static struct sdev_data *sdev_mntinfo = NULL;	/* linked list of instances */
79 
80 /* LINTED E_STATIC_UNUSED */		/* useful for debugging */
81 static struct vnode *sdev_stale_attrvp; /* stale root attrvp after remount */
82 
/* forward declarations of the vfs operations and local helpers */
static int sdev_mount(struct vfs *vfsp, struct vnode *mvp,
    struct mounta *uap, struct cred *cr);
static int sdev_unmount(struct vfs *vfsp, int flag, struct cred *cr);
static int sdev_root(struct vfs *vfsp, struct vnode **vpp);
static int sdev_statvfs(struct vfs *vfsp, struct statvfs64 *sbp);
static void sdev_insert_mntinfo(struct sdev_data *data);
static int devinit(int fstype, char *name);
90 
91 static vfsdef_t sdev_vfssw = {
92 	VFSDEF_VERSION,
93 	"dev",		/* type name string */
94 	devinit,	/* init routine */
95 	VSW_CANREMOUNT,	/* flags */
96 	NULL		/* mount options table prototype */
97 };
98 
99 
100 /*
101  * Module linkage information
102  */
103 static struct modlfs modlfs = {
104 	&mod_fsops, "/dev filesystem", &sdev_vfssw
105 };
106 
107 static struct modlinkage modlinkage = {
108 	MODREV_1, (void *)&modlfs, NULL
109 };
110 
111 int
112 _init(void)
113 {
114 	int e;
115 
116 	mutex_init(&sdev_lock, NULL, MUTEX_DEFAULT, NULL);
117 	sdev_node_cache_init();
118 	sdev_devfsadm_lockinit();
119 	if ((e = mod_install(&modlinkage)) != 0) {
120 		sdev_devfsadm_lockdestroy();
121 		sdev_node_cache_fini();
122 		mutex_destroy(&sdev_lock);
123 		return (e);
124 	}
125 	return (0);
126 }
127 
/*
 * Module unload entry point.
 *
 * The dev module remains loaded for the global /dev instance, so every
 * unload request is refused with EBUSY.
 */
int
_fini(void)
{
	const int rv = EBUSY;

	return (rv);
}
136 
137 int
138 _info(struct modinfo *modinfop)
139 {
140 	return (mod_info(&modlinkage, modinfop));
141 }
142 
143 /*ARGSUSED*/
144 static int
145 devinit(int fstype, char *name)
146 {
147 	static const fs_operation_def_t dev_vfsops_tbl[] = {
148 		VFSNAME_MOUNT,		{ .vfs_mount = sdev_mount },
149 		VFSNAME_UNMOUNT,	{ .vfs_unmount = sdev_unmount },
150 		VFSNAME_ROOT, 		{ .vfs_root = sdev_root },
151 		VFSNAME_STATVFS,	{ .vfs_statvfs = sdev_statvfs },
152 		NULL,			NULL
153 	};
154 
155 	int	error;
156 	extern major_t getudev(void);
157 
158 	devtype = fstype;
159 
160 	error = vfs_setfsops(fstype, dev_vfsops_tbl, NULL);
161 	if (error != 0) {
162 		cmn_err(CE_WARN, "devinit: bad vfs ops tbl");
163 		return (error);
164 	}
165 
166 	error = vn_make_ops("dev", sdev_vnodeops_tbl, &sdev_vnodeops);
167 	if (error != 0) {
168 		(void) vfs_freevfsops_by_type(fstype);
169 		cmn_err(CE_WARN, "devinit: bad vnode ops tbl");
170 		return (error);
171 	}
172 
173 	if ((devmajor = getudev()) == (major_t)-1) {
174 		cmn_err(CE_WARN, "%s: can't get unique dev", sdev_vfssw.name);
175 		return (1);
176 	}
177 
178 	/* initialize negative cache */
179 	sdev_ncache_init();
180 
181 	return (0);
182 }
183 
184 /*
185  * Both mount point and backing store directory name are
186  * passed in from userland
187  */
188 static int
189 sdev_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
190     struct cred *cr)
191 {
192 	struct sdev_data *sdev_data;
193 	struct vnode *avp;
194 	struct sdev_node *dv;
195 	struct sdev_mountargs *args = NULL;
196 	int	error = 0;
197 	dev_t	devdev;
198 
199 	/*
200 	 * security check
201 	 */
202 	if ((secpolicy_fs_mount(cr, mvp, vfsp) != 0) ||
203 	    (secpolicy_sys_devices(cr) != 0))
204 		return (EPERM);
205 
206 	/*
207 	 * Sanity check the mount point
208 	 */
209 	if (mvp->v_type != VDIR)
210 		return (ENOTDIR);
211 
212 	/*
213 	 * Sanity Check for overlay mount.
214 	 */
215 	mutex_enter(&mvp->v_lock);
216 	if ((uap->flags & MS_OVERLAY) == 0 &&
217 	    (uap->flags & MS_REMOUNT) == 0 &&
218 	    (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
219 		mutex_exit(&mvp->v_lock);
220 		return (EBUSY);
221 	}
222 	mutex_exit(&mvp->v_lock);
223 
224 	args = kmem_zalloc(sizeof (*args), KM_SLEEP);
225 
226 	if ((uap->flags & MS_DATA) &&
227 	    (uap->datalen != 0 && uap->dataptr != NULL)) {
228 		/* copy in the arguments */
229 		if (error = sdev_copyin_mountargs(uap, args))
230 			goto cleanup;
231 	}
232 
233 	/*
234 	 * Sanity check the backing store
235 	 */
236 	if (args->sdev_attrdir) {
237 		/* user supplied an attribute store */
238 		if (error = lookupname((char *)(uintptr_t)args->sdev_attrdir,
239 		    UIO_USERSPACE, FOLLOW, NULLVPP, &avp)) {
240 			cmn_err(CE_NOTE, "/dev fs: lookup on attribute "
241 			    "directory %s failed",
242 			    (char *)(uintptr_t)args->sdev_attrdir);
243 			goto cleanup;
244 		}
245 
246 		if (avp->v_type != VDIR) {
247 			VN_RELE(avp);
248 			error = ENOTDIR;
249 			goto cleanup;
250 		}
251 	} else {
252 		/* use mountp as the attribute store */
253 		avp = mvp;
254 		VN_HOLD(avp);
255 	}
256 
257 	mutex_enter(&sdev_lock);
258 
259 	/*
260 	 * Check that the taskq has been created. We can't do this in our
261 	 * _init or devinit because they run too early for ddi_taskq_create.
262 	 */
263 	if (sdev_taskq == NULL) {
264 		sdev_taskq = taskq_create("sdev", 1, minclsyspri, 1, 1, 0);
265 		if (sdev_taskq == NULL) {
266 			error = ENOMEM;
267 			mutex_exit(&sdev_lock);
268 			VN_RELE(avp);
269 			goto cleanup;
270 		}
271 	}
272 
273 	/*
274 	 * handling installation
275 	 */
276 	if (uap->flags & MS_REMOUNT) {
277 		sdev_data = (struct sdev_data *)vfsp->vfs_data;
278 		ASSERT(sdev_data);
279 
280 		dv = sdev_data->sdev_root;
281 		ASSERT(dv == dv->sdev_dotdot);
282 
283 		/*
284 		 * mark all existing sdev_nodes (except root node) stale
285 		 */
286 		sdev_stale(dv);
287 
288 		/* Reset previous mountargs */
289 		if (sdev_data->sdev_mountargs) {
290 			kmem_free(sdev_data->sdev_mountargs,
291 			    sizeof (struct sdev_mountargs));
292 		}
293 		sdev_data->sdev_mountargs = args;
294 		args = NULL;		/* so it won't be freed below */
295 
296 		sdev_stale_attrvp = dv->sdev_attrvp;
297 		dv->sdev_attrvp = avp;
298 		vfsp->vfs_mtime = ddi_get_time();
299 
300 		mutex_exit(&sdev_lock);
301 		goto cleanup;				/* we're done */
302 	}
303 
304 	/*
305 	 * Create and initialize the vfs-private data.
306 	 */
307 	devdev = makedevice(devmajor, devminor);
308 	while (vfs_devismounted(devdev)) {
309 		devminor = (devminor + 1) & MAXMIN32;
310 
311 		/*
312 		 * All the minor numbers are used up.
313 		 */
314 		if (devminor == 0) {
315 			mutex_exit(&sdev_lock);
316 			VN_RELE(avp);
317 			error = ENODEV;
318 			goto cleanup;
319 		}
320 
321 		devdev = makedevice(devmajor, devminor);
322 	}
323 
324 	dv = sdev_mkroot(vfsp, devdev, mvp, avp, cr);
325 	sdev_data = kmem_zalloc(sizeof (struct sdev_data), KM_SLEEP);
326 	vfsp->vfs_dev = devdev;
327 	vfsp->vfs_data = (caddr_t)sdev_data;
328 	vfsp->vfs_fstype = devtype;
329 	vfsp->vfs_bsize = DEV_BSIZE;
330 	vfsp->vfs_mtime = ddi_get_time();
331 	vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devtype);
332 
333 	ASSERT(dv == dv->sdev_dotdot);
334 
335 	sdev_data->sdev_vfsp = vfsp;
336 	sdev_data->sdev_root = dv;
337 	sdev_data->sdev_mountargs = args;
338 
339 	/* get acl flavor from attribute dir */
340 	if (VOP_PATHCONF(avp, _PC_ACL_ENABLED, &sdev_data->sdev_acl_flavor,
341 	    kcred, NULL) != 0 || sdev_data->sdev_acl_flavor == 0)
342 		sdev_data->sdev_acl_flavor = _ACL_ACLENT_ENABLED;
343 
344 	args = NULL;			/* so it won't be freed below */
345 	sdev_insert_mntinfo(sdev_data);
346 	mutex_exit(&sdev_lock);
347 
348 	if (!SDEV_IS_GLOBAL(dv)) {
349 		ASSERT(sdev_origins);
350 		dv->sdev_flags &= ~SDEV_GLOBAL;
351 		dv->sdev_origin = sdev_origins->sdev_root;
352 	} else {
353 		sdev_ncache_setup();
354 		rw_enter(&dv->sdev_contents, RW_WRITER);
355 		sdev_filldir_dynamic(dv);
356 		rw_exit(&dv->sdev_contents);
357 	}
358 
359 	sdev_update_timestamps(dv->sdev_attrvp,
360 	    cr, AT_CTIME|AT_MTIME|AT_ATIME);
361 
362 cleanup:
363 	if (args)
364 		kmem_free(args, sizeof (*args));
365 	return (error);
366 }
367 
368 /*
369  * unmounting the non-global /dev instances, e.g. when deleting a Kevlar zone.
370  */
371 static int
372 sdev_unmount(struct vfs *vfsp, int flag, struct cred *cr)
373 {
374 	struct sdev_node *dv;
375 	int error;
376 	struct sdev_data *sdev_data, *prev, *next;
377 
378 	/*
379 	 * enforce the security policies
380 	 */
381 	if ((secpolicy_fs_unmount(cr, vfsp) != 0) ||
382 	    (secpolicy_sys_devices(cr) != 0))
383 		return (EPERM);
384 
385 	if (flag & MS_FORCE)
386 		return (ENOTSUP);
387 
388 	mutex_enter(&sdev_lock);
389 	dv = VFSTOSDEVFS(vfsp)->sdev_root;
390 	ASSERT(dv == dv->sdev_dotdot);
391 	if (SDEVTOV(dv)->v_count > 1) {
392 		mutex_exit(&sdev_lock);
393 		return (EBUSY);
394 	}
395 
396 	/*
397 	 * global instance remains mounted
398 	 */
399 	if (SDEV_IS_GLOBAL(dv)) {
400 		mutex_exit(&sdev_lock);
401 		return (EBUSY);
402 	}
403 	mutex_exit(&sdev_lock);
404 
405 	/* verify the v_count */
406 	if ((error = sdev_cleandir(dv, NULL, 0)) != 0) {
407 		return (error);
408 	}
409 	ASSERT(SDEVTOV(dv)->v_count == 1);
410 
411 	/* release hold on root node and destroy it */
412 	SDEV_RELE(dv);
413 	dv->sdev_nlink -= 2;
414 	sdev_nodedestroy(dv, 0);
415 
416 	sdev_data = (struct sdev_data *)vfsp->vfs_data;
417 	vfsp->vfs_data = (caddr_t)0;
418 
419 	/*
420 	 * XXX separate it into sdev_delete_mntinfo() if useful
421 	 */
422 	mutex_enter(&sdev_lock);
423 	prev = sdev_data->sdev_prev;
424 	next = sdev_data->sdev_next;
425 	if (prev)
426 		prev->sdev_next = next;
427 	else
428 		sdev_mntinfo = next;
429 	if (next)
430 		next->sdev_prev = prev;
431 	mutex_exit(&sdev_lock);
432 
433 	if (sdev_data->sdev_mountargs) {
434 		kmem_free(sdev_data->sdev_mountargs,
435 		    sizeof (struct sdev_mountargs));
436 	}
437 	kmem_free(sdev_data, sizeof (struct sdev_data));
438 	return (0);
439 }
440 
441 /*
442  * return root vnode for given vfs
443  */
444 static int
445 sdev_root(struct vfs *vfsp, struct vnode **vpp)
446 {
447 	*vpp = SDEVTOV(VFSTOSDEVFS(vfsp)->sdev_root);
448 	VN_HOLD(*vpp);
449 	return (0);
450 }
451 
452 /*
453  * return 'generic superblock' information to userland.
454  *
455  * not much that we can usefully admit to here
456  */
457 static int
458 sdev_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
459 {
460 	dev32_t d32;
461 
462 	bzero(sbp, sizeof (*sbp));
463 	sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize;
464 	sbp->f_files = kmem_cache_stat(sdev_node_cache, "alloc");
465 
466 	/* no illusions that free/avail files is relevant to dev */
467 	sbp->f_ffree = 0;
468 	sbp->f_favail = 0;
469 
470 	/* no illusions that blocks are relevant to devfs */
471 	sbp->f_bfree = 0;
472 	sbp->f_bavail = 0;
473 	sbp->f_blocks = 0;
474 
475 	(void) cmpldev(&d32, vfsp->vfs_dev);
476 	sbp->f_fsid = d32;
477 	(void) strcpy(sbp->f_basetype, vfssw[devtype].vsw_name);
478 	sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
479 	sbp->f_namemax = MAXNAMELEN - 1;
480 	(void) strcpy(sbp->f_fstr, "dev");
481 
482 	return (0);
483 }
484 
485 static void
486 sdev_insert_mntinfo(struct sdev_data *data)
487 {
488 	ASSERT(mutex_owned(&sdev_lock));
489 	data->sdev_next = sdev_mntinfo;
490 	data->sdev_prev = NULL;
491 	if (sdev_mntinfo) {
492 		sdev_mntinfo->sdev_prev = data;
493 	} else {
494 		sdev_origins = data;
495 	}
496 	sdev_mntinfo = data;
497 }
498 
499 struct sdev_data *
500 sdev_find_mntinfo(char *mntpt)
501 {
502 	struct sdev_data *mntinfo;
503 
504 	mutex_enter(&sdev_lock);
505 	mntinfo = sdev_mntinfo;
506 	while (mntinfo) {
507 		if (strcmp(mntpt, mntinfo->sdev_root->sdev_name) == 0) {
508 			SDEVTOV(mntinfo->sdev_root)->v_count++;
509 			break;
510 		}
511 		mntinfo = mntinfo->sdev_next;
512 	}
513 	mutex_exit(&sdev_lock);
514 	return (mntinfo);
515 }
516 
517 void
518 sdev_mntinfo_rele(struct sdev_data *mntinfo)
519 {
520 	mutex_enter(&sdev_lock);
521 	SDEVTOV(mntinfo->sdev_root)->v_count--;
522 	mutex_exit(&sdev_lock);
523 }
524