xref: /illumos-gate/usr/src/uts/common/fs/dev/sdev_vfsops.c (revision e34d8872f4a713d904a4b34fb081060d1a7eba62)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright 2015 Joyent, Inc.  All rights reserved.
25  * Copyright (c) 2017 by Delphix. All rights reserved.
26  */
27 
28 /*
29  * This is the /dev (hence, the sdev_ prefix) filesystem.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/sysmacros.h>
35 #include <sys/systm.h>
36 #include <sys/kmem.h>
37 #include <sys/time.h>
38 #include <sys/pathname.h>
39 #include <sys/vfs.h>
40 #include <sys/vfs_opreg.h>
41 #include <sys/vnode.h>
42 #include <sys/file.h>
43 #include <sys/stat.h>
44 #include <sys/uio.h>
45 #include <sys/stat.h>
46 #include <sys/errno.h>
47 #include <sys/cmn_err.h>
48 #include <sys/cred.h>
49 #include <sys/statvfs.h>
50 #include <sys/policy.h>
51 #include <sys/mount.h>
52 #include <sys/debug.h>
53 #include <sys/modctl.h>
54 #include <sys/mkdev.h>
55 #include <fs/fs_subr.h>
56 #include <sys/fs/sdev_impl.h>
57 #include <sys/fs/snode.h>
58 #include <sys/fs/dv_node.h>
59 #include <sys/sunndi.h>
60 #include <sys/mntent.h>
61 #include <sys/disp.h>
62 
63 /*
64  * /dev vfs operations.
65  */
66 
67 /*
68  * globals
69  */
/*
 * Mount info for origins under /dev.  Assigned by sdev_insert_mntinfo()
 * whenever an instance is inserted while the instance list is empty.
 */
struct sdev_data *sdev_origins;
/* used for mount/unmount/rename synchronization */
kmutex_t sdev_lock;
/* created lazily by sdev_mount() the first time it finds this NULL */
taskq_t *sdev_taskq = NULL;
73 
74 /*
75  * static
76  */
77 static major_t devmajor;	/* the fictitious major we live on */
78 static major_t devminor;	/* the fictitious minor of this instance */
79 static struct sdev_data *sdev_mntinfo = NULL;	/* linked list of instances */
80 
81 /* LINTED E_STATIC_UNUSED */		/* useful for debugging */
82 static struct vnode *sdev_stale_attrvp; /* stale root attrvp after remount */
83 
/*
 * Forward declarations: the four vfs operations registered by devinit(),
 * the mount-list insertion helper, and devinit() itself (referenced from
 * sdev_vfssw below).
 */
static int sdev_mount(struct vfs *, struct vnode *, struct mounta *,
    struct cred *);
static int sdev_unmount(struct vfs *, int, struct cred *);
static int sdev_root(struct vfs *, struct vnode **);
static int sdev_statvfs(struct vfs *, struct statvfs64 *);
static void sdev_insert_mntinfo(struct sdev_data *);
static int devinit(int, char *);
91 
/*
 * Filesystem type definition for "dev"; handed to the module framework
 * through modlfs below.
 */
static vfsdef_t sdev_vfssw = {
	VFSDEF_VERSION,
	"dev",		/* type name string */
	devinit,	/* init routine */
	VSW_CANREMOUNT,	/* flags */
	NULL		/* mount options table prototype */
};
99 
100 
101 /*
102  * Module linkage information
103  */
/* filesystem module description: fs module ops, name string, vfs definition */
static struct modlfs modlfs = {
	&mod_fsops, "/dev filesystem", &sdev_vfssw
};

/* linkage passed to mod_install() in _init() and mod_info() in _info() */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlfs, NULL
};
111 
112 int
113 _init(void)
114 {
115 	int e;
116 
117 	mutex_init(&sdev_lock, NULL, MUTEX_DEFAULT, NULL);
118 	sdev_node_cache_init();
119 	sdev_devfsadm_lockinit();
120 	if ((e = mod_install(&modlinkage)) != 0) {
121 		sdev_devfsadm_lockdestroy();
122 		sdev_node_cache_fini();
123 		mutex_destroy(&sdev_lock);
124 		return (e);
125 	}
126 	return (0);
127 }
128 
/*
 * Module unload entry point.
 *
 * The dev module remains loaded for the global /dev instance, so every
 * unload request is refused with EBUSY.
 */
int
_fini(void)
{
	return (EBUSY);
}
137 
138 int
139 _info(struct modinfo *modinfop)
140 {
141 	return (mod_info(&modlinkage, modinfop));
142 }
143 
144 /*ARGSUSED*/
145 static int
146 devinit(int fstype, char *name)
147 {
148 	static const fs_operation_def_t dev_vfsops_tbl[] = {
149 		VFSNAME_MOUNT,		{ .vfs_mount = sdev_mount },
150 		VFSNAME_UNMOUNT,	{ .vfs_unmount = sdev_unmount },
151 		VFSNAME_ROOT, 		{ .vfs_root = sdev_root },
152 		VFSNAME_STATVFS,	{ .vfs_statvfs = sdev_statvfs },
153 		NULL,			NULL
154 	};
155 
156 	int	error;
157 	extern major_t getudev(void);
158 
159 	devtype = fstype;
160 
161 	error = vfs_setfsops(fstype, dev_vfsops_tbl, NULL);
162 	if (error != 0) {
163 		cmn_err(CE_WARN, "devinit: bad vfs ops tbl");
164 		return (error);
165 	}
166 
167 	error = vn_make_ops("dev", sdev_vnodeops_tbl, &sdev_vnodeops);
168 	if (error != 0) {
169 		(void) vfs_freevfsops_by_type(fstype);
170 		cmn_err(CE_WARN, "devinit: bad vnode ops tbl");
171 		return (error);
172 	}
173 
174 	if ((devmajor = getudev()) == (major_t)-1) {
175 		cmn_err(CE_WARN, "%s: can't get unique dev", sdev_vfssw.name);
176 		return (1);
177 	}
178 
179 	/* initialize negative cache */
180 	sdev_ncache_init();
181 
182 	return (0);
183 }
184 
185 /*
186  * Both mount point and backing store directory name are
187  * passed in from userland
188  */
189 static int
190 sdev_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
191     struct cred *cr)
192 {
193 	struct sdev_data *sdev_data;
194 	struct vnode *avp;
195 	struct sdev_node *dv;
196 	struct sdev_mountargs *args = NULL;
197 	int	error = 0;
198 	dev_t	devdev;
199 
200 	/*
201 	 * security check
202 	 */
203 	if ((secpolicy_fs_mount(cr, mvp, vfsp) != 0) ||
204 	    (secpolicy_sys_devices(cr) != 0))
205 		return (EPERM);
206 
207 	/*
208 	 * Sanity check the mount point
209 	 */
210 	if (mvp->v_type != VDIR)
211 		return (ENOTDIR);
212 
213 	/*
214 	 * Sanity Check for overlay mount.
215 	 */
216 	mutex_enter(&mvp->v_lock);
217 	if ((uap->flags & MS_OVERLAY) == 0 &&
218 	    (uap->flags & MS_REMOUNT) == 0 &&
219 	    (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
220 		mutex_exit(&mvp->v_lock);
221 		return (EBUSY);
222 	}
223 	mutex_exit(&mvp->v_lock);
224 
225 	args = kmem_zalloc(sizeof (*args), KM_SLEEP);
226 
227 	if ((uap->flags & MS_DATA) &&
228 	    (uap->datalen != 0 && uap->dataptr != NULL)) {
229 		/* copy in the arguments */
230 		if (error = sdev_copyin_mountargs(uap, args))
231 			goto cleanup;
232 	}
233 
234 	/*
235 	 * Sanity check the backing store
236 	 */
237 	if (args->sdev_attrdir) {
238 		/* user supplied an attribute store */
239 		if (error = lookupname((char *)(uintptr_t)args->sdev_attrdir,
240 		    UIO_USERSPACE, FOLLOW, NULLVPP, &avp)) {
241 			cmn_err(CE_NOTE, "/dev fs: lookup on attribute "
242 			    "directory %s failed",
243 			    (char *)(uintptr_t)args->sdev_attrdir);
244 			goto cleanup;
245 		}
246 
247 		if (avp->v_type != VDIR) {
248 			VN_RELE(avp);
249 			error = ENOTDIR;
250 			goto cleanup;
251 		}
252 	} else {
253 		/* use mountp as the attribute store */
254 		avp = mvp;
255 		VN_HOLD(avp);
256 	}
257 
258 	mutex_enter(&sdev_lock);
259 
260 	/*
261 	 * Check that the taskq has been created. We can't do this in our
262 	 * _init or devinit because they run too early for ddi_taskq_create.
263 	 */
264 	if (sdev_taskq == NULL) {
265 		sdev_taskq = taskq_create("sdev", 1, minclsyspri, 1, 1, 0);
266 		if (sdev_taskq == NULL) {
267 			error = ENOMEM;
268 			mutex_exit(&sdev_lock);
269 			VN_RELE(avp);
270 			goto cleanup;
271 		}
272 	}
273 
274 	/*
275 	 * handling installation
276 	 */
277 	if (uap->flags & MS_REMOUNT) {
278 		sdev_data = (struct sdev_data *)vfsp->vfs_data;
279 		ASSERT(sdev_data);
280 
281 		dv = sdev_data->sdev_root;
282 		ASSERT(dv == dv->sdev_dotdot);
283 
284 		/*
285 		 * mark all existing sdev_nodes (except root node) stale
286 		 */
287 		sdev_stale(dv);
288 
289 		/* Reset previous mountargs */
290 		if (sdev_data->sdev_mountargs) {
291 			kmem_free(sdev_data->sdev_mountargs,
292 			    sizeof (struct sdev_mountargs));
293 		}
294 		sdev_data->sdev_mountargs = args;
295 		args = NULL;		/* so it won't be freed below */
296 
297 		sdev_stale_attrvp = dv->sdev_attrvp;
298 		dv->sdev_attrvp = avp;
299 		vfsp->vfs_mtime = ddi_get_time();
300 
301 		mutex_exit(&sdev_lock);
302 		goto cleanup;				/* we're done */
303 	}
304 
305 	/*
306 	 * Create and initialize the vfs-private data.
307 	 */
308 	devdev = makedevice(devmajor, devminor);
309 	while (vfs_devismounted(devdev)) {
310 		devminor = (devminor + 1) & MAXMIN32;
311 
312 		/*
313 		 * All the minor numbers are used up.
314 		 */
315 		if (devminor == 0) {
316 			mutex_exit(&sdev_lock);
317 			VN_RELE(avp);
318 			error = ENODEV;
319 			goto cleanup;
320 		}
321 
322 		devdev = makedevice(devmajor, devminor);
323 	}
324 
325 	dv = sdev_mkroot(vfsp, devdev, mvp, avp, cr);
326 	sdev_data = kmem_zalloc(sizeof (struct sdev_data), KM_SLEEP);
327 	vfsp->vfs_dev = devdev;
328 	vfsp->vfs_data = (caddr_t)sdev_data;
329 	vfsp->vfs_fstype = devtype;
330 	vfsp->vfs_bsize = DEV_BSIZE;
331 	vfsp->vfs_mtime = ddi_get_time();
332 	vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devtype);
333 
334 	ASSERT(dv == dv->sdev_dotdot);
335 
336 	sdev_data->sdev_vfsp = vfsp;
337 	sdev_data->sdev_root = dv;
338 	sdev_data->sdev_mountargs = args;
339 
340 	/* get acl flavor from attribute dir */
341 	if (VOP_PATHCONF(avp, _PC_ACL_ENABLED, &sdev_data->sdev_acl_flavor,
342 	    kcred, NULL) != 0 || sdev_data->sdev_acl_flavor == 0)
343 		sdev_data->sdev_acl_flavor = _ACL_ACLENT_ENABLED;
344 
345 	args = NULL;			/* so it won't be freed below */
346 	sdev_insert_mntinfo(sdev_data);
347 	mutex_exit(&sdev_lock);
348 
349 	if (!SDEV_IS_GLOBAL(dv)) {
350 		ASSERT(sdev_origins);
351 		dv->sdev_flags &= ~SDEV_GLOBAL;
352 		dv->sdev_origin = sdev_origins->sdev_root;
353 	} else {
354 		sdev_ncache_setup();
355 		rw_enter(&dv->sdev_contents, RW_WRITER);
356 		sdev_filldir_dynamic(dv);
357 		rw_exit(&dv->sdev_contents);
358 	}
359 
360 	sdev_update_timestamps(dv->sdev_attrvp,
361 	    cr, AT_CTIME|AT_MTIME|AT_ATIME);
362 
363 cleanup:
364 	if (args)
365 		kmem_free(args, sizeof (*args));
366 	return (error);
367 }
368 
369 /*
370  * unmounting the non-global /dev instances, e.g. when deleting a Kevlar zone.
371  */
372 static int
373 sdev_unmount(struct vfs *vfsp, int flag, struct cred *cr)
374 {
375 	struct sdev_node *dv;
376 	int error;
377 	struct sdev_data *sdev_data, *prev, *next;
378 
379 	/*
380 	 * enforce the security policies
381 	 */
382 	if ((secpolicy_fs_unmount(cr, vfsp) != 0) ||
383 	    (secpolicy_sys_devices(cr) != 0))
384 		return (EPERM);
385 
386 	if (flag & MS_FORCE)
387 		return (ENOTSUP);
388 
389 	mutex_enter(&sdev_lock);
390 	dv = VFSTOSDEVFS(vfsp)->sdev_root;
391 	ASSERT(dv == dv->sdev_dotdot);
392 	if (SDEVTOV(dv)->v_count > 1) {
393 		mutex_exit(&sdev_lock);
394 		return (EBUSY);
395 	}
396 
397 	/*
398 	 * global instance remains mounted
399 	 */
400 	if (SDEV_IS_GLOBAL(dv)) {
401 		mutex_exit(&sdev_lock);
402 		return (EBUSY);
403 	}
404 	mutex_exit(&sdev_lock);
405 
406 	/* verify the v_count */
407 	if ((error = sdev_cleandir(dv, NULL, 0)) != 0) {
408 		return (error);
409 	}
410 	ASSERT(SDEVTOV(dv)->v_count == 1);
411 
412 	/* release hold on root node and destroy it */
413 	SDEV_RELE(dv);
414 	dv->sdev_nlink -= 2;
415 	sdev_nodedestroy(dv, 0);
416 
417 	sdev_data = (struct sdev_data *)vfsp->vfs_data;
418 	vfsp->vfs_data = (caddr_t)0;
419 
420 	/*
421 	 * XXX separate it into sdev_delete_mntinfo() if useful
422 	 */
423 	mutex_enter(&sdev_lock);
424 	prev = sdev_data->sdev_prev;
425 	next = sdev_data->sdev_next;
426 	if (prev)
427 		prev->sdev_next = next;
428 	else
429 		sdev_mntinfo = next;
430 	if (next)
431 		next->sdev_prev = prev;
432 	mutex_exit(&sdev_lock);
433 
434 	if (sdev_data->sdev_mountargs) {
435 		kmem_free(sdev_data->sdev_mountargs,
436 		    sizeof (struct sdev_mountargs));
437 	}
438 	kmem_free(sdev_data, sizeof (struct sdev_data));
439 	return (0);
440 }
441 
442 /*
443  * return root vnode for given vfs
444  */
445 static int
446 sdev_root(struct vfs *vfsp, struct vnode **vpp)
447 {
448 	*vpp = SDEVTOV(VFSTOSDEVFS(vfsp)->sdev_root);
449 	VN_HOLD(*vpp);
450 	return (0);
451 }
452 
453 /*
454  * return 'generic superblock' information to userland.
455  *
456  * not much that we can usefully admit to here
457  */
458 static int
459 sdev_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
460 {
461 	dev32_t d32;
462 
463 	bzero(sbp, sizeof (*sbp));
464 	sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize;
465 	sbp->f_files = kmem_cache_stat(sdev_node_cache, "alloc");
466 
467 	/* no illusions that free/avail files is relevant to dev */
468 	sbp->f_ffree = 0;
469 	sbp->f_favail = 0;
470 
471 	/* no illusions that blocks are relevant to devfs */
472 	sbp->f_bfree = 0;
473 	sbp->f_bavail = 0;
474 	sbp->f_blocks = 0;
475 
476 	(void) cmpldev(&d32, vfsp->vfs_dev);
477 	sbp->f_fsid = d32;
478 	(void) strcpy(sbp->f_basetype, vfssw[devtype].vsw_name);
479 	sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
480 	sbp->f_namemax = MAXNAMELEN - 1;
481 	(void) strcpy(sbp->f_fstr, "dev");
482 
483 	return (0);
484 }
485 
486 static void
487 sdev_insert_mntinfo(struct sdev_data *data)
488 {
489 	ASSERT(mutex_owned(&sdev_lock));
490 	data->sdev_next = sdev_mntinfo;
491 	data->sdev_prev = NULL;
492 	if (sdev_mntinfo) {
493 		sdev_mntinfo->sdev_prev = data;
494 	} else {
495 		sdev_origins = data;
496 	}
497 	sdev_mntinfo = data;
498 }
499 
500 struct sdev_data *
501 sdev_find_mntinfo(char *mntpt)
502 {
503 	struct sdev_data *mntinfo;
504 
505 	mutex_enter(&sdev_lock);
506 	mntinfo = sdev_mntinfo;
507 	while (mntinfo) {
508 		if (strcmp(mntpt, mntinfo->sdev_root->sdev_name) == 0) {
509 			VN_HOLD(SDEVTOV(mntinfo->sdev_root));
510 			break;
511 		}
512 		mntinfo = mntinfo->sdev_next;
513 	}
514 	mutex_exit(&sdev_lock);
515 	return (mntinfo);
516 }
517 
518 void
519 sdev_mntinfo_rele(struct sdev_data *mntinfo)
520 {
521 	vnode_t *vp;
522 
523 	mutex_enter(&sdev_lock);
524 	vp = SDEVTOV(mntinfo->sdev_root);
525 	mutex_enter(&vp->v_lock);
526 	VN_RELE_LOCKED(vp);
527 	mutex_exit(&vp->v_lock);
528 	mutex_exit(&sdev_lock);
529 }
530