xref: /titanic_51/usr/src/uts/common/fs/lofs/lofs_vfsops.c (revision 12ef07e9fce5f59ad851a4e70cb765f3c56c11a5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/errno.h>
30 #include <sys/vfs.h>
31 #include <sys/vnode.h>
32 #include <sys/uio.h>
33 #include <sys/pathname.h>
34 #include <sys/kmem.h>
35 #include <sys/cred.h>
36 #include <sys/statvfs.h>
37 #include <sys/fs/lofs_info.h>
38 #include <sys/fs/lofs_node.h>
39 #include <sys/mount.h>
40 #include <sys/mntent.h>
41 #include <sys/mkdev.h>
42 #include <sys/sysmacros.h>
43 #include <sys/systm.h>
44 #include <sys/cmn_err.h>
45 #include <sys/policy.h>
46 #include "fs/fs_subr.h"
47 
48 /*
49  * This is the loadable module wrapper.
50  */
51 #include <sys/modctl.h>
52 
53 static mntopts_t lofs_mntopts;
54 
55 static int lofsinit(int, char *);
56 
57 static vfsdef_t vfw = {
58 	VFSDEF_VERSION,
59 	"lofs",
60 	lofsinit,
61 	VSW_HASPROTO|VSW_STATS,
62 	&lofs_mntopts
63 };
64 
65 /*
66  * Stuff needed to support "zonedevfs" mode.
67  */
68 static major_t lofs_major;
69 static minor_t lofs_minor;
70 static kmutex_t lofs_minor_lock;
71 
72 /*
73  * LOFS mount options table
74  */
75 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
76 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
77 static char *zonedevfs_cancel[] = { MNTOPT_LOFS_NOZONEDEVFS, NULL };
78 static char *nozonedevfs_cancel[] = { MNTOPT_LOFS_ZONEDEVFS, NULL };
79 static char *sub_cancel[] = { MNTOPT_LOFS_NOSUB, NULL };
80 static char *nosub_cancel[] = { MNTOPT_LOFS_SUB, NULL };
81 
82 static mntopt_t mntopts[] = {
83 /*
84  *	option name		cancel option	default arg	flags
85  *		private data
86  */
87 	{ MNTOPT_XATTR,		xattr_cancel,	NULL,		0,
88 		(void *)0 },
89 	{ MNTOPT_NOXATTR,	noxattr_cancel,	NULL,		0,
90 		(void *)0 },
91 	{ MNTOPT_LOFS_ZONEDEVFS,	zonedevfs_cancel,	NULL,	0,
92 		(void *)0 },
93 	{ MNTOPT_LOFS_NOZONEDEVFS,	nozonedevfs_cancel,	NULL,	0,
94 		(void *)0 },
95 	{ MNTOPT_LOFS_SUB,	sub_cancel,	NULL,		0,
96 		(void *)0 },
97 	{ MNTOPT_LOFS_NOSUB,	nosub_cancel,	NULL,		0,
98 		(void *)0 },
99 };
100 
101 static mntopts_t lofs_mntopts = {
102 	sizeof (mntopts) / sizeof (mntopt_t),
103 	mntopts
104 };
105 
106 /*
107  * Module linkage information for the kernel.
108  */
109 
110 static struct modlfs modlfs = {
111 	&mod_fsops, "filesystem for lofs", &vfw
112 };
113 
114 static struct modlinkage modlinkage = {
115 	MODREV_1, (void *)&modlfs, NULL
116 };
117 
118 /*
119  * This is the module initialization routine.
120  */
121 int
122 _init()
123 {
124 	int status;
125 
126 	lofs_subrinit();
127 	status = mod_install(&modlinkage);
128 	if (status != 0) {
129 		/*
130 		 * Cleanup previously initialized work.
131 		 */
132 		lofs_subrfini();
133 	}
134 
135 	return (status);
136 }
137 
/*
 * Module unload routine.
 *
 * Unloading is refused unconditionally for now, because memory would be
 * leaked if the lofs module were unloaded.
 */
int
_fini()
{
	const int refused = EBUSY;

	return (refused);
}
147 
148 int
149 _info(struct modinfo *modinfop)
150 {
151 	return (mod_info(&modlinkage, modinfop));
152 }
153 
154 
155 static int lofsfstype;
156 vfsops_t *lo_vfsops;
157 
158 /*
159  * lo mount vfsop
160  * Set up mount info record and attach it to vfs struct.
161  */
162 /*ARGSUSED*/
163 static int
164 lo_mount(struct vfs *vfsp,
165 	struct vnode *vp,
166 	struct mounta *uap,
167 	struct cred *cr)
168 {
169 	int error;
170 	struct vnode *srootvp = NULL;	/* the server's root */
171 	struct vnode *realrootvp;
172 	struct loinfo *li;
173 	int is_zonedevfs = 0;
174 	int nodev;
175 
176 	nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL);
177 
178 	if ((error = secpolicy_fs_mount(cr, vp, vfsp)) != 0)
179 		return (EPERM);
180 
181 	/*
182 	 * Loopback devices which get "nodevices" added can be done without
183 	 * "nodevices" set because we cannot import devices into a zone
184 	 * with loopback.  Note that we have all zone privileges when
185 	 * this happens; if not, we'd have gotten "nosuid".
186 	 */
187 	if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
188 		vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY);
189 
190 	/*
191 	 * We must ensure that only the global zone applies the 'zonedevfs'
192 	 * option; we don't want non-global zones to be able to establish
193 	 * lofs mounts using the special dev_t we use to ensure that the
194 	 * contents of a zone's /dev cannot be victim to link(2) or rename(2).
195 	 * See below, where we set all of this up.
196 	 *
197 	 * Since this is more like a privilege check, we use crgetzoneid(cr)
198 	 * instead of getzoneid().
199 	 */
200 	is_zonedevfs = vfs_optionisset(vfsp, MNTOPT_LOFS_ZONEDEVFS, NULL);
201 	if (crgetzoneid(cr) != GLOBAL_ZONEID && is_zonedevfs)
202 		return (EPERM);
203 
204 	mutex_enter(&vp->v_lock);
205 	if (!(uap->flags & MS_OVERLAY) &&
206 		(vp->v_count != 1 || (vp->v_flag & VROOT))) {
207 		mutex_exit(&vp->v_lock);
208 		return (EBUSY);
209 	}
210 	mutex_exit(&vp->v_lock);
211 
212 	/*
213 	 * Find real root, and make vfs point to real vfs
214 	 */
215 	if (error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ?
216 		UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP,
217 	    &realrootvp))
218 		return (error);
219 
220 	/*
221 	 * realrootvp may be an AUTOFS node, in which case we
222 	 * perform a VOP_ACCESS() to trigger the mount of the
223 	 * intended filesystem, so we loopback mount the intended
224 	 * filesystem instead of the AUTOFS filesystem.
225 	 */
226 	(void) VOP_ACCESS(realrootvp, 0, 0, cr);
227 
228 	/*
229 	 * We're interested in the top most filesystem.
230 	 * This is specially important when uap->spec is a trigger
231 	 * AUTOFS node, since we're really interested in mounting the
232 	 * filesystem AUTOFS mounted as result of the VOP_ACCESS()
233 	 * call not the AUTOFS node itself.
234 	 */
235 	if (vn_mountedvfs(realrootvp) != NULL) {
236 		if (error = traverse(&realrootvp)) {
237 			VN_RELE(realrootvp);
238 			return (error);
239 		}
240 	}
241 
242 	/*
243 	 * Allocate a vfs info struct and attach it
244 	 */
245 	li = kmem_zalloc(sizeof (struct loinfo), KM_SLEEP);
246 	li->li_realvfs = realrootvp->v_vfsp;
247 	li->li_mountvfs = vfsp;
248 
249 	/*
250 	 * Set mount flags to be inherited by loopback vfs's
251 	 */
252 	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
253 		li->li_mflag |= VFS_RDONLY;
254 	}
255 	if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
256 		li->li_mflag |= (VFS_NOSETUID|VFS_NODEVICES);
257 	}
258 	if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) {
259 		li->li_mflag |= VFS_NODEVICES;
260 	}
261 	if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
262 		li->li_mflag |= VFS_NOSETUID;
263 	}
264 	/*
265 	 * Permissive flags are added to the "deny" bitmap.
266 	 */
267 	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
268 		li->li_dflag |= VFS_XATTR;
269 	}
270 	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
271 		li->li_dflag |= VFS_NBMAND;
272 	}
273 
274 	/*
275 	 * Propagate inheritable mount flags from the real vfs.
276 	 */
277 	if ((li->li_realvfs->vfs_flag & VFS_RDONLY) &&
278 	    !vfs_optionisset(vfsp, MNTOPT_RO, NULL))
279 		vfs_setmntopt(vfsp, MNTOPT_RO, NULL,
280 		    VFS_NODISPLAY);
281 	if ((li->li_realvfs->vfs_flag & VFS_NOSETUID) &&
282 	    !vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
283 		vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL,
284 		    VFS_NODISPLAY);
285 	if ((li->li_realvfs->vfs_flag & VFS_NODEVICES) &&
286 	    !vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
287 		vfs_setmntopt(vfsp, MNTOPT_NODEVICES, NULL,
288 		    VFS_NODISPLAY);
289 	/*
290 	 * Permissive flags such as VFS_XATTR, as opposed to restrictive flags
291 	 * such as VFS_RDONLY, are handled differently.  An explicit
292 	 * MNTOPT_NOXATTR should override the underlying filesystem's VFS_XATTR.
293 	 */
294 	if ((li->li_realvfs->vfs_flag & VFS_XATTR) &&
295 	    !vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL) &&
296 	    !vfs_optionisset(vfsp, MNTOPT_XATTR, NULL))
297 		vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL,
298 		    VFS_NODISPLAY);
299 	if ((li->li_realvfs->vfs_flag & VFS_NBMAND) &&
300 	    !vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL) &&
301 	    !vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL))
302 		vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL,
303 		    VFS_NODISPLAY);
304 
305 	li->li_refct = 0;
306 	vfsp->vfs_data = (caddr_t)li;
307 	vfsp->vfs_bcount = 0;
308 	vfsp->vfs_fstype = lofsfstype;
309 	vfsp->vfs_bsize = li->li_realvfs->vfs_bsize;
310 
311 	/*
312 	 * Test to see if we need to be in "zone /dev" mode.  In zonedevfs
313 	 * mode, we pull a nasty trick; we make sure that the lofs dev_t does
314 	 * *not* reflect the underlying device, so that no renames or links
315 	 * can occur to or from the /dev hierarchy.
316 	 */
317 	if (is_zonedevfs) {
318 		dev_t dev;
319 
320 		mutex_enter(&lofs_minor_lock);
321 		do {
322 			lofs_minor = (lofs_minor + 1) & MAXMIN32;
323 			dev = makedevice(lofs_major, lofs_minor);
324 		} while (vfs_devismounted(dev));
325 		mutex_exit(&lofs_minor_lock);
326 
327 		vfsp->vfs_dev = dev;
328 		vfs_make_fsid(&vfsp->vfs_fsid, dev, lofsfstype);
329 
330 		li->li_flag |= LO_ZONEDEVFS;
331 	} else {
332 		vfsp->vfs_dev = li->li_realvfs->vfs_dev;
333 		vfsp->vfs_fsid.val[0] = li->li_realvfs->vfs_fsid.val[0];
334 		vfsp->vfs_fsid.val[1] = li->li_realvfs->vfs_fsid.val[1];
335 	}
336 
337 	if (vfs_optionisset(vfsp, MNTOPT_LOFS_NOSUB, NULL)) {
338 		li->li_flag |= LO_NOSUB;
339 	}
340 
341 	/*
342 	 * Setup the hashtable. If the root of this mount isn't a directory,
343 	 * there's no point in allocating a large hashtable. A table with one
344 	 * bucket is sufficient.
345 	 */
346 	if (realrootvp->v_type != VDIR)
347 		lsetup(li, 1);
348 	else
349 		lsetup(li, 0);
350 
351 	/*
352 	 * Make the root vnode
353 	 */
354 	srootvp = makelonode(realrootvp, li, 0);
355 	srootvp->v_flag |= VROOT;
356 	li->li_rootvp = srootvp;
357 
358 #ifdef LODEBUG
359 	lo_dprint(4, "lo_mount: vfs %p realvfs %p root %p realroot %p li %p\n",
360 	    vfsp, li->li_realvfs, srootvp, realrootvp, li);
361 #endif
362 	return (0);
363 }
364 
365 /*
366  * Undo loopback mount
367  */
368 static int
369 lo_unmount(struct vfs *vfsp, int flag, struct cred *cr)
370 {
371 	struct loinfo *li;
372 
373 	if (secpolicy_fs_unmount(cr, vfsp) != 0)
374 		return (EPERM);
375 
376 	/*
377 	 * Forced unmount is not supported by this file system
378 	 * and thus, ENOTSUP, is being returned.
379 	 */
380 	if (flag & MS_FORCE)
381 		return (ENOTSUP);
382 
383 	li = vtoli(vfsp);
384 #ifdef LODEBUG
385 	lo_dprint(4, "lo_unmount(%p) li %p\n", vfsp, li);
386 #endif
387 	if (li->li_refct != 1 || li->li_rootvp->v_count != 1) {
388 #ifdef LODEBUG
389 		lo_dprint(4, "refct %d v_ct %d\n", li->li_refct,
390 		    li->li_rootvp->v_count);
391 #endif
392 		return (EBUSY);
393 	}
394 	VN_RELE(li->li_rootvp);
395 	return (0);
396 }
397 
398 /*
399  * Find root of lofs mount.
400  */
401 static int
402 lo_root(struct vfs *vfsp, struct vnode **vpp)
403 {
404 	*vpp = vtoli(vfsp)->li_rootvp;
405 #ifdef LODEBUG
406 	lo_dprint(4, "lo_root(0x%p) = %p\n", vfsp, *vpp);
407 #endif
408 	/*
409 	 * If the root of the filesystem is a special file, return the specvp
410 	 * version of the vnode. We don't save the specvp vnode in our
411 	 * hashtable since that's exclusively for lnodes.
412 	 */
413 	if (IS_DEVVP(*vpp)) {
414 		struct vnode *svp;
415 
416 		svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, kcred);
417 		if (svp == NULL)
418 			return (ENOSYS);
419 		*vpp = svp;
420 	} else {
421 		VN_HOLD(*vpp);
422 	}
423 
424 	return (0);
425 }
426 
427 /*
428  * Get file system statistics.
429  */
430 static int
431 lo_statvfs(register struct vfs *vfsp, struct statvfs64 *sbp)
432 {
433 	vnode_t *realrootvp;
434 
435 #ifdef LODEBUG
436 	lo_dprint(4, "lostatvfs %p\n", vfsp);
437 #endif
438 	/*
439 	 * Using realrootvp->v_vfsp (instead of the realvfsp that was
440 	 * cached) is necessary to make lofs work woth forced UFS unmounts.
441 	 * In the case of a forced unmount, UFS stores a set of dummy vfsops
442 	 * in all the (i)vnodes in the filesystem. The dummy ops simply
443 	 * returns back EIO.
444 	 */
445 	(void) lo_realvfs(vfsp, &realrootvp);
446 	if (realrootvp != NULL)
447 		return (VFS_STATVFS(realrootvp->v_vfsp, sbp));
448 	else
449 		return (EIO);
450 }
451 
/*
 * Sync vfsop.
 *
 * LOFS holds no data or metadata of its own to flush; pending I/O on the
 * underlying file system is flushed when that file system is synced.  The
 * routine therefore does nothing and always returns 0.
 */
/* ARGSUSED */
static int
lo_sync(struct vfs *vfsp, short flag, struct cred *cr)
{
	int rv = 0;

#ifdef LODEBUG
	lo_dprint(4, "lo_sync: %p\n", vfsp);
#endif
	return (rv);
}
467 
468 /*
469  * Obtain the vnode from the underlying filesystem.
470  */
471 static int
472 lo_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
473 {
474 	vnode_t *realrootvp;
475 
476 #ifdef LODEBUG
477 	lo_dprint(4, "lo_vget: %p\n", vfsp);
478 #endif
479 	(void) lo_realvfs(vfsp, &realrootvp);
480 	if (realrootvp != NULL)
481 		return (VFS_VGET(realrootvp->v_vfsp, vpp, fidp));
482 	else
483 		return (EIO);
484 }
485 
486 /*
487  * Free mount-specific data.
488  */
489 static void
490 lo_freevfs(struct vfs *vfsp)
491 {
492 	struct loinfo *li = vtoli(vfsp);
493 
494 	ldestroy(li);
495 	kmem_free(li, sizeof (struct loinfo));
496 }
497 
498 static int
499 lofsinit(int fstyp, char *name)
500 {
501 	static const fs_operation_def_t lo_vfsops_template[] = {
502 		VFSNAME_MOUNT, lo_mount,
503 		VFSNAME_UNMOUNT, lo_unmount,
504 		VFSNAME_ROOT, lo_root,
505 		VFSNAME_STATVFS, lo_statvfs,
506 		VFSNAME_SYNC, (fs_generic_func_p) lo_sync,
507 		VFSNAME_VGET, lo_vget,
508 		VFSNAME_FREEVFS, (fs_generic_func_p) lo_freevfs,
509 		NULL, NULL
510 	};
511 	int error;
512 
513 	error = vfs_setfsops(fstyp, lo_vfsops_template, &lo_vfsops);
514 	if (error != 0) {
515 		cmn_err(CE_WARN, "lofsinit: bad vfs ops template");
516 		return (error);
517 	}
518 
519 	error = vn_make_ops(name, lo_vnodeops_template, &lo_vnodeops);
520 	if (error != 0) {
521 		(void) vfs_freevfsops_by_type(fstyp);
522 		cmn_err(CE_WARN, "lofsinit: bad vnode ops template");
523 		return (error);
524 	}
525 
526 	lofsfstype = fstyp;
527 
528 	if ((lofs_major = getudev()) == (major_t)-1) {
529 		(void) vfs_freevfsops_by_type(fstyp);
530 		cmn_err(CE_WARN, "lofsinit: Can't get unique device number.");
531 		return (ENXIO);
532 	}
533 
534 	lofs_minor = 0;
535 	mutex_init(&lofs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
536 
537 	return (0);
538 }
539