1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include <sys/param.h>
26 #include <sys/errno.h>
27 #include <sys/vfs.h>
28 #include <sys/vfs_opreg.h>
29 #include <sys/vnode.h>
30 #include <sys/uio.h>
31 #include <sys/pathname.h>
32 #include <sys/kmem.h>
33 #include <sys/cred.h>
34 #include <sys/statvfs.h>
35 #include <sys/fs/lofs_info.h>
36 #include <sys/fs/lofs_node.h>
37 #include <sys/mount.h>
38 #include <sys/mntent.h>
39 #include <sys/mkdev.h>
40 #include <sys/priv.h>
41 #include <sys/sysmacros.h>
42 #include <sys/systm.h>
43 #include <sys/cmn_err.h>
44 #include <sys/policy.h>
45 #include <sys/tsol/label.h>
46 #include "fs/fs_subr.h"
47
48 /*
49 * This is the loadable module wrapper.
50 */
51 #include <sys/modctl.h>
52
53 static mntopts_t lofs_mntopts;
54
55 static int lofsinit(int, char *);
56
57 static vfsdef_t vfw = {
58 VFSDEF_VERSION,
59 "lofs",
60 lofsinit,
61 VSW_HASPROTO|VSW_STATS|VSW_ZMOUNT,
62 &lofs_mntopts
63 };
64
65 /*
66 * LOFS mount options table
67 */
68 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
69 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
70 static char *sub_cancel[] = { MNTOPT_LOFS_NOSUB, NULL };
71 static char *nosub_cancel[] = { MNTOPT_LOFS_SUB, NULL };
72
73 static mntopt_t mntopts[] = {
74 /*
75 * option name cancel option default arg flags
76 * private data
77 */
78 { MNTOPT_XATTR, xattr_cancel, NULL, 0,
79 (void *)0 },
80 { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0,
81 (void *)0 },
82 { MNTOPT_LOFS_SUB, sub_cancel, NULL, 0,
83 (void *)0 },
84 { MNTOPT_LOFS_NOSUB, nosub_cancel, NULL, 0,
85 (void *)0 },
86 };
87
88 static mntopts_t lofs_mntopts = {
89 sizeof (mntopts) / sizeof (mntopt_t),
90 mntopts
91 };
92
93 /*
94 * Module linkage information for the kernel.
95 */
96
97 static struct modlfs modlfs = {
98 &mod_fsops, "filesystem for lofs", &vfw
99 };
100
101 static struct modlinkage modlinkage = {
102 MODREV_1, (void *)&modlfs, NULL
103 };
104
105 /*
106 * This is the module initialization routine.
107 */
108
109 int
_init(void)110 _init(void)
111 {
112 int status;
113
114 lofs_subrinit();
115 status = mod_install(&modlinkage);
116 if (status != 0) {
117 /*
118 * Cleanup previously initialized work.
119 */
120 lofs_subrfini();
121 }
122
123 return (status);
124 }
125
/*
 * Module unload routine.
 *
 * Unloading lofs is refused for now, because unloading it would leak
 * memory.  Always returns EBUSY.
 */
int
_fini(void)
{
	return (EBUSY);
}
136
137 int
_info(struct modinfo * modinfop)138 _info(struct modinfo *modinfop)
139 {
140 return (mod_info(&modlinkage, modinfop));
141 }
142
143
144 static int lofsfstype;
145 vfsops_t *lo_vfsops;
146
147 /*
148 * lo mount vfsop
149 * Set up mount info record and attach it to vfs struct.
150 */
151 /*ARGSUSED*/
152 static int
lo_mount(struct vfs * vfsp,struct vnode * vp,struct mounta * uap,struct cred * cr)153 lo_mount(struct vfs *vfsp,
154 struct vnode *vp,
155 struct mounta *uap,
156 struct cred *cr)
157 {
158 int error;
159 struct vnode *srootvp = NULL; /* the server's root */
160 struct vnode *realrootvp;
161 struct loinfo *li;
162 int nodev;
163
164 nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL);
165
166 if ((error = secpolicy_fs_mount(cr, vp, vfsp)) != 0)
167 return (EPERM);
168
169 /*
170 * Loopback devices which get "nodevices" added can be done without
171 * "nodevices" set because we cannot import devices into a zone
172 * with loopback. Note that we have all zone privileges when
173 * this happens; if not, we'd have gotten "nosuid".
174 */
175 if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
176 vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY);
177
178 mutex_enter(&vp->v_lock);
179 if (!(uap->flags & MS_OVERLAY) &&
180 (vp->v_count != 1 || (vp->v_flag & VROOT))) {
181 mutex_exit(&vp->v_lock);
182 return (EBUSY);
183 }
184 mutex_exit(&vp->v_lock);
185
186 /*
187 * Find real root, and make vfs point to real vfs
188 */
189
190 if (error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ?
191 UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP, &realrootvp))
192 return (error);
193
194 /*
195 * Enforce MAC policy if needed.
196 *
197 * Loopback mounts must not allow writing up. The dominance test
198 * is intended to prevent a global zone caller from accidentally
199 * creating write-up conditions between two labeled zones.
200 * Local zones can't violate MAC on their own without help from
201 * the global zone because they can't name a pathname that
202 * they don't already have.
203 *
204 * The special case check for the NET_MAC_AWARE process flag is
205 * to support the case of the automounter in the global zone. We
206 * permit automounting of local zone directories such as home
207 * directories, into the global zone as required by setlabel,
208 * zonecopy, and saving of desktop sessions. Such mounts are
209 * trusted not to expose the contents of one zone's directories
210 * to another by leaking them through the global zone.
211 */
212 if (is_system_labeled() && crgetzoneid(cr) == GLOBAL_ZONEID) {
213 char specname[MAXPATHLEN];
214 zone_t *from_zptr;
215 zone_t *to_zptr;
216
217 if (vnodetopath(NULL, realrootvp, specname,
218 sizeof (specname), CRED()) != 0) {
219 VN_RELE(realrootvp);
220 return (EACCES);
221 }
222
223 from_zptr = zone_find_by_path(specname);
224 to_zptr = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
225
226 /*
227 * Special case for scratch zones used for Live Upgrade:
228 * this is used to mount the zone's root from /root to /a in
229 * the scratch zone. As with the other special case, this
230 * appears to be outside of the zone because it's not under
231 * the zone rootpath, which is $ZONEPATH/lu in the scratch
232 * zone case.
233 */
234
235 if (from_zptr != to_zptr &&
236 !(to_zptr->zone_flags & ZF_IS_SCRATCH)) {
237 /*
238 * We know at this point that the labels aren't equal
239 * because the zone pointers aren't equal, and zones
240 * can't share a label.
241 *
242 * If the source is the global zone then making
243 * it available to a local zone must be done in
244 * read-only mode as the label will become admin_low.
245 *
246 * If it is a mount between local zones then if
247 * the current process is in the global zone and has
248 * the NET_MAC_AWARE flag, then regular read-write
249 * access is allowed. If it's in some other zone, but
250 * the label on the mount point dominates the original
251 * source, then allow the mount as read-only
252 * ("read-down").
253 */
254 if (from_zptr->zone_id == GLOBAL_ZONEID) {
255 /* make the mount read-only */
256 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
257 } else { /* cross-zone mount */
258 if (to_zptr->zone_id == GLOBAL_ZONEID &&
259 /* LINTED: no consequent */
260 getpflags(NET_MAC_AWARE, cr) != 0) {
261 /* Allow the mount as read-write */
262 } else if (bldominates(
263 label2bslabel(to_zptr->zone_slabel),
264 label2bslabel(from_zptr->zone_slabel))) {
265 /* make the mount read-only */
266 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
267 } else {
268 VN_RELE(realrootvp);
269 zone_rele(to_zptr);
270 zone_rele(from_zptr);
271 return (EACCES);
272 }
273 }
274 }
275 zone_rele(to_zptr);
276 zone_rele(from_zptr);
277 }
278
279 /*
280 * realrootvp may be an AUTOFS node, in which case we perform a
281 * VOP_ACCESS() to trigger the mount of the intended filesystem.
282 * This causes a loopback mount of the intended filesystem instead
283 * of the AUTOFS filesystem.
284 *
285 * If a lofs mount creates a mount loop (such that a lofs vfs is
286 * mounted on an autofs node and that lofs vfs points back to the
287 * autofs node which it is mounted on) then a VOP_ACCESS call will
288 * create a deadlock. Once this deadlock is released, VOP_ACCESS will
289 * return EINTR. In such a case we don't want the lofs vfs to be
290 * created as the loop could panic the system.
291 */
292 if ((error = VOP_ACCESS(realrootvp, 0, 0, cr, NULL)) != 0) {
293 VN_RELE(realrootvp);
294 return (error);
295 }
296
297 /*
298 * We're interested in the top most filesystem.
299 * This is specially important when uap->spec is a trigger
300 * AUTOFS node, since we're really interested in mounting the
301 * filesystem AUTOFS mounted as result of the VOP_ACCESS()
302 * call not the AUTOFS node itself.
303 */
304 if (vn_mountedvfs(realrootvp) != NULL) {
305 if (error = traverse(&realrootvp)) {
306 VN_RELE(realrootvp);
307 return (error);
308 }
309 }
310
311 /*
312 * Allocate a vfs info struct and attach it
313 */
314 li = kmem_zalloc(sizeof (struct loinfo), KM_SLEEP);
315 li->li_realvfs = realrootvp->v_vfsp;
316 li->li_mountvfs = vfsp;
317
318 /*
319 * Set mount flags to be inherited by loopback vfs's
320 */
321 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
322 li->li_mflag |= VFS_RDONLY;
323 }
324 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
325 li->li_mflag |= (VFS_NOSETUID|VFS_NODEVICES);
326 }
327 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) {
328 li->li_mflag |= VFS_NODEVICES;
329 }
330 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
331 li->li_mflag |= VFS_NOSETUID;
332 }
333 /*
334 * Permissive flags are added to the "deny" bitmap.
335 */
336 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
337 li->li_dflag |= VFS_XATTR;
338 }
339 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
340 li->li_dflag |= VFS_NBMAND;
341 }
342
343 /*
344 * Propagate inheritable mount flags from the real vfs.
345 */
346 if ((li->li_realvfs->vfs_flag & VFS_RDONLY) &&
347 !vfs_optionisset(vfsp, MNTOPT_RO, NULL))
348 vfs_setmntopt(vfsp, MNTOPT_RO, NULL,
349 VFS_NODISPLAY);
350 if ((li->li_realvfs->vfs_flag & VFS_NOSETUID) &&
351 !vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
352 vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL,
353 VFS_NODISPLAY);
354 if ((li->li_realvfs->vfs_flag & VFS_NODEVICES) &&
355 !vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
356 vfs_setmntopt(vfsp, MNTOPT_NODEVICES, NULL,
357 VFS_NODISPLAY);
358 /*
359 * Permissive flags such as VFS_XATTR, as opposed to restrictive flags
360 * such as VFS_RDONLY, are handled differently. An explicit
361 * MNTOPT_NOXATTR should override the underlying filesystem's VFS_XATTR.
362 */
363 if ((li->li_realvfs->vfs_flag & VFS_XATTR) &&
364 !vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL) &&
365 !vfs_optionisset(vfsp, MNTOPT_XATTR, NULL))
366 vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL,
367 VFS_NODISPLAY);
368 if ((li->li_realvfs->vfs_flag & VFS_NBMAND) &&
369 !vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL) &&
370 !vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL))
371 vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL,
372 VFS_NODISPLAY);
373
374 li->li_refct = 0;
375 vfsp->vfs_data = (caddr_t)li;
376 vfsp->vfs_bcount = 0;
377 vfsp->vfs_fstype = lofsfstype;
378 vfsp->vfs_bsize = li->li_realvfs->vfs_bsize;
379
380 vfsp->vfs_dev = li->li_realvfs->vfs_dev;
381 vfsp->vfs_fsid.val[0] = li->li_realvfs->vfs_fsid.val[0];
382 vfsp->vfs_fsid.val[1] = li->li_realvfs->vfs_fsid.val[1];
383
384 if (vfs_optionisset(vfsp, MNTOPT_LOFS_NOSUB, NULL)) {
385 li->li_flag |= LO_NOSUB;
386 }
387
388 /*
389 * Propagate any VFS features
390 */
391
392 vfs_propagate_features(li->li_realvfs, vfsp);
393
394 /*
395 * Setup the hashtable. If the root of this mount isn't a directory,
396 * there's no point in allocating a large hashtable. A table with one
397 * bucket is sufficient.
398 */
399 if (realrootvp->v_type != VDIR)
400 lsetup(li, 1);
401 else
402 lsetup(li, 0);
403
404 /*
405 * Make the root vnode
406 */
407 srootvp = makelonode(realrootvp, li, 0);
408 srootvp->v_flag |= VROOT;
409 li->li_rootvp = srootvp;
410
411 #ifdef LODEBUG
412 lo_dprint(4, "lo_mount: vfs %p realvfs %p root %p realroot %p li %p\n",
413 vfsp, li->li_realvfs, srootvp, realrootvp, li);
414 #endif
415 return (0);
416 }
417
418 /*
419 * Undo loopback mount
420 */
421 static int
lo_unmount(struct vfs * vfsp,int flag,struct cred * cr)422 lo_unmount(struct vfs *vfsp, int flag, struct cred *cr)
423 {
424 struct loinfo *li;
425
426 if (secpolicy_fs_unmount(cr, vfsp) != 0)
427 return (EPERM);
428
429 /*
430 * Forced unmount is not supported by this file system
431 * and thus, ENOTSUP, is being returned.
432 */
433 if (flag & MS_FORCE)
434 return (ENOTSUP);
435
436 li = vtoli(vfsp);
437 #ifdef LODEBUG
438 lo_dprint(4, "lo_unmount(%p) li %p\n", vfsp, li);
439 #endif
440 if (li->li_refct != 1 || li->li_rootvp->v_count != 1) {
441 #ifdef LODEBUG
442 lo_dprint(4, "refct %d v_ct %d\n", li->li_refct,
443 li->li_rootvp->v_count);
444 #endif
445 return (EBUSY);
446 }
447 VN_RELE(li->li_rootvp);
448 return (0);
449 }
450
451 /*
452 * Find root of lofs mount.
453 */
454 static int
lo_root(struct vfs * vfsp,struct vnode ** vpp)455 lo_root(struct vfs *vfsp, struct vnode **vpp)
456 {
457 *vpp = vtoli(vfsp)->li_rootvp;
458 #ifdef LODEBUG
459 lo_dprint(4, "lo_root(0x%p) = %p\n", vfsp, *vpp);
460 #endif
461 /*
462 * If the root of the filesystem is a special file, return the specvp
463 * version of the vnode. We don't save the specvp vnode in our
464 * hashtable since that's exclusively for lnodes.
465 */
466 if (IS_DEVVP(*vpp)) {
467 struct vnode *svp;
468
469 svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, kcred);
470 if (svp == NULL)
471 return (ENOSYS);
472 *vpp = svp;
473 } else {
474 VN_HOLD(*vpp);
475 }
476
477 return (0);
478 }
479
480 /*
481 * Get file system statistics.
482 */
483 static int
lo_statvfs(register struct vfs * vfsp,struct statvfs64 * sbp)484 lo_statvfs(register struct vfs *vfsp, struct statvfs64 *sbp)
485 {
486 vnode_t *realrootvp;
487
488 #ifdef LODEBUG
489 lo_dprint(4, "lostatvfs %p\n", vfsp);
490 #endif
491 /*
492 * Using realrootvp->v_vfsp (instead of the realvfsp that was
493 * cached) is necessary to make lofs work woth forced UFS unmounts.
494 * In the case of a forced unmount, UFS stores a set of dummy vfsops
495 * in all the (i)vnodes in the filesystem. The dummy ops simply
496 * returns back EIO.
497 */
498 (void) lo_realvfs(vfsp, &realrootvp);
499 if (realrootvp != NULL)
500 return (VFS_STATVFS(realrootvp->v_vfsp, sbp));
501 else
502 return (EIO);
503 }
504
/*
 * Sync vfsop.
 *
 * LOFS doesn't have any data or metadata of its own to flush; pending I/O
 * on the underlying filesystem is flushed when that filesystem is synced.
 * Always returns 0.
 */
/* ARGSUSED */
static int
lo_sync(struct vfs *vfsp, short flag, struct cred *cr)
{
#ifdef LODEBUG
	lo_dprint(4, "lo_sync: %p\n", vfsp);
#endif
	return (0);
}
520
521 /*
522 * Obtain the vnode from the underlying filesystem.
523 */
524 static int
lo_vget(struct vfs * vfsp,struct vnode ** vpp,struct fid * fidp)525 lo_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
526 {
527 vnode_t *realrootvp;
528
529 #ifdef LODEBUG
530 lo_dprint(4, "lo_vget: %p\n", vfsp);
531 #endif
532 (void) lo_realvfs(vfsp, &realrootvp);
533 if (realrootvp != NULL)
534 return (VFS_VGET(realrootvp->v_vfsp, vpp, fidp));
535 else
536 return (EIO);
537 }
538
539 /*
540 * Free mount-specific data.
541 */
542 static void
lo_freevfs(struct vfs * vfsp)543 lo_freevfs(struct vfs *vfsp)
544 {
545 struct loinfo *li = vtoli(vfsp);
546
547 ldestroy(li);
548 kmem_free(li, sizeof (struct loinfo));
549 }
550
551 static int
lofsinit(int fstyp,char * name)552 lofsinit(int fstyp, char *name)
553 {
554 static const fs_operation_def_t lo_vfsops_template[] = {
555 VFSNAME_MOUNT, { .vfs_mount = lo_mount },
556 VFSNAME_UNMOUNT, { .vfs_unmount = lo_unmount },
557 VFSNAME_ROOT, { .vfs_root = lo_root },
558 VFSNAME_STATVFS, { .vfs_statvfs = lo_statvfs },
559 VFSNAME_SYNC, { .vfs_sync = lo_sync },
560 VFSNAME_VGET, { .vfs_vget = lo_vget },
561 VFSNAME_FREEVFS, { .vfs_freevfs = lo_freevfs },
562 NULL, NULL
563 };
564 int error;
565
566 error = vfs_setfsops(fstyp, lo_vfsops_template, &lo_vfsops);
567 if (error != 0) {
568 cmn_err(CE_WARN, "lofsinit: bad vfs ops template");
569 return (error);
570 }
571
572 error = vn_make_ops(name, lo_vnodeops_template, &lo_vnodeops);
573 if (error != 0) {
574 (void) vfs_freevfsops_by_type(fstyp);
575 cmn_err(CE_WARN, "lofsinit: bad vnode ops template");
576 return (error);
577 }
578
579 lofsfstype = fstyp;
580
581 return (0);
582 }
583