xref: /illumos-gate/usr/src/uts/common/fs/lofs/lofs_vnops.c (revision 2e837a72011f54762249b6612c2a64f171efcd43)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright 2015 Joyent, Inc.
25  */
26 
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/errno.h>
30 #include <sys/vnode.h>
31 #include <sys/vfs.h>
32 #include <sys/vfs_opreg.h>
33 #include <sys/uio.h>
34 #include <sys/cred.h>
35 #include <sys/pathname.h>
36 #include <sys/debug.h>
37 #include <sys/fs/lofs_node.h>
38 #include <sys/fs/lofs_info.h>
39 #include <fs/fs_subr.h>
40 #include <vm/as.h>
41 #include <vm/seg.h>
42 
43 /*
44  * These are the vnode ops routines which implement the vnode interface to
45  * the looped-back file system.  These routines just take their parameters,
46  * and then calling the appropriate real vnode routine(s) to do the work.
47  */
48 
49 static int
50 lo_open(vnode_t **vpp, int flag, struct cred *cr, caller_context_t *ct)
51 {
52 	vnode_t *vp = *vpp;
53 	vnode_t *rvp;
54 	vnode_t *oldvp;
55 	int error;
56 
57 #ifdef LODEBUG
58 	lo_dprint(4, "lo_open vp %p cnt=%d realvp %p cnt=%d\n",
59 	    vp, vp->v_count, realvp(vp), realvp(vp)->v_count);
60 #endif
61 
62 	oldvp = vp;
63 	vp = rvp = realvp(vp);
64 	/*
65 	 * Need to hold new reference to vp since VOP_OPEN() may
66 	 * decide to release it.
67 	 */
68 	VN_HOLD(vp);
69 	error = VOP_OPEN(&rvp, flag, cr, ct);
70 
71 	if (!error && rvp != vp) {
72 		/*
73 		 * the FS which we called should have released the
74 		 * new reference on vp
75 		 */
76 		*vpp = makelonode(rvp, vtoli(oldvp->v_vfsp), 0);
77 		if ((*vpp)->v_type == VDIR) {
78 			/*
79 			 * Copy over any looping flags to the new lnode.
80 			 */
81 			(vtol(*vpp))->lo_looping |= (vtol(oldvp))->lo_looping;
82 		}
83 		if (IS_DEVVP(*vpp)) {
84 			vnode_t *svp;
85 
86 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
87 			VN_RELE(*vpp);
88 			if (svp == NULL)
89 				error = ENOSYS;
90 			else
91 				*vpp = svp;
92 		}
93 		VN_RELE(oldvp);
94 	} else {
95 		ASSERT(rvp->v_count > 1);
96 		VN_RELE(rvp);
97 	}
98 
99 	return (error);
100 }
101 
102 static int
103 lo_close(
104 	vnode_t *vp,
105 	int flag,
106 	int count,
107 	offset_t offset,
108 	struct cred *cr,
109 	caller_context_t *ct)
110 {
111 #ifdef LODEBUG
112 	lo_dprint(4, "lo_close vp %p realvp %p\n", vp, realvp(vp));
113 #endif
114 	vp = realvp(vp);
115 	return (VOP_CLOSE(vp, flag, count, offset, cr, ct));
116 }
117 
118 static int
119 lo_read(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
120 	caller_context_t *ct)
121 {
122 #ifdef LODEBUG
123 	lo_dprint(4, "lo_read vp %p realvp %p\n", vp, realvp(vp));
124 #endif
125 	vp = realvp(vp);
126 	return (VOP_READ(vp, uiop, ioflag, cr, ct));
127 }
128 
129 static int
130 lo_write(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
131 	caller_context_t *ct)
132 {
133 #ifdef LODEBUG
134 	lo_dprint(4, "lo_write vp %p realvp %p\n", vp, realvp(vp));
135 #endif
136 	vp = realvp(vp);
137 	return (VOP_WRITE(vp, uiop, ioflag, cr, ct));
138 }
139 
140 static int
141 lo_ioctl(
142 	vnode_t *vp,
143 	int cmd,
144 	intptr_t arg,
145 	int flag,
146 	struct cred *cr,
147 	int *rvalp,
148 	caller_context_t *ct)
149 {
150 #ifdef LODEBUG
151 	lo_dprint(4, "lo_ioctl vp %p realvp %p\n", vp, realvp(vp));
152 #endif
153 	vp = realvp(vp);
154 	return (VOP_IOCTL(vp, cmd, arg, flag, cr, rvalp, ct));
155 }
156 
157 static int
158 lo_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, caller_context_t *ct)
159 {
160 	vp = realvp(vp);
161 	return (VOP_SETFL(vp, oflags, nflags, cr, ct));
162 }
163 
164 static int
165 lo_getattr(
166 	vnode_t *vp,
167 	struct vattr *vap,
168 	int flags,
169 	struct cred *cr,
170 	caller_context_t *ct)
171 {
172 	int error;
173 
174 #ifdef LODEBUG
175 	lo_dprint(4, "lo_getattr vp %p realvp %p\n", vp, realvp(vp));
176 #endif
177 	if (error = VOP_GETATTR(realvp(vp), vap, flags, cr, ct))
178 		return (error);
179 
180 	return (0);
181 }
182 
183 static int
184 lo_setattr(
185 	vnode_t *vp,
186 	struct vattr *vap,
187 	int flags,
188 	struct cred *cr,
189 	caller_context_t *ct)
190 {
191 #ifdef LODEBUG
192 	lo_dprint(4, "lo_setattr vp %p realvp %p\n", vp, realvp(vp));
193 #endif
194 	vp = realvp(vp);
195 	return (VOP_SETATTR(vp, vap, flags, cr, ct));
196 }
197 
198 static int
199 lo_access(
200 	vnode_t *vp,
201 	int mode,
202 	int flags,
203 	struct cred *cr,
204 	caller_context_t *ct)
205 {
206 #ifdef LODEBUG
207 	lo_dprint(4, "lo_access vp %p realvp %p\n", vp, realvp(vp));
208 #endif
209 	if (mode & VWRITE) {
210 		if (vp->v_type == VREG && vn_is_readonly(vp))
211 			return (EROFS);
212 	}
213 	vp = realvp(vp);
214 	return (VOP_ACCESS(vp, mode, flags, cr, ct));
215 }
216 
217 static int
218 lo_fsync(vnode_t *vp, int syncflag, struct cred *cr, caller_context_t *ct)
219 {
220 #ifdef LODEBUG
221 	lo_dprint(4, "lo_fsync vp %p realvp %p\n", vp, realvp(vp));
222 #endif
223 	vp = realvp(vp);
224 	return (VOP_FSYNC(vp, syncflag, cr, ct));
225 }
226 
227 /*ARGSUSED*/
228 static void
229 lo_inactive(vnode_t *vp, struct cred *cr, caller_context_t *ct)
230 {
231 #ifdef LODEBUG
232 	lo_dprint(4, "lo_inactive %p, realvp %p\n", vp, realvp(vp));
233 #endif
234 	freelonode(vtol(vp));
235 }
236 
237 /* ARGSUSED */
238 static int
239 lo_fid(vnode_t *vp, struct fid *fidp, caller_context_t *ct)
240 {
241 #ifdef LODEBUG
242 	lo_dprint(4, "lo_fid %p, realvp %p\n", vp, realvp(vp));
243 #endif
244 	vp = realvp(vp);
245 	return (VOP_FID(vp, fidp, ct));
246 }
247 
248 /*
249  * Given a vnode of lofs type, lookup nm name and
250  * return a shadow vnode (of lofs type) of the
251  * real vnode found.
252  *
253  * Due to the nature of lofs, there is a potential
254  * looping in path traversal.
255  *
256  * starting from the mount point of an lofs;
257  * a loop is defined to be a traversal path
258  * where the mount point or the real vnode of
259  * the root of this lofs is encountered twice.
260  * Once at the start of traversal and second
261  * when the looping is found.
262  *
263  * When a loop is encountered, a shadow of the
264  * covered vnode is returned to stop the looping.
265  *
266  * This normally works, but with the advent of
267  * the new automounter, returning the shadow of the
268  * covered vnode (autonode, in this case) does not
269  * stop the loop.  Because further lookup on this
270  * lonode will cause the autonode to call lo_lookup()
271  * on the lonode covering it.
272  *
273  * example "/net/jurassic/net/jurassic" is a loop.
274  * returning the shadow of the autonode corresponding to
275  * "/net/jurassic/net/jurassic" will not terminate the
276  * loop.   To solve this problem we allow the loop to go
277  * through one more level component lookup.  Whichever
278  * directory is then looked up in "/net/jurassic/net/jurassic"
279  * the vnode returned is the vnode covered by the autonode
280  * "net" and this will terminate the loop.
281  *
282  * Lookup for dot dot has to be dealt with separately.
283  * It will be nice to have a "one size fits all" kind
284  * of solution, so that we don't have so many ifs statement
285  * in the lo_lookup() to handle dotdot.  But, since
286  * there are so many special cases to handle different
287  * kinds looping above, we need special codes to handle
288  * dotdot lookup as well.
289  */
290 static int
291 lo_lookup(
292 	vnode_t *dvp,
293 	char *nm,
294 	vnode_t **vpp,
295 	struct pathname *pnp,
296 	int flags,
297 	vnode_t *rdir,
298 	struct cred *cr,
299 	caller_context_t *ct,
300 	int *direntflags,
301 	pathname_t *realpnp)
302 {
303 	vnode_t *vp = NULL, *tvp = NULL, *nonlovp;
304 	int error, is_indirectloop;
305 	vnode_t *realdvp = realvp(dvp);
306 	struct loinfo *li = vtoli(dvp->v_vfsp);
307 	int looping = 0;
308 	int autoloop = 0;
309 	int doingdotdot = 0;
310 	int nosub = 0;
311 	int mkflag = 0;
312 
313 	/*
314 	 * If name is empty and no XATTR flags are set, then return
315 	 * dvp (empty name == lookup ".").  If an XATTR flag is set
316 	 * then we need to call VOP_LOOKUP to get the xattr dir.
317 	 */
318 	if (nm[0] == '\0' && ! (flags & (CREATE_XATTR_DIR|LOOKUP_XATTR))) {
319 		VN_HOLD(dvp);
320 		*vpp = dvp;
321 		return (0);
322 	}
323 
324 	if (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0') {
325 		doingdotdot++;
326 		/*
327 		 * Handle ".." out of mounted filesystem
328 		 */
329 		while ((realdvp->v_flag & VROOT) && realdvp != rootdir) {
330 			realdvp = realdvp->v_vfsp->vfs_vnodecovered;
331 			ASSERT(realdvp != NULL);
332 		}
333 	}
334 
335 	*vpp = NULL;	/* default(error) case */
336 
337 	/*
338 	 * Do the normal lookup
339 	 */
340 	if (error = VOP_LOOKUP(realdvp, nm, &vp, pnp, flags, rdir, cr,
341 	    ct, direntflags, realpnp)) {
342 		vp = NULL;
343 		goto out;
344 	}
345 
346 	/*
347 	 * We do this check here to avoid returning a stale file handle to the
348 	 * caller.
349 	 */
350 	if (nm[0] == '.' && nm[1] == '\0') {
351 		ASSERT(vp == realdvp);
352 		VN_HOLD(dvp);
353 		VN_RELE(vp);
354 		*vpp = dvp;
355 		return (0);
356 	}
357 
358 	if (doingdotdot) {
359 		if ((vtol(dvp))->lo_looping & LO_LOOPING) {
360 			vfs_t *vfsp;
361 
362 			error = vn_vfsrlock_wait(realdvp);
363 			if (error)
364 				goto out;
365 			vfsp = vn_mountedvfs(realdvp);
366 			/*
367 			 * In the standard case if the looping flag is set and
368 			 * performing dotdot we would be returning from a
369 			 * covered vnode, implying vfsp could not be null. The
370 			 * exceptions being if we have looping and overlay
371 			 * mounts or looping and covered file systems.
372 			 */
373 			if (vfsp == NULL) {
374 				/*
375 				 * Overlay mount or covered file system,
376 				 * so just make the shadow node.
377 				 */
378 				vn_vfsunlock(realdvp);
379 				*vpp = makelonode(vp, li, 0);
380 				(vtol(*vpp))->lo_looping |= LO_LOOPING;
381 				return (0);
382 			}
383 			/*
384 			 * When looping get the actual found vnode
385 			 * instead of the vnode covered.
386 			 * Here we have to hold the lock for realdvp
387 			 * since an unmount during the traversal to the
388 			 * root vnode would turn *vfsp into garbage
389 			 * which would be fatal.
390 			 */
391 			error = VFS_ROOT(vfsp, &tvp);
392 			vn_vfsunlock(realdvp);
393 
394 			if (error)
395 				goto out;
396 
397 			if ((tvp == li->li_rootvp) && (vp == realvp(tvp))) {
398 				/*
399 				 * we're back at the real vnode
400 				 * of the rootvp
401 				 *
402 				 * return the rootvp
403 				 * Ex: /mnt/mnt/..
404 				 * where / has been lofs-mounted
405 				 * onto /mnt.  Return the lofs
406 				 * node mounted at /mnt.
407 				 */
408 				*vpp = tvp;
409 				VN_RELE(vp);
410 				return (0);
411 			} else {
412 				/*
413 				 * We are returning from a covered
414 				 * node whose vfs_mountedhere is
415 				 * not pointing to vfs of the current
416 				 * root vnode.
417 				 * This is a condn where in we
418 				 * returned a covered node say Zc
419 				 * but Zc is not the cover of current
420 				 * root.
421 				 * i.e.., if X is the root vnode
422 				 * lookup(Zc,"..") is taking us to
423 				 * X.
424 				 * Ex: /net/X/net/X/Y
425 				 *
426 				 * If LO_AUTOLOOP (autofs/lofs looping detected)
427 				 * has been set then we are encountering the
428 				 * cover of Y (Y being any directory vnode
429 				 * under /net/X/net/X/).
430 				 * When performing a dotdot set the
431 				 * returned vp to the vnode covered
432 				 * by the mounted lofs, ie /net/X/net/X
433 				 */
434 				VN_RELE(tvp);
435 				if ((vtol(dvp))->lo_looping & LO_AUTOLOOP) {
436 					VN_RELE(vp);
437 					vp = li->li_rootvp;
438 					vp = vp->v_vfsp->vfs_vnodecovered;
439 					VN_HOLD(vp);
440 					*vpp = makelonode(vp, li, 0);
441 					(vtol(*vpp))->lo_looping |= LO_LOOPING;
442 					return (0);
443 				}
444 			}
445 		} else {
446 			/*
447 			 * No frills just make the shadow node.
448 			 */
449 			*vpp = makelonode(vp, li, 0);
450 			return (0);
451 		}
452 	}
453 
454 	nosub = (vtoli(dvp->v_vfsp)->li_flag & LO_NOSUB);
455 
456 	/*
457 	 * If this vnode is mounted on, then we
458 	 * traverse to the vnode which is the root of
459 	 * the mounted file system.
460 	 */
461 	if (!nosub && (error = traverse(&vp)))
462 		goto out;
463 
464 	/*
465 	 * Make a lnode for the real vnode.
466 	 */
467 	if (vp->v_type != VDIR || nosub) {
468 		*vpp = makelonode(vp, li, 0);
469 		if (IS_DEVVP(*vpp)) {
470 			vnode_t *svp;
471 
472 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
473 			VN_RELE(*vpp);
474 			if (svp == NULL)
475 				error = ENOSYS;
476 			else
477 				*vpp = svp;
478 		}
479 		return (error);
480 	}
481 
482 	/*
483 	 * if the found vnode (vp) is not of type lofs
484 	 * then we're just going to make a shadow of that
485 	 * vp and get out.
486 	 *
487 	 * If the found vnode (vp) is of lofs type, and
488 	 * we're not doing dotdot, check if we are
489 	 * looping.
490 	 */
491 	if (!doingdotdot && vfs_matchops(vp->v_vfsp, lo_vfsops)) {
492 		/*
493 		 * Check if we're looping, i.e.
494 		 * vp equals the root vp of the lofs, directly
495 		 * or indirectly, return the covered node.
496 		 */
497 
498 		if (!((vtol(dvp))->lo_looping & LO_LOOPING)) {
499 			if (vp == li->li_rootvp) {
500 				/*
501 				 * Direct looping condn.
502 				 * Ex:- X is / mounted directory so lookup of
503 				 * /X/X is a direct looping condn.
504 				 */
505 				tvp = vp;
506 				vp = vp->v_vfsp->vfs_vnodecovered;
507 				VN_HOLD(vp);
508 				VN_RELE(tvp);
509 				looping++;
510 			} else {
511 				/*
512 				 * Indirect looping can be defined as
513 				 * real lookup returning rootvp of the current
514 				 * tree in any level of recursion.
515 				 *
516 				 * This check is useful if there are multiple
517 				 * levels of lofs indirections. Suppose vnode X
518 				 * in the current lookup has as its real vnode
519 				 * another lofs node. Y = realvp(X) Y should be
520 				 * a lofs node for the check to continue or Y
521 				 * is not the rootvp of X.
522 				 * Ex:- say X and Y are two vnodes
523 				 * say real(Y) is X and real(X) is Z
524 				 * parent vnode for X and Y is Z
525 				 * lookup(Y,"path") say we are looking for Y
526 				 * again under Y and we have to return Yc.
527 				 * but the lookup of Y under Y doesnot return
528 				 * Y the root vnode again here is why.
529 				 * 1. lookup(Y,"path of Y") will go to
530 				 * 2. lookup(real(Y),"path of Y") and then to
531 				 * 3. lookup(real(X),"path of Y").
532 				 * and now what lookup level 1 sees is the
533 				 * outcome of 2 but the vnode Y is due to
534 				 * lookup(Z,"path of Y") so we have to skip
535 				 * intermediate levels to find if in any level
536 				 * there is a looping.
537 				 */
538 				is_indirectloop = 0;
539 				nonlovp = vp;
540 				while (
541 				    vfs_matchops(nonlovp->v_vfsp, lo_vfsops) &&
542 				    !(is_indirectloop)) {
543 					if (li->li_rootvp  == nonlovp) {
544 						is_indirectloop++;
545 						break;
546 					}
547 					nonlovp = realvp(nonlovp);
548 				}
549 
550 				if (is_indirectloop) {
551 					VN_RELE(vp);
552 					vp = nonlovp;
553 					vp = vp->v_vfsp->vfs_vnodecovered;
554 					VN_HOLD(vp);
555 					looping++;
556 				}
557 			}
558 		} else {
559 			/*
560 			 * come here only because of the interaction between
561 			 * the autofs and lofs.
562 			 *
563 			 * Lookup of "/net/X/net/X" will return a shadow of
564 			 * an autonode X_a which we call X_l.
565 			 *
566 			 * Lookup of anything under X_l, will trigger a call to
567 			 * auto_lookup(X_a,nm) which will eventually call
568 			 * lo_lookup(X_lr,nm) where X_lr is the root vnode of
569 			 * the current lofs.
570 			 *
571 			 * We come here only when we are called with X_l as dvp
572 			 * and look for something underneath.
573 			 *
574 			 * Now that an autofs/lofs looping condition has been
575 			 * identified any directory vnode contained within
576 			 * dvp will be set to the vnode covered by the
577 			 * mounted autofs. Thus all directories within dvp
578 			 * will appear empty hence teminating the looping.
579 			 * The LO_AUTOLOOP flag is set on the returned lonode
580 			 * to indicate the termination of the autofs/lofs
581 			 * looping. This is required for the correct behaviour
582 			 * when performing a dotdot.
583 			 */
584 			realdvp = realvp(dvp);
585 			while (vfs_matchops(realdvp->v_vfsp, lo_vfsops)) {
586 				realdvp = realvp(realdvp);
587 			}
588 
589 			error = VFS_ROOT(realdvp->v_vfsp, &tvp);
590 			if (error)
591 				goto out;
592 			/*
593 			 * tvp now contains the rootvp of the vfs of the
594 			 * real vnode of dvp. The directory vnode vp is set
595 			 * to the covered vnode to terminate looping. No
596 			 * distinction is made between any vp as all directory
597 			 * vnodes contained in dvp are returned as the covered
598 			 * vnode.
599 			 */
600 			VN_RELE(vp);
601 			vp = tvp;	/* possibly is an autonode */
602 
603 			/*
604 			 * Need to find the covered vnode
605 			 */
606 			if (vp->v_vfsp->vfs_vnodecovered == NULL) {
607 				/*
608 				 * We don't have a covered vnode so this isn't
609 				 * an autonode. To find the autonode simply
610 				 * find the vnode covered by the lofs rootvp.
611 				 */
612 				vp = li->li_rootvp;
613 				vp = vp->v_vfsp->vfs_vnodecovered;
614 				VN_RELE(tvp);
615 				error = VFS_ROOT(vp->v_vfsp, &tvp);
616 				if (error)
617 					goto out;
618 				vp = tvp;	/* now this is an autonode */
619 				if (vp->v_vfsp->vfs_vnodecovered == NULL) {
620 					/*
621 					 * Still can't find a covered vnode.
622 					 * Fail the lookup, or we'd loop.
623 					 */
624 					error = ENOENT;
625 					goto out;
626 				}
627 			}
628 			vp = vp->v_vfsp->vfs_vnodecovered;
629 			VN_HOLD(vp);
630 			VN_RELE(tvp);
631 			/*
632 			 * Force the creation of a new lnode even if the hash
633 			 * table contains a lnode that references this vnode.
634 			 */
635 			mkflag = LOF_FORCE;
636 			autoloop++;
637 		}
638 	}
639 	*vpp = makelonode(vp, li, mkflag);
640 
641 	if ((looping) ||
642 	    (((vtol(dvp))->lo_looping & LO_LOOPING) && !doingdotdot)) {
643 		(vtol(*vpp))->lo_looping |= LO_LOOPING;
644 	}
645 
646 	if (autoloop) {
647 		(vtol(*vpp))->lo_looping |= LO_AUTOLOOP;
648 	}
649 
650 out:
651 	if (error != 0 && vp != NULL)
652 		VN_RELE(vp);
653 #ifdef LODEBUG
654 	lo_dprint(4,
655 	"lo_lookup dvp %x realdvp %x nm '%s' newvp %x real vp %x error %d\n",
656 	    dvp, realvp(dvp), nm, *vpp, vp, error);
657 #endif
658 	return (error);
659 }
660 
661 /*ARGSUSED*/
662 static int
663 lo_create(
664 	vnode_t *dvp,
665 	char *nm,
666 	struct vattr *va,
667 	enum vcexcl exclusive,
668 	int mode,
669 	vnode_t **vpp,
670 	struct cred *cr,
671 	int flag,
672 	caller_context_t *ct,
673 	vsecattr_t *vsecp)
674 {
675 	int error;
676 	vnode_t *vp = NULL;
677 
678 #ifdef LODEBUG
679 	lo_dprint(4, "lo_create vp %p realvp %p\n", dvp, realvp(dvp));
680 #endif
681 	if (*nm == '\0') {
682 		ASSERT(vpp && dvp == *vpp);
683 		vp = realvp(*vpp);
684 	}
685 
686 	error = VOP_CREATE(realvp(dvp), nm, va, exclusive, mode, &vp, cr, flag,
687 	    ct, vsecp);
688 	if (!error) {
689 		*vpp = makelonode(vp, vtoli(dvp->v_vfsp), 0);
690 		if (IS_DEVVP(*vpp)) {
691 			vnode_t *svp;
692 
693 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
694 			VN_RELE(*vpp);
695 			if (svp == NULL)
696 				error = ENOSYS;
697 			else
698 				*vpp = svp;
699 		}
700 	} else if (error == ENOSYS && exclusive == NONEXCL &&
701 	    dvp == vtoli(dvp->v_vfsp)->li_rootvp &&
702 	    realvp(dvp)->v_type == VREG) {
703 		/*
704 		 * We have a single regular file lofs mounted, thus the file is
705 		 * the root vnode (the directory vp is the file vp). Some
706 		 * underlying file systems (e.g. tmpfs or ufs) properly handle
707 		 * this style of create but at least zfs won't support create
708 		 * this way (see zfs_fvnodeops_template which has fs_nosys for
709 		 * the vop_create entry because zfs_create doesn't work
710 		 * properly for this case).
711 		 */
712 		if ((error = VOP_ACCESS(dvp, mode, 0, cr, NULL)) == 0) {
713 			/*
714 			 * Since we already know the vnode for the existing
715 			 * file we can handle create as a no-op, as expected,
716 			 * truncating the file if necessary.
717 			 */
718 			struct vattr vattr;
719 
720 			vattr.va_size = 0;
721 			vattr.va_mask = AT_SIZE;
722 
723 			if ((va->va_mask & AT_SIZE) != 0 && va->va_size == 0 &&
724 			    VOP_SETATTR(dvp, &vattr, 0, CRED(), NULL) != 0)
725 				return (error);
726 
727 			/*
728 			 * vn_createat will do a vn_rele on the file if it is
729 			 * pre-existing, which it is in the case of a single
730 			 * file mounted as the root. Thus, when we eventually
731 			 * close the file the count will already be 1 so the
732 			 * vnode would be freed. To prevent that, we add an
733 			 * extra hold here.
734 			 */
735 			VN_HOLD(dvp);
736 			*vpp = dvp;
737 			error = 0;
738 		}
739 	}
740 
741 	return (error);
742 }
743 
744 static int
745 lo_remove(
746 	vnode_t *dvp,
747 	char *nm,
748 	struct cred *cr,
749 	caller_context_t *ct,
750 	int flags)
751 {
752 #ifdef LODEBUG
753 	lo_dprint(4, "lo_remove vp %p realvp %p\n", dvp, realvp(dvp));
754 #endif
755 	dvp = realvp(dvp);
756 	return (VOP_REMOVE(dvp, nm, cr, ct, flags));
757 }
758 
759 static int
760 lo_link(
761 	vnode_t *tdvp,
762 	vnode_t *vp,
763 	char *tnm,
764 	struct cred *cr,
765 	caller_context_t *ct,
766 	int flags)
767 {
768 	vnode_t *realvp;
769 
770 #ifdef LODEBUG
771 	lo_dprint(4, "lo_link vp %p realvp %p\n", vp, realvp(vp));
772 #endif
773 
774 	/*
775 	 * The source and destination vnodes may be in different lofs
776 	 * filesystems sharing the same underlying filesystem, so we need to
777 	 * make sure that the filesystem containing the source vnode is not
778 	 * mounted read-only (vn_link() has already checked the target vnode).
779 	 *
780 	 * In a situation such as:
781 	 *
782 	 * /data	- regular filesystem
783 	 * /foo		- lofs mount of /data/foo
784 	 * /bar		- read-only lofs mount of /data/bar
785 	 *
786 	 * This disallows a link from /bar/somefile to /foo/somefile,
787 	 * which would otherwise allow changes to somefile on the read-only
788 	 * mounted /bar.
789 	 */
790 
791 	if (vn_is_readonly(vp)) {
792 		return (EROFS);
793 	}
794 	while (vn_matchops(vp, lo_vnodeops)) {
795 		vp = realvp(vp);
796 	}
797 
798 	/*
799 	 * In the case where the source vnode is on another stacking
800 	 * filesystem (such as specfs), the loop above will
801 	 * terminate before finding the true underlying vnode.
802 	 *
803 	 * We use VOP_REALVP here to continue the search.
804 	 */
805 	if (VOP_REALVP(vp, &realvp, ct) == 0)
806 		vp = realvp;
807 
808 	while (vn_matchops(tdvp, lo_vnodeops)) {
809 		tdvp = realvp(tdvp);
810 	}
811 	if (vp->v_vfsp != tdvp->v_vfsp)
812 		return (EXDEV);
813 	return (VOP_LINK(tdvp, vp, tnm, cr, ct, flags));
814 }
815 
816 static int
817 lo_rename(
818 	vnode_t *odvp,
819 	char *onm,
820 	vnode_t *ndvp,
821 	char *nnm,
822 	struct cred *cr,
823 	caller_context_t *ct,
824 	int flags)
825 {
826 	vnode_t *tnvp;
827 
828 #ifdef LODEBUG
829 	lo_dprint(4, "lo_rename vp %p realvp %p\n", odvp, realvp(odvp));
830 #endif
831 	/*
832 	 * If we are coming from a loop back mounted fs, that has been
833 	 * mounted in the same filesystem as where we want to move to,
834 	 * and that filesystem is read/write, but the lofs filesystem is
835 	 * read only, we don't want to allow a rename of the file. The
836 	 * vn_rename code checks to be sure the target is read/write already
837 	 * so that is not necessary here. However, consider the following
838 	 * example:
839 	 *		/ - regular root fs
840 	 *		/foo - directory in root
841 	 *		/foo/bar - file in foo directory(in root fs)
842 	 *		/baz - directory in root
843 	 *		mount -F lofs -o ro /foo /baz - all still in root
844 	 *			directory
845 	 * The fact that we mounted /foo on /baz read only should stop us
846 	 * from renaming the file /foo/bar /bar, but it doesn't since
847 	 * / is read/write. We are still renaming here since we are still
848 	 * in the same filesystem, it is just that we do not check to see
849 	 * if the filesystem we are coming from in this case is read only.
850 	 */
851 	if (odvp->v_vfsp->vfs_flag & VFS_RDONLY)
852 		return (EROFS);
853 	/*
854 	 * We need to make sure we're not trying to remove a mount point for a
855 	 * filesystem mounted on top of lofs, which only we know about.
856 	 */
857 	if (vn_matchops(ndvp, lo_vnodeops))	/* Not our problem. */
858 		goto rename;
859 
860 	/*
861 	 * XXXci - Once case-insensitive behavior is implemented, it should
862 	 * be added here.
863 	 */
864 	if (VOP_LOOKUP(ndvp, nnm, &tnvp, NULL, 0, NULL, cr,
865 	    ct, NULL, NULL) != 0)
866 		goto rename;
867 	if (tnvp->v_type != VDIR) {
868 		VN_RELE(tnvp);
869 		goto rename;
870 	}
871 	if (vn_mountedvfs(tnvp)) {
872 		VN_RELE(tnvp);
873 		return (EBUSY);
874 	}
875 	VN_RELE(tnvp);
876 rename:
877 	/*
878 	 * Since the case we're dealing with above can happen at any layer in
879 	 * the stack of lofs filesystems, we need to recurse down the stack,
880 	 * checking to see if there are any instances of a filesystem mounted on
881 	 * top of lofs. In order to keep on using the lofs version of
882 	 * VOP_RENAME(), we make sure that while the target directory is of type
883 	 * lofs, the source directory (the one used for getting the fs-specific
884 	 * version of VOP_RENAME()) is also of type lofs.
885 	 */
886 	if (vn_matchops(ndvp, lo_vnodeops)) {
887 		ndvp = realvp(ndvp);	/* Check the next layer */
888 	} else {
889 		/*
890 		 * We can go fast here
891 		 */
892 		while (vn_matchops(odvp, lo_vnodeops)) {
893 			odvp = realvp(odvp);
894 		}
895 		if (odvp->v_vfsp != ndvp->v_vfsp)
896 			return (EXDEV);
897 	}
898 	return (VOP_RENAME(odvp, onm, ndvp, nnm, cr, ct, flags));
899 }
900 
901 static int
902 lo_mkdir(
903 	vnode_t *dvp,
904 	char *nm,
905 	struct vattr *va,
906 	vnode_t **vpp,
907 	struct cred *cr,
908 	caller_context_t *ct,
909 	int flags,
910 	vsecattr_t *vsecp)
911 {
912 	int error;
913 
914 #ifdef LODEBUG
915 	lo_dprint(4, "lo_mkdir vp %p realvp %p\n", dvp, realvp(dvp));
916 #endif
917 	error = VOP_MKDIR(realvp(dvp), nm, va, vpp, cr, ct, flags, vsecp);
918 	if (!error)
919 		*vpp = makelonode(*vpp, vtoli(dvp->v_vfsp), 0);
920 	return (error);
921 }
922 
923 static int
924 lo_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
925 {
926 #ifdef LODEBUG
927 	lo_dprint(4, "lo_realvp %p\n", vp);
928 #endif
929 	while (vn_matchops(vp, lo_vnodeops))
930 		vp = realvp(vp);
931 
932 	if (VOP_REALVP(vp, vpp, ct) != 0)
933 		*vpp = vp;
934 	return (0);
935 }
936 
937 static int
938 lo_rmdir(
939 	vnode_t *dvp,
940 	char *nm,
941 	vnode_t *cdir,
942 	struct cred *cr,
943 	caller_context_t *ct,
944 	int flags)
945 {
946 	vnode_t *rvp = cdir;
947 
948 #ifdef LODEBUG
949 	lo_dprint(4, "lo_rmdir vp %p realvp %p\n", dvp, realvp(dvp));
950 #endif
951 	/* if cdir is lofs vnode ptr get its real vnode ptr */
952 	if (vn_matchops(dvp, vn_getops(rvp)))
953 		(void) lo_realvp(cdir, &rvp, ct);
954 	dvp = realvp(dvp);
955 	return (VOP_RMDIR(dvp, nm, rvp, cr, ct, flags));
956 }
957 
958 static int
959 lo_symlink(
960 	vnode_t *dvp,
961 	char *lnm,
962 	struct vattr *tva,
963 	char *tnm,
964 	struct cred *cr,
965 	caller_context_t *ct,
966 	int flags)
967 {
968 #ifdef LODEBUG
969 	lo_dprint(4, "lo_symlink vp %p realvp %p\n", dvp, realvp(dvp));
970 #endif
971 	dvp = realvp(dvp);
972 	return (VOP_SYMLINK(dvp, lnm, tva, tnm, cr, ct, flags));
973 }
974 
975 static int
976 lo_readlink(
977 	vnode_t *vp,
978 	struct uio *uiop,
979 	struct cred *cr,
980 	caller_context_t *ct)
981 {
982 	vp = realvp(vp);
983 	return (VOP_READLINK(vp, uiop, cr, ct));
984 }
985 
986 static int
987 lo_readdir(
988 	vnode_t *vp,
989 	struct uio *uiop,
990 	struct cred *cr,
991 	int *eofp,
992 	caller_context_t *ct,
993 	int flags)
994 {
995 #ifdef LODEBUG
996 	lo_dprint(4, "lo_readdir vp %p realvp %p\n", vp, realvp(vp));
997 #endif
998 	vp = realvp(vp);
999 	return (VOP_READDIR(vp, uiop, cr, eofp, ct, flags));
1000 }
1001 
1002 static int
1003 lo_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
1004 {
1005 	vp = realvp(vp);
1006 	return (VOP_RWLOCK(vp, write_lock, ct));
1007 }
1008 
1009 static void
1010 lo_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
1011 {
1012 	vp = realvp(vp);
1013 	VOP_RWUNLOCK(vp, write_lock, ct);
1014 }
1015 
1016 static int
1017 lo_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
1018 {
1019 	vp = realvp(vp);
1020 	return (VOP_SEEK(vp, ooff, noffp, ct));
1021 }
1022 
1023 static int
1024 lo_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
1025 {
1026 	while (vn_matchops(vp1, lo_vnodeops))
1027 		vp1 = realvp(vp1);
1028 	while (vn_matchops(vp2, lo_vnodeops))
1029 		vp2 = realvp(vp2);
1030 	return (VOP_CMP(vp1, vp2, ct));
1031 }
1032 
1033 static int
1034 lo_frlock(
1035 	vnode_t *vp,
1036 	int cmd,
1037 	struct flock64 *bfp,
1038 	int flag,
1039 	offset_t offset,
1040 	struct flk_callback *flk_cbp,
1041 	cred_t *cr,
1042 	caller_context_t *ct)
1043 {
1044 	vp = realvp(vp);
1045 	return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1046 }
1047 
1048 static int
1049 lo_space(
1050 	vnode_t *vp,
1051 	int cmd,
1052 	struct flock64 *bfp,
1053 	int flag,
1054 	offset_t offset,
1055 	struct cred *cr,
1056 	caller_context_t *ct)
1057 {
1058 	vp = realvp(vp);
1059 	return (VOP_SPACE(vp, cmd, bfp, flag, offset, cr, ct));
1060 }
1061 
1062 static int
1063 lo_getpage(
1064 	vnode_t *vp,
1065 	offset_t off,
1066 	size_t len,
1067 	uint_t *prot,
1068 	struct page *parr[],
1069 	size_t psz,
1070 	struct seg *seg,
1071 	caddr_t addr,
1072 	enum seg_rw rw,
1073 	struct cred *cr,
1074 	caller_context_t *ct)
1075 {
1076 	vp = realvp(vp);
1077 	return (VOP_GETPAGE(vp, off, len, prot, parr, psz, seg, addr, rw, cr,
1078 	    ct));
1079 }
1080 
1081 static int
1082 lo_putpage(
1083 	vnode_t *vp,
1084 	offset_t off,
1085 	size_t len,
1086 	int flags,
1087 	struct cred *cr,
1088 	caller_context_t *ct)
1089 {
1090 	vp = realvp(vp);
1091 	return (VOP_PUTPAGE(vp, off, len, flags, cr, ct));
1092 }
1093 
1094 static int
1095 lo_map(
1096 	vnode_t *vp,
1097 	offset_t off,
1098 	struct as *as,
1099 	caddr_t *addrp,
1100 	size_t len,
1101 	uchar_t prot,
1102 	uchar_t maxprot,
1103 	uint_t flags,
1104 	struct cred *cr,
1105 	caller_context_t *ct)
1106 {
1107 	vp = realvp(vp);
1108 	return (VOP_MAP(vp, off, as, addrp, len, prot, maxprot, flags, cr, ct));
1109 }
1110 
1111 static int
1112 lo_addmap(
1113 	vnode_t *vp,
1114 	offset_t off,
1115 	struct as *as,
1116 	caddr_t addr,
1117 	size_t len,
1118 	uchar_t prot,
1119 	uchar_t maxprot,
1120 	uint_t flags,
1121 	struct cred *cr,
1122 	caller_context_t *ct)
1123 {
1124 	vp = realvp(vp);
1125 	return (VOP_ADDMAP(vp, off, as, addr, len, prot, maxprot, flags, cr,
1126 	    ct));
1127 }
1128 
1129 static int
1130 lo_delmap(
1131 	vnode_t *vp,
1132 	offset_t off,
1133 	struct as *as,
1134 	caddr_t addr,
1135 	size_t len,
1136 	uint_t prot,
1137 	uint_t maxprot,
1138 	uint_t flags,
1139 	struct cred *cr,
1140 	caller_context_t *ct)
1141 {
1142 	vp = realvp(vp);
1143 	return (VOP_DELMAP(vp, off, as, addr, len, prot, maxprot, flags, cr,
1144 	    ct));
1145 }
1146 
1147 static int
1148 lo_poll(
1149 	vnode_t *vp,
1150 	short events,
1151 	int anyyet,
1152 	short *reventsp,
1153 	struct pollhead **phpp,
1154 	caller_context_t *ct)
1155 {
1156 	vp = realvp(vp);
1157 	return (VOP_POLL(vp, events, anyyet, reventsp, phpp, ct));
1158 }
1159 
1160 static int
1161 lo_dump(vnode_t *vp, caddr_t addr, offset_t bn, offset_t count,
1162     caller_context_t *ct)
1163 {
1164 	vp = realvp(vp);
1165 	return (VOP_DUMP(vp, addr, bn, count, ct));
1166 }
1167 
1168 static int
1169 lo_pathconf(
1170 	vnode_t *vp,
1171 	int cmd,
1172 	ulong_t *valp,
1173 	struct cred *cr,
1174 	caller_context_t *ct)
1175 {
1176 	vp = realvp(vp);
1177 	return (VOP_PATHCONF(vp, cmd, valp, cr, ct));
1178 }
1179 
1180 static int
1181 lo_pageio(
1182 	vnode_t *vp,
1183 	struct page *pp,
1184 	u_offset_t io_off,
1185 	size_t io_len,
1186 	int flags,
1187 	cred_t *cr,
1188 	caller_context_t *ct)
1189 {
1190 	vp = realvp(vp);
1191 	return (VOP_PAGEIO(vp, pp, io_off, io_len, flags, cr, ct));
1192 }
1193 
1194 static void
1195 lo_dispose(
1196 	vnode_t *vp,
1197 	page_t *pp,
1198 	int fl,
1199 	int dn,
1200 	cred_t *cr,
1201 	caller_context_t *ct)
1202 {
1203 	vp = realvp(vp);
1204 	if (vp != NULL && !VN_ISKAS(vp))
1205 		VOP_DISPOSE(vp, pp, fl, dn, cr, ct);
1206 }
1207 
1208 static int
1209 lo_setsecattr(
1210 	vnode_t *vp,
1211 	vsecattr_t *secattr,
1212 	int flags,
1213 	struct cred *cr,
1214 	caller_context_t *ct)
1215 {
1216 	if (vn_is_readonly(vp))
1217 		return (EROFS);
1218 	vp = realvp(vp);
1219 	return (VOP_SETSECATTR(vp, secattr, flags, cr, ct));
1220 }
1221 
1222 static int
1223 lo_getsecattr(
1224 	vnode_t *vp,
1225 	vsecattr_t *secattr,
1226 	int flags,
1227 	struct cred *cr,
1228 	caller_context_t *ct)
1229 {
1230 	vp = realvp(vp);
1231 	return (VOP_GETSECATTR(vp, secattr, flags, cr, ct));
1232 }
1233 
1234 static int
1235 lo_shrlock(
1236 	vnode_t *vp,
1237 	int cmd,
1238 	struct shrlock *shr,
1239 	int flag,
1240 	cred_t *cr,
1241 	caller_context_t *ct)
1242 {
1243 	vp = realvp(vp);
1244 	return (VOP_SHRLOCK(vp, cmd, shr, flag, cr, ct));
1245 }
1246 
1247 /*
1248  * Loopback vnode operations vector.
1249  */
1250 
1251 struct vnodeops *lo_vnodeops;
1252 
1253 const fs_operation_def_t lo_vnodeops_template[] = {
1254 	VOPNAME_OPEN,		{ .vop_open = lo_open },
1255 	VOPNAME_CLOSE,		{ .vop_close = lo_close },
1256 	VOPNAME_READ,		{ .vop_read = lo_read },
1257 	VOPNAME_WRITE,		{ .vop_write = lo_write },
1258 	VOPNAME_IOCTL,		{ .vop_ioctl = lo_ioctl },
1259 	VOPNAME_SETFL,		{ .vop_setfl = lo_setfl },
1260 	VOPNAME_GETATTR,	{ .vop_getattr = lo_getattr },
1261 	VOPNAME_SETATTR,	{ .vop_setattr = lo_setattr },
1262 	VOPNAME_ACCESS,		{ .vop_access = lo_access },
1263 	VOPNAME_LOOKUP,		{ .vop_lookup = lo_lookup },
1264 	VOPNAME_CREATE,		{ .vop_create = lo_create },
1265 	VOPNAME_REMOVE,		{ .vop_remove = lo_remove },
1266 	VOPNAME_LINK,		{ .vop_link = lo_link },
1267 	VOPNAME_RENAME,		{ .vop_rename = lo_rename },
1268 	VOPNAME_MKDIR,		{ .vop_mkdir = lo_mkdir },
1269 	VOPNAME_RMDIR,		{ .vop_rmdir = lo_rmdir },
1270 	VOPNAME_READDIR,	{ .vop_readdir = lo_readdir },
1271 	VOPNAME_SYMLINK,	{ .vop_symlink = lo_symlink },
1272 	VOPNAME_READLINK,	{ .vop_readlink = lo_readlink },
1273 	VOPNAME_FSYNC,		{ .vop_fsync = lo_fsync },
1274 	VOPNAME_INACTIVE,	{ .vop_inactive = lo_inactive },
1275 	VOPNAME_FID,		{ .vop_fid = lo_fid },
1276 	VOPNAME_RWLOCK,		{ .vop_rwlock = lo_rwlock },
1277 	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = lo_rwunlock },
1278 	VOPNAME_SEEK,		{ .vop_seek = lo_seek },
1279 	VOPNAME_CMP,		{ .vop_cmp = lo_cmp },
1280 	VOPNAME_FRLOCK,		{ .vop_frlock = lo_frlock },
1281 	VOPNAME_SPACE,		{ .vop_space = lo_space },
1282 	VOPNAME_REALVP,		{ .vop_realvp = lo_realvp },
1283 	VOPNAME_GETPAGE,	{ .vop_getpage = lo_getpage },
1284 	VOPNAME_PUTPAGE,	{ .vop_putpage = lo_putpage },
1285 	VOPNAME_MAP,		{ .vop_map = lo_map },
1286 	VOPNAME_ADDMAP,		{ .vop_addmap = lo_addmap },
1287 	VOPNAME_DELMAP,		{ .vop_delmap = lo_delmap },
1288 	VOPNAME_POLL,		{ .vop_poll = lo_poll },
1289 	VOPNAME_DUMP,		{ .vop_dump = lo_dump },
1290 	VOPNAME_DUMPCTL,	{ .error = fs_error },	/* XXX - why? */
1291 	VOPNAME_PATHCONF,	{ .vop_pathconf = lo_pathconf },
1292 	VOPNAME_PAGEIO,		{ .vop_pageio = lo_pageio },
1293 	VOPNAME_DISPOSE,	{ .vop_dispose = lo_dispose },
1294 	VOPNAME_SETSECATTR,	{ .vop_setsecattr = lo_setsecattr },
1295 	VOPNAME_GETSECATTR,	{ .vop_getsecattr = lo_getsecattr },
1296 	VOPNAME_SHRLOCK,	{ .vop_shrlock = lo_shrlock },
1297 	NULL,			NULL
1298 };
1299