xref: /titanic_52/usr/src/uts/common/fs/lofs/lofs_vnops.c (revision 672986541be54a7a471bb088e60780c37e371d7e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/errno.h>
31 #include <sys/vnode.h>
32 #include <sys/vfs.h>
33 #include <sys/vfs_opreg.h>
34 #include <sys/uio.h>
35 #include <sys/cred.h>
36 #include <sys/pathname.h>
37 #include <sys/debug.h>
38 #include <sys/fs/lofs_node.h>
39 #include <sys/fs/lofs_info.h>
40 #include <fs/fs_subr.h>
41 #include <vm/as.h>
42 #include <vm/seg.h>
43 
44 /*
 * These are the vnode ops routines which implement the vnode interface to
 * the looped-back file system.  These routines just take their parameters
 * and then call the appropriate real vnode routine(s) to do the work.
48  */
49 
50 static int
51 lo_open(vnode_t **vpp, int flag, struct cred *cr)
52 {
53 	vnode_t *vp = *vpp;
54 	vnode_t *rvp;
55 	vnode_t *oldvp;
56 	int error;
57 
58 #ifdef LODEBUG
59 	lo_dprint(4, "lo_open vp %p cnt=%d realvp %p cnt=%d\n",
60 		vp, vp->v_count, realvp(vp), realvp(vp)->v_count);
61 #endif
62 
63 	oldvp = vp;
64 	vp = rvp = realvp(vp);
65 	/*
66 	 * Need to hold new reference to vp since VOP_OPEN() may
67 	 * decide to release it.
68 	 */
69 	VN_HOLD(vp);
70 	error = VOP_OPEN(&rvp, flag, cr);
71 
72 	if (!error && rvp != vp) {
73 		/*
74 		 * the FS which we called should have released the
75 		 * new reference on vp
76 		 */
77 		*vpp = makelonode(rvp, vtoli(oldvp->v_vfsp), 0);
78 		if ((*vpp)->v_type == VDIR) {
79 			/*
80 			 * Copy over any looping flags to the new lnode.
81 			 */
82 			(vtol(*vpp))->lo_looping |= (vtol(oldvp))->lo_looping;
83 		}
84 		if (IS_DEVVP(*vpp)) {
85 			vnode_t *svp;
86 
87 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
88 			VN_RELE(*vpp);
89 			if (svp == NULL)
90 				error = ENOSYS;
91 			else
92 				*vpp = svp;
93 		}
94 		VN_RELE(oldvp);
95 	} else {
96 		ASSERT(rvp->v_count > 1);
97 		VN_RELE(rvp);
98 	}
99 
100 	return (error);
101 }
102 
103 static int
104 lo_close(
105 	vnode_t *vp,
106 	int flag,
107 	int count,
108 	offset_t offset,
109 	struct cred *cr)
110 {
111 #ifdef LODEBUG
112 	lo_dprint(4, "lo_close vp %p realvp %p\n", vp, realvp(vp));
113 #endif
114 	vp = realvp(vp);
115 	return (VOP_CLOSE(vp, flag, count, offset, cr));
116 }
117 
118 static int
119 lo_read(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
120 	caller_context_t *ct)
121 {
122 #ifdef LODEBUG
123 	lo_dprint(4, "lo_read vp %p realvp %p\n", vp, realvp(vp));
124 #endif
125 	vp = realvp(vp);
126 	return (VOP_READ(vp, uiop, ioflag, cr, ct));
127 }
128 
129 static int
130 lo_write(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
131 	caller_context_t *ct)
132 {
133 #ifdef LODEBUG
134 	lo_dprint(4, "lo_write vp %p realvp %p\n", vp, realvp(vp));
135 #endif
136 	vp = realvp(vp);
137 	return (VOP_WRITE(vp, uiop, ioflag, cr, ct));
138 }
139 
140 static int
141 lo_ioctl(
142 	vnode_t *vp,
143 	int cmd,
144 	intptr_t arg,
145 	int flag,
146 	struct cred *cr,
147 	int *rvalp)
148 {
149 #ifdef LODEBUG
150 	lo_dprint(4, "lo_ioctl vp %p realvp %p\n", vp, realvp(vp));
151 #endif
152 	vp = realvp(vp);
153 	return (VOP_IOCTL(vp, cmd, arg, flag, cr, rvalp));
154 }
155 
156 static int
157 lo_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr)
158 {
159 	vp = realvp(vp);
160 	return (VOP_SETFL(vp, oflags, nflags, cr));
161 }
162 
163 static int
164 lo_getattr(
165 	vnode_t *vp,
166 	struct vattr *vap,
167 	int flags,
168 	struct cred *cr)
169 {
170 	int error;
171 
172 #ifdef LODEBUG
173 	lo_dprint(4, "lo_getattr vp %p realvp %p\n", vp, realvp(vp));
174 #endif
175 	if (error = VOP_GETATTR(realvp(vp), vap, flags, cr))
176 		return (error);
177 
178 	return (0);
179 }
180 
181 static int
182 lo_setattr(
183 	vnode_t *vp,
184 	struct vattr *vap,
185 	int flags,
186 	struct cred *cr,
187 	caller_context_t *ct)
188 {
189 #ifdef LODEBUG
190 	lo_dprint(4, "lo_setattr vp %p realvp %p\n", vp, realvp(vp));
191 #endif
192 	vp = realvp(vp);
193 	return (VOP_SETATTR(vp, vap, flags, cr, ct));
194 }
195 
196 static int
197 lo_access(vnode_t *vp, int mode, int flags, struct cred *cr)
198 {
199 #ifdef LODEBUG
200 	lo_dprint(4, "lo_access vp %p realvp %p\n", vp, realvp(vp));
201 #endif
202 	if (mode & VWRITE) {
203 		if (vp->v_type == VREG && vn_is_readonly(vp))
204 			return (EROFS);
205 	}
206 	vp = realvp(vp);
207 	return (VOP_ACCESS(vp, mode, flags, cr));
208 }
209 
210 static int
211 lo_fsync(vnode_t *vp, int syncflag, struct cred *cr)
212 {
213 #ifdef LODEBUG
214 	lo_dprint(4, "lo_fsync vp %p realvp %p\n", vp, realvp(vp));
215 #endif
216 	vp = realvp(vp);
217 	return (VOP_FSYNC(vp, syncflag, cr));
218 }
219 
220 /*ARGSUSED*/
221 static void
222 lo_inactive(vnode_t *vp, struct cred *cr)
223 {
224 #ifdef LODEBUG
225 	lo_dprint(4, "lo_inactive %p, realvp %p\n", vp, realvp(vp));
226 #endif
227 	freelonode(vtol(vp));
228 }
229 
230 /* ARGSUSED */
231 static int
232 lo_fid(vnode_t *vp, struct fid *fidp)
233 {
234 #ifdef LODEBUG
235 	lo_dprint(4, "lo_fid %p, realvp %p\n", vp, realvp(vp));
236 #endif
237 	vp = realvp(vp);
238 	return (VOP_FID(vp, fidp));
239 }
240 
241 /*
242  * Given a vnode of lofs type, lookup nm name and
243  * return a shadow vnode (of lofs type) of the
244  * real vnode found.
245  *
246  * Due to the nature of lofs, there is a potential
247  * looping in path traversal.
248  *
249  * starting from the mount point of an lofs;
250  * a loop is defined to be a traversal path
251  * where the mount point or the real vnode of
252  * the root of this lofs is encountered twice.
253  * Once at the start of traversal and second
254  * when the looping is found.
255  *
256  * When a loop is encountered, a shadow of the
257  * covered vnode is returned to stop the looping.
258  *
259  * This normally works, but with the advent of
260  * the new automounter, returning the shadow of the
261  * covered vnode (autonode, in this case) does not
262  * stop the loop.  Because further lookup on this
263  * lonode will cause the autonode to call lo_lookup()
264  * on the lonode covering it.
265  *
266  * example "/net/jurassic/net/jurassic" is a loop.
267  * returning the shadow of the autonode corresponding to
268  * "/net/jurassic/net/jurassic" will not terminate the
269  * loop.   To solve this problem we allow the loop to go
270  * through one more level component lookup.  Whichever
271  * directory is then looked up in "/net/jurassic/net/jurassic"
272  * the vnode returned is the vnode covered by the autonode
273  * "net" and this will terminate the loop.
274  *
275  * Lookup for dot dot has to be dealt with separately.
276  * It will be nice to have a "one size fits all" kind
277  * of solution, so that we don't have so many ifs statement
278  * in the lo_lookup() to handle dotdot.  But, since
279  * there are so many special cases to handle different
280  * kinds looping above, we need special codes to handle
281  * dotdot lookup as well.
282  */
283 static int
284 lo_lookup(
285 	vnode_t *dvp,
286 	char *nm,
287 	vnode_t **vpp,
288 	struct pathname *pnp,
289 	int flags,
290 	vnode_t *rdir,
291 	struct cred *cr)
292 {
293 	vnode_t *vp = NULL, *tvp = NULL, *nonlovp;
294 	int error, is_indirectloop;
295 	vnode_t *realdvp = realvp(dvp);
296 	struct loinfo *li = vtoli(dvp->v_vfsp);
297 	int looping = 0;
298 	int autoloop = 0;
299 	int doingdotdot = 0;
300 	int nosub = 0;
301 	int mkflag = 0;
302 
303 	/*
304 	 * If name is empty and no XATTR flags are set, then return
305 	 * dvp (empty name == lookup ".").  If an XATTR flag is set
306 	 * then we need to call VOP_LOOKUP to get the xattr dir.
307 	 */
308 	if (nm[0] == '\0' && ! (flags & (CREATE_XATTR_DIR|LOOKUP_XATTR))) {
309 		VN_HOLD(dvp);
310 		*vpp = dvp;
311 		return (0);
312 	}
313 
314 	if (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0') {
315 		doingdotdot++;
316 		/*
317 		 * Handle ".." out of mounted filesystem
318 		 */
319 		while ((realdvp->v_flag & VROOT) && realdvp != rootdir) {
320 			realdvp = realdvp->v_vfsp->vfs_vnodecovered;
321 			ASSERT(realdvp != NULL);
322 		}
323 	}
324 
325 	*vpp = NULL;	/* default(error) case */
326 
327 	/*
328 	 * Do the normal lookup
329 	 */
330 	if (error = VOP_LOOKUP(realdvp, nm, &vp, pnp, flags, rdir, cr)) {
331 		vp = NULL;
332 		goto out;
333 	}
334 
335 	/*
336 	 * We do this check here to avoid returning a stale file handle to the
337 	 * caller.
338 	 */
339 	if (nm[0] == '.' && nm[1] == '\0') {
340 		ASSERT(vp == realdvp);
341 		VN_HOLD(dvp);
342 		VN_RELE(vp);
343 		*vpp = dvp;
344 		return (0);
345 	}
346 
347 	if (doingdotdot) {
348 		if ((vtol(dvp))->lo_looping & LO_LOOPING) {
349 			vfs_t *vfsp;
350 
351 			error = vn_vfsrlock_wait(realdvp);
352 			if (error)
353 				goto out;
354 			vfsp = vn_mountedvfs(realdvp);
355 			/*
356 			 * In the standard case if the looping flag is set and
357 			 * performing dotdot we would be returning from a
358 			 * covered vnode, implying vfsp could not be null. The
359 			 * exceptions being if we have looping and overlay
360 			 * mounts or looping and covered file systems.
361 			 */
362 			if (vfsp == NULL) {
363 				/*
364 				 * Overlay mount or covered file system,
365 				 * so just make the shadow node.
366 				 */
367 				vn_vfsunlock(realdvp);
368 				*vpp = makelonode(vp, li, 0);
369 				(vtol(*vpp))->lo_looping |= LO_LOOPING;
370 				return (0);
371 			}
372 			/*
373 			 * When looping get the actual found vnode
374 			 * instead of the vnode covered.
375 			 * Here we have to hold the lock for realdvp
376 			 * since an unmount during the traversal to the
377 			 * root vnode would turn *vfsp into garbage
378 			 * which would be fatal.
379 			 */
380 			error = VFS_ROOT(vfsp, &tvp);
381 			vn_vfsunlock(realdvp);
382 
383 			if (error)
384 				goto out;
385 
386 			if ((tvp == li->li_rootvp) && (vp == realvp(tvp))) {
387 				/*
388 				 * we're back at the real vnode
389 				 * of the rootvp
390 				 *
391 				 * return the rootvp
392 				 * Ex: /mnt/mnt/..
393 				 * where / has been lofs-mounted
394 				 * onto /mnt.  Return the lofs
395 				 * node mounted at /mnt.
396 				 */
397 				*vpp = tvp;
398 				VN_RELE(vp);
399 				return (0);
400 			} else {
401 				/*
402 				 * We are returning from a covered
403 				 * node whose vfs_mountedhere is
404 				 * not pointing to vfs of the current
405 				 * root vnode.
406 				 * This is a condn where in we
407 				 * returned a covered node say Zc
408 				 * but Zc is not the cover of current
409 				 * root.
410 				 * i.e.., if X is the root vnode
411 				 * lookup(Zc,"..") is taking us to
412 				 * X.
413 				 * Ex: /net/X/net/X/Y
414 				 *
415 				 * If LO_AUTOLOOP (autofs/lofs looping detected)
416 				 * has been set then we are encountering the
417 				 * cover of Y (Y being any directory vnode
418 				 * under /net/X/net/X/).
419 				 * When performing a dotdot set the
420 				 * returned vp to the vnode covered
421 				 * by the mounted lofs, ie /net/X/net/X
422 				 */
423 				VN_RELE(tvp);
424 				if ((vtol(dvp))->lo_looping & LO_AUTOLOOP) {
425 					VN_RELE(vp);
426 					vp = li->li_rootvp;
427 					vp = vp->v_vfsp->vfs_vnodecovered;
428 					VN_HOLD(vp);
429 					*vpp = makelonode(vp, li, 0);
430 					(vtol(*vpp))->lo_looping |= LO_LOOPING;
431 					return (0);
432 				}
433 			}
434 		} else {
435 			/*
436 			 * No frills just make the shadow node.
437 			 */
438 			*vpp = makelonode(vp, li, 0);
439 			return (0);
440 		}
441 	}
442 
443 	nosub = (vtoli(dvp->v_vfsp)->li_flag & LO_NOSUB);
444 
445 	/*
446 	 * If this vnode is mounted on, then we
447 	 * traverse to the vnode which is the root of
448 	 * the mounted file system.
449 	 */
450 	if (!nosub && (error = traverse(&vp)))
451 		goto out;
452 
453 	/*
454 	 * Make a lnode for the real vnode.
455 	 */
456 	if (vp->v_type != VDIR || nosub) {
457 		*vpp = makelonode(vp, li, 0);
458 		if (IS_DEVVP(*vpp)) {
459 			vnode_t *svp;
460 
461 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
462 			VN_RELE(*vpp);
463 			if (svp == NULL)
464 				error = ENOSYS;
465 			else
466 				*vpp = svp;
467 		}
468 		return (error);
469 	}
470 
471 	/*
472 	 * if the found vnode (vp) is not of type lofs
473 	 * then we're just going to make a shadow of that
474 	 * vp and get out.
475 	 *
476 	 * If the found vnode (vp) is of lofs type, and
477 	 * we're not doing dotdot, check if we are
478 	 * looping.
479 	 */
480 	if (!doingdotdot && vfs_matchops(vp->v_vfsp, lo_vfsops)) {
481 		/*
482 		 * Check if we're looping, i.e.
483 		 * vp equals the root vp of the lofs, directly
484 		 * or indirectly, return the covered node.
485 		 */
486 
487 		if (!((vtol(dvp))->lo_looping & LO_LOOPING)) {
488 			if (vp == li->li_rootvp) {
489 				/*
490 				 * Direct looping condn.
491 				 * Ex:- X is / mounted directory so lookup of
492 				 * /X/X is a direct looping condn.
493 				 */
494 				tvp = vp;
495 				vp = vp->v_vfsp->vfs_vnodecovered;
496 				VN_HOLD(vp);
497 				VN_RELE(tvp);
498 				looping++;
499 			} else {
500 				/*
501 				 * Indirect looping can be defined as
502 				 * real lookup returning rootvp of the current
503 				 * tree in any level of recursion.
504 				 *
505 				 * This check is useful if there are multiple
506 				 * levels of lofs indirections. Suppose vnode X
507 				 * in the current lookup has as its real vnode
508 				 * another lofs node. Y = realvp(X) Y should be
509 				 * a lofs node for the check to continue or Y
510 				 * is not the rootvp of X.
511 				 * Ex:- say X and Y are two vnodes
512 				 * say real(Y) is X and real(X) is Z
513 				 * parent vnode for X and Y is Z
514 				 * lookup(Y,"path") say we are looking for Y
515 				 * again under Y and we have to return Yc.
516 				 * but the lookup of Y under Y doesnot return
517 				 * Y the root vnode again here is why.
518 				 * 1. lookup(Y,"path of Y") will go to
519 				 * 2. lookup(real(Y),"path of Y") and then to
520 				 * 3. lookup(real(X),"path of Y").
521 				 * and now what lookup level 1 sees is the
522 				 * outcome of 2 but the vnode Y is due to
523 				 * lookup(Z,"path of Y") so we have to skip
524 				 * intermediate levels to find if in any level
525 				 * there is a looping.
526 				 */
527 				is_indirectloop = 0;
528 				nonlovp = vp;
529 				while (
530 				    vfs_matchops(nonlovp->v_vfsp, lo_vfsops) &&
531 				    !(is_indirectloop)) {
532 					if (li->li_rootvp  == nonlovp) {
533 						is_indirectloop++;
534 						break;
535 					}
536 					nonlovp = realvp(nonlovp);
537 				}
538 
539 				if (is_indirectloop) {
540 					VN_RELE(vp);
541 					vp = nonlovp;
542 					vp = vp->v_vfsp->vfs_vnodecovered;
543 					VN_HOLD(vp);
544 					looping++;
545 				}
546 			}
547 		} else {
548 			/*
549 			 * come here only because of the interaction between
550 			 * the autofs and lofs.
551 			 *
552 			 * Lookup of "/net/X/net/X" will return a shadow of
553 			 * an autonode X_a which we call X_l.
554 			 *
555 			 * Lookup of anything under X_l, will trigger a call to
556 			 * auto_lookup(X_a,nm) which will eventually call
557 			 * lo_lookup(X_lr,nm) where X_lr is the root vnode of
558 			 * the current lofs.
559 			 *
560 			 * We come here only when we are called with X_l as dvp
561 			 * and look for something underneath.
562 			 *
563 			 * Now that an autofs/lofs looping condition has been
564 			 * identified any directory vnode contained within
565 			 * dvp will be set to the vnode covered by the
566 			 * mounted autofs. Thus all directories within dvp
567 			 * will appear empty hence teminating the looping.
568 			 * The LO_AUTOLOOP flag is set on the returned lonode
569 			 * to indicate the termination of the autofs/lofs
570 			 * looping. This is required for the correct behaviour
571 			 * when performing a dotdot.
572 			 */
573 			realdvp = realvp(dvp);
574 			while (vfs_matchops(realdvp->v_vfsp, lo_vfsops)) {
575 				realdvp = realvp(realdvp);
576 			}
577 
578 			error = VFS_ROOT(realdvp->v_vfsp, &tvp);
579 			if (error)
580 				goto out;
581 			/*
582 			 * tvp now contains the rootvp of the vfs of the
583 			 * real vnode of dvp. The directory vnode vp is set
584 			 * to the covered vnode to terminate looping. No
585 			 * distinction is made between any vp as all directory
586 			 * vnodes contained in dvp are returned as the covered
587 			 * vnode.
588 			 */
589 			VN_RELE(vp);
590 			vp = tvp;	/* possibly is an autonode */
591 
592 			/*
593 			 * Need to find the covered vnode
594 			 */
595 			if (vp->v_vfsp->vfs_vnodecovered == NULL) {
596 				/*
597 				 * We don't have a covered vnode so this isn't
598 				 * an autonode. To find the autonode simply
599 				 * find the vnode covered by the lofs rootvp.
600 				 */
601 				vp = li->li_rootvp;
602 				vp = vp->v_vfsp->vfs_vnodecovered;
603 				VN_RELE(tvp);
604 				error = VFS_ROOT(vp->v_vfsp, &tvp);
605 				if (error)
606 					goto out;
607 				vp = tvp;	/* now this is an autonode */
608 				if (vp->v_vfsp->vfs_vnodecovered == NULL) {
609 					/*
610 					 * Still can't find a covered vnode.
611 					 * Fail the lookup, or we'd loop.
612 					 */
613 					error = ENOENT;
614 					goto out;
615 				}
616 			}
617 			vp = vp->v_vfsp->vfs_vnodecovered;
618 			VN_HOLD(vp);
619 			VN_RELE(tvp);
620 			/*
621 			 * Force the creation of a new lnode even if the hash
622 			 * table contains a lnode that references this vnode.
623 			 */
624 			mkflag = LOF_FORCE;
625 			autoloop++;
626 		}
627 	}
628 	*vpp = makelonode(vp, li, mkflag);
629 
630 	if ((looping) ||
631 	    (((vtol(dvp))->lo_looping & LO_LOOPING) && !doingdotdot)) {
632 		(vtol(*vpp))->lo_looping |= LO_LOOPING;
633 	}
634 
635 	if (autoloop) {
636 		(vtol(*vpp))->lo_looping |= LO_AUTOLOOP;
637 	}
638 
639 out:
640 	if (error != 0 && vp != NULL)
641 		VN_RELE(vp);
642 #ifdef LODEBUG
643 	lo_dprint(4,
644 	"lo_lookup dvp %x realdvp %x nm '%s' newvp %x real vp %x error %d\n",
645 		dvp, realvp(dvp), nm, *vpp, vp, error);
646 #endif
647 	return (error);
648 }
649 
650 /*ARGSUSED*/
651 static int
652 lo_create(
653 	vnode_t *dvp,
654 	char *nm,
655 	struct vattr *va,
656 	enum vcexcl exclusive,
657 	int mode,
658 	vnode_t **vpp,
659 	struct cred *cr,
660 	int flag)
661 {
662 	int error;
663 	vnode_t *vp = NULL;
664 
665 #ifdef LODEBUG
666 	lo_dprint(4, "lo_create vp %p realvp %p\n", dvp, realvp(dvp));
667 #endif
668 	if (*nm == '\0') {
669 		ASSERT(vpp && dvp == *vpp);
670 		vp = realvp(*vpp);
671 	}
672 
673 	error = VOP_CREATE(realvp(dvp), nm, va, exclusive, mode, &vp, cr, flag);
674 	if (!error) {
675 		*vpp = makelonode(vp, vtoli(dvp->v_vfsp), 0);
676 		if (IS_DEVVP(*vpp)) {
677 			vnode_t *svp;
678 
679 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
680 			VN_RELE(*vpp);
681 			if (svp == NULL)
682 				error = ENOSYS;
683 			else
684 				*vpp = svp;
685 		}
686 	}
687 	return (error);
688 }
689 
690 static int
691 lo_remove(vnode_t *dvp, char *nm, struct cred *cr)
692 {
693 #ifdef LODEBUG
694 	lo_dprint(4, "lo_remove vp %p realvp %p\n", dvp, realvp(dvp));
695 #endif
696 	dvp = realvp(dvp);
697 	return (VOP_REMOVE(dvp, nm, cr));
698 }
699 
700 static int
701 lo_link(vnode_t *tdvp, vnode_t *vp, char *tnm, struct cred *cr)
702 {
703 	vnode_t *realvp;
704 
705 #ifdef LODEBUG
706 	lo_dprint(4, "lo_link vp %p realvp %p\n", vp, realvp(vp));
707 #endif
708 
709 	/*
710 	 * The source and destination vnodes may be in different lofs
711 	 * filesystems sharing the same underlying filesystem, so we need to
712 	 * make sure that the filesystem containing the source vnode is not
713 	 * mounted read-only (vn_link() has already checked the target vnode).
714 	 *
715 	 * In a situation such as:
716 	 *
717 	 * /data	- regular filesystem
718 	 * /foo		- lofs mount of /data/foo
719 	 * /bar		- read-only lofs mount of /data/bar
720 	 *
721 	 * This disallows a link from /bar/somefile to /foo/somefile,
722 	 * which would otherwise allow changes to somefile on the read-only
723 	 * mounted /bar.
724 	 */
725 
726 	if (vn_is_readonly(vp)) {
727 		return (EROFS);
728 	}
729 	while (vn_matchops(vp, lo_vnodeops)) {
730 		vp = realvp(vp);
731 	}
732 
733 	/*
734 	 * In the case where the source vnode is on another stacking
735 	 * filesystem (such as specfs), the loop above will
736 	 * terminate before finding the true underlying vnode.
737 	 *
738 	 * We use VOP_REALVP here to continue the search.
739 	 */
740 	if (VOP_REALVP(vp, &realvp) == 0)
741 		vp = realvp;
742 
743 	while (vn_matchops(tdvp, lo_vnodeops)) {
744 		tdvp = realvp(tdvp);
745 	}
746 	if (vp->v_vfsp != tdvp->v_vfsp)
747 		return (EXDEV);
748 	return (VOP_LINK(tdvp, vp, tnm, cr));
749 }
750 
751 static int
752 lo_rename(
753 	vnode_t *odvp,
754 	char *onm,
755 	vnode_t *ndvp,
756 	char *nnm,
757 	struct cred *cr)
758 {
759 	vnode_t *tnvp;
760 
761 #ifdef LODEBUG
762 	lo_dprint(4, "lo_rename vp %p realvp %p\n", odvp, realvp(odvp));
763 #endif
764 	/*
765 	 * If we are coming from a loop back mounted fs, that has been
766 	 * mounted in the same filesystem as where we want to move to,
767 	 * and that filesystem is read/write, but the lofs filesystem is
768 	 * read only, we don't want to allow a rename of the file. The
769 	 * vn_rename code checks to be sure the target is read/write already
770 	 * so that is not necessary here. However, consider the following
771 	 * example:
772 	 *		/ - regular root fs
773 	 *		/foo - directory in root
774 	 *		/foo/bar - file in foo directory(in root fs)
775 	 *		/baz - directory in root
776 	 *		mount -F lofs -o ro /foo /baz - all still in root
777 	 *			directory
778 	 * The fact that we mounted /foo on /baz read only should stop us
779 	 * from renaming the file /foo/bar /bar, but it doesn't since
780 	 * / is read/write. We are still renaming here since we are still
781 	 * in the same filesystem, it is just that we do not check to see
782 	 * if the filesystem we are coming from in this case is read only.
783 	 */
784 	if (odvp->v_vfsp->vfs_flag & VFS_RDONLY)
785 		return (EROFS);
786 	/*
787 	 * We need to make sure we're not trying to remove a mount point for a
788 	 * filesystem mounted on top of lofs, which only we know about.
789 	 */
790 	if (vn_matchops(ndvp, lo_vnodeops))	/* Not our problem. */
791 		goto rename;
792 	if (VOP_LOOKUP(ndvp, nnm, &tnvp, NULL, 0, NULL, cr) != 0)
793 		goto rename;
794 	if (tnvp->v_type != VDIR) {
795 		VN_RELE(tnvp);
796 		goto rename;
797 	}
798 	if (vn_mountedvfs(tnvp)) {
799 		VN_RELE(tnvp);
800 		return (EBUSY);
801 	}
802 	VN_RELE(tnvp);
803 rename:
804 	/*
805 	 * Since the case we're dealing with above can happen at any layer in
806 	 * the stack of lofs filesystems, we need to recurse down the stack,
807 	 * checking to see if there are any instances of a filesystem mounted on
808 	 * top of lofs. In order to keep on using the lofs version of
809 	 * VOP_RENAME(), we make sure that while the target directory is of type
810 	 * lofs, the source directory (the one used for getting the fs-specific
811 	 * version of VOP_RENAME()) is also of type lofs.
812 	 */
813 	if (vn_matchops(ndvp, lo_vnodeops)) {
814 		ndvp = realvp(ndvp);	/* Check the next layer */
815 	} else {
816 		/*
817 		 * We can go fast here
818 		 */
819 		while (vn_matchops(odvp, lo_vnodeops)) {
820 			odvp = realvp(odvp);
821 		}
822 		if (odvp->v_vfsp != ndvp->v_vfsp)
823 			return (EXDEV);
824 	}
825 	return (VOP_RENAME(odvp, onm, ndvp, nnm, cr));
826 }
827 
828 static int
829 lo_mkdir(
830 	vnode_t *dvp,
831 	char *nm,
832 	struct vattr *va,
833 	vnode_t **vpp,
834 	struct cred *cr)
835 {
836 	int error;
837 
838 #ifdef LODEBUG
839 	lo_dprint(4, "lo_mkdir vp %p realvp %p\n", dvp, realvp(dvp));
840 #endif
841 	error = VOP_MKDIR(realvp(dvp), nm, va, vpp, cr);
842 	if (!error)
843 		*vpp = makelonode(*vpp, vtoli(dvp->v_vfsp), 0);
844 	return (error);
845 }
846 
847 static int
848 lo_realvp(vnode_t *vp, vnode_t **vpp)
849 {
850 #ifdef LODEBUG
851 	lo_dprint(4, "lo_realvp %p\n", vp);
852 #endif
853 	while (vn_matchops(vp, lo_vnodeops))
854 		vp = realvp(vp);
855 
856 	if (VOP_REALVP(vp, vpp) != 0)
857 		*vpp = vp;
858 	return (0);
859 }
860 
861 static int
862 lo_rmdir(
863 	vnode_t *dvp,
864 	char *nm,
865 	vnode_t *cdir,
866 	struct cred *cr)
867 {
868 	vnode_t *rvp = cdir;
869 
870 #ifdef LODEBUG
871 	lo_dprint(4, "lo_rmdir vp %p realvp %p\n", dvp, realvp(dvp));
872 #endif
873 	/* if cdir is lofs vnode ptr get its real vnode ptr */
874 	if (vn_matchops(dvp, vn_getops(rvp)))
875 		(void) lo_realvp(cdir, &rvp);
876 	dvp = realvp(dvp);
877 	return (VOP_RMDIR(dvp, nm, rvp, cr));
878 }
879 
880 static int
881 lo_symlink(
882 	vnode_t *dvp,
883 	char *lnm,
884 	struct vattr *tva,
885 	char *tnm,
886 	struct cred *cr)
887 {
888 #ifdef LODEBUG
889 	lo_dprint(4, "lo_symlink vp %p realvp %p\n", dvp, realvp(dvp));
890 #endif
891 	dvp = realvp(dvp);
892 	return (VOP_SYMLINK(dvp, lnm, tva, tnm, cr));
893 }
894 
895 static int
896 lo_readlink(vnode_t *vp, struct uio *uiop, struct cred *cr)
897 {
898 	vp = realvp(vp);
899 	return (VOP_READLINK(vp, uiop, cr));
900 }
901 
902 static int
903 lo_readdir(vnode_t *vp, struct uio *uiop, struct cred *cr, int *eofp)
904 {
905 #ifdef LODEBUG
906 	lo_dprint(4, "lo_readdir vp %p realvp %p\n", vp, realvp(vp));
907 #endif
908 	vp = realvp(vp);
909 	return (VOP_READDIR(vp, uiop, cr, eofp));
910 }
911 
912 static int
913 lo_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
914 {
915 	vp = realvp(vp);
916 	return (VOP_RWLOCK(vp, write_lock, ct));
917 }
918 
919 static void
920 lo_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
921 {
922 	vp = realvp(vp);
923 	VOP_RWUNLOCK(vp, write_lock, ct);
924 }
925 
926 static int
927 lo_seek(vnode_t *vp, offset_t ooff, offset_t *noffp)
928 {
929 	vp = realvp(vp);
930 	return (VOP_SEEK(vp, ooff, noffp));
931 }
932 
933 static int
934 lo_cmp(vnode_t *vp1, vnode_t *vp2)
935 {
936 	while (vn_matchops(vp1, lo_vnodeops))
937 		vp1 = realvp(vp1);
938 	while (vn_matchops(vp2, lo_vnodeops))
939 		vp2 = realvp(vp2);
940 	return (VOP_CMP(vp1, vp2));
941 }
942 
943 static int
944 lo_frlock(
945 	vnode_t *vp,
946 	int cmd,
947 	struct flock64 *bfp,
948 	int flag,
949 	offset_t offset,
950 	struct flk_callback *flk_cbp,
951 	cred_t *cr)
952 {
953 	vp = realvp(vp);
954 	return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr));
955 }
956 
957 static int
958 lo_space(
959 	vnode_t *vp,
960 	int cmd,
961 	struct flock64 *bfp,
962 	int flag,
963 	offset_t offset,
964 	struct cred *cr,
965 	caller_context_t *ct)
966 {
967 	vp = realvp(vp);
968 	return (VOP_SPACE(vp, cmd, bfp, flag, offset, cr, ct));
969 }
970 
971 static int
972 lo_getpage(
973 	vnode_t *vp,
974 	offset_t off,
975 	size_t len,
976 	uint_t *prot,
977 	struct page *parr[],
978 	size_t psz,
979 	struct seg *seg,
980 	caddr_t addr,
981 	enum seg_rw rw,
982 	struct cred *cr)
983 {
984 	vp = realvp(vp);
985 	return (VOP_GETPAGE(vp, off, len, prot, parr, psz, seg, addr, rw, cr));
986 }
987 
988 static int
989 lo_putpage(vnode_t *vp, offset_t off, size_t len, int flags, struct cred *cr)
990 {
991 	vp = realvp(vp);
992 	return (VOP_PUTPAGE(vp, off, len, flags, cr));
993 }
994 
995 static int
996 lo_map(
997 	vnode_t *vp,
998 	offset_t off,
999 	struct as *as,
1000 	caddr_t *addrp,
1001 	size_t len,
1002 	uchar_t prot,
1003 	uchar_t maxprot,
1004 	uint_t flags,
1005 	struct cred *cr)
1006 {
1007 	vp = realvp(vp);
1008 	return (VOP_MAP(vp, off, as, addrp, len, prot, maxprot, flags, cr));
1009 }
1010 
1011 static int
1012 lo_addmap(
1013 	vnode_t *vp,
1014 	offset_t off,
1015 	struct as *as,
1016 	caddr_t addr,
1017 	size_t len,
1018 	uchar_t prot,
1019 	uchar_t maxprot,
1020 	uint_t flags,
1021 	struct cred *cr)
1022 {
1023 	vp = realvp(vp);
1024 	return (VOP_ADDMAP(vp, off, as, addr, len, prot, maxprot, flags, cr));
1025 }
1026 
1027 static int
1028 lo_delmap(
1029 	vnode_t *vp,
1030 	offset_t off,
1031 	struct as *as,
1032 	caddr_t addr,
1033 	size_t len,
1034 	uint_t prot,
1035 	uint_t maxprot,
1036 	uint_t flags,
1037 	struct cred *cr)
1038 {
1039 	vp = realvp(vp);
1040 	return (VOP_DELMAP(vp, off, as, addr, len, prot, maxprot, flags, cr));
1041 }
1042 
1043 static int
1044 lo_poll(
1045 	vnode_t *vp,
1046 	short events,
1047 	int anyyet,
1048 	short *reventsp,
1049 	struct pollhead **phpp)
1050 {
1051 	vp = realvp(vp);
1052 	return (VOP_POLL(vp, events, anyyet, reventsp, phpp));
1053 }
1054 
1055 static int
1056 lo_dump(vnode_t *vp, caddr_t addr, int bn, int count)
1057 {
1058 	vp = realvp(vp);
1059 	return (VOP_DUMP(vp, addr, bn, count));
1060 }
1061 
1062 static int
1063 lo_pathconf(vnode_t *vp, int cmd, ulong_t *valp, struct cred *cr)
1064 {
1065 	vp = realvp(vp);
1066 	return (VOP_PATHCONF(vp, cmd, valp, cr));
1067 }
1068 
1069 static int
1070 lo_pageio(
1071 	vnode_t *vp,
1072 	struct page *pp,
1073 	u_offset_t io_off,
1074 	size_t io_len,
1075 	int flags,
1076 	cred_t *cr)
1077 {
1078 	vp = realvp(vp);
1079 	return (VOP_PAGEIO(vp, pp, io_off, io_len, flags, cr));
1080 }
1081 
1082 static void
1083 lo_dispose(vnode_t *vp, page_t *pp, int fl, int dn, cred_t *cr)
1084 {
1085 	vp = realvp(vp);
1086 	if (vp != NULL && !VN_ISKAS(vp))
1087 		VOP_DISPOSE(vp, pp, fl, dn, cr);
1088 }
1089 
1090 static int
1091 lo_setsecattr(vnode_t *vp, vsecattr_t *secattr, int flags, struct cred *cr)
1092 {
1093 	if (vn_is_readonly(vp))
1094 		return (EROFS);
1095 	vp = realvp(vp);
1096 	return (VOP_SETSECATTR(vp, secattr, flags, cr));
1097 }
1098 
1099 static int
1100 lo_getsecattr(vnode_t *vp, vsecattr_t *secattr, int flags, struct cred *cr)
1101 {
1102 	vp = realvp(vp);
1103 	return (VOP_GETSECATTR(vp, secattr, flags, cr));
1104 }
1105 
1106 static int
1107 lo_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr)
1108 {
1109 	vp = realvp(vp);
1110 	return (VOP_SHRLOCK(vp, cmd, shr, flag, cr));
1111 }
1112 
/*
 * Loopback vnode operations vector.
 *
 * lo_vnodeops is the operations pointer that the routines in this file
 * compare against (via vn_matchops()) to recognise a lofs vnode.  It is
 * not assigned anywhere in this file.
 * NOTE(review): presumably it is built from lo_vnodeops_template by the
 * lofs initialization code elsewhere -- confirm.
 *
 * lo_vnodeops_template pairs each vnode operation name with the lofs
 * routine from this file that implements it.  VOPNAME_DUMPCTL is the one
 * entry not served by a lo_* routine; it is wired to fs_error.  The
 * table ends with a NULL name / NULL handler pair.
 */

struct vnodeops *lo_vnodeops;

const fs_operation_def_t lo_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = lo_open },
	VOPNAME_CLOSE,		{ .vop_close = lo_close },
	VOPNAME_READ,		{ .vop_read = lo_read },
	VOPNAME_WRITE,		{ .vop_write = lo_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = lo_ioctl },
	VOPNAME_SETFL,		{ .vop_setfl = lo_setfl },
	VOPNAME_GETATTR,	{ .vop_getattr = lo_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = lo_setattr },
	VOPNAME_ACCESS,		{ .vop_access = lo_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = lo_lookup },
	VOPNAME_CREATE,		{ .vop_create = lo_create },
	VOPNAME_REMOVE,		{ .vop_remove = lo_remove },
	VOPNAME_LINK,		{ .vop_link = lo_link },
	VOPNAME_RENAME,		{ .vop_rename = lo_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = lo_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = lo_rmdir },
	VOPNAME_READDIR,	{ .vop_readdir = lo_readdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = lo_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = lo_readlink },
	VOPNAME_FSYNC,		{ .vop_fsync = lo_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = lo_inactive },
	VOPNAME_FID,		{ .vop_fid = lo_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = lo_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = lo_rwunlock },
	VOPNAME_SEEK,		{ .vop_seek = lo_seek },
	VOPNAME_CMP,		{ .vop_cmp = lo_cmp },
	VOPNAME_FRLOCK,		{ .vop_frlock = lo_frlock },
	VOPNAME_SPACE,		{ .vop_space = lo_space },
	VOPNAME_REALVP,		{ .vop_realvp = lo_realvp },
	VOPNAME_GETPAGE,	{ .vop_getpage = lo_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = lo_putpage },
	VOPNAME_MAP,		{ .vop_map = lo_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = lo_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = lo_delmap },
	VOPNAME_POLL,		{ .vop_poll = lo_poll },
	VOPNAME_DUMP,		{ .vop_dump = lo_dump },
	VOPNAME_DUMPCTL,	{ .error = fs_error },	/* XXX - why? */
	VOPNAME_PATHCONF,	{ .vop_pathconf = lo_pathconf },
	VOPNAME_PAGEIO,		{ .vop_pageio = lo_pageio },
	VOPNAME_DISPOSE,	{ .vop_dispose = lo_dispose },
	VOPNAME_SETSECATTR,	{ .vop_setsecattr = lo_setsecattr },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = lo_getsecattr },
	VOPNAME_SHRLOCK,	{ .vop_shrlock = lo_shrlock },
	/* End-of-table marker. */
	NULL,			NULL
};
1165