xref: /titanic_44/usr/src/uts/common/fs/lofs/lofs_vnops.c (revision 40e5e17b3361b3eea56a9723071c406894a20b78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/errno.h>
31 #include <sys/vnode.h>
32 #include <sys/vfs.h>
33 #include <sys/uio.h>
34 #include <sys/cred.h>
35 #include <sys/pathname.h>
36 #include <sys/debug.h>
37 #include <sys/fs/lofs_node.h>
38 #include <sys/fs/lofs_info.h>
39 #include <fs/fs_subr.h>
40 #include <vm/as.h>
41 #include <vm/seg.h>
42 
43 /*
44  * These are the vnode ops routines which implement the vnode interface to
 * the looped-back file system.  These routines just take their parameters
 * and then call the appropriate real vnode routine(s) to do the work.
47  */
48 
49 static int
50 lo_open(vnode_t **vpp, int flag, struct cred *cr)
51 {
52 	vnode_t *vp = *vpp;
53 	vnode_t *rvp;
54 	vnode_t *oldvp;
55 	int error;
56 
57 #ifdef LODEBUG
58 	lo_dprint(4, "lo_open vp %p cnt=%d realvp %p cnt=%d\n",
59 		vp, vp->v_count, realvp(vp), realvp(vp)->v_count);
60 #endif
61 
62 	oldvp = vp;
63 	vp = rvp = realvp(vp);
64 	/*
65 	 * Need to hold new reference to vp since VOP_OPEN() may
66 	 * decide to release it.
67 	 */
68 	VN_HOLD(vp);
69 	error = VOP_OPEN(&rvp, flag, cr);
70 
71 	if (!error && rvp != vp) {
72 		/*
73 		 * the FS which we called should have released the
74 		 * new reference on vp
75 		 */
76 		*vpp = makelonode(rvp, vtoli(oldvp->v_vfsp), 0);
77 		if ((*vpp)->v_type == VDIR) {
78 			/*
79 			 * Copy over any looping flags to the new lnode.
80 			 */
81 			(vtol(*vpp))->lo_looping |= (vtol(oldvp))->lo_looping;
82 		}
83 		if (IS_DEVVP(*vpp)) {
84 			vnode_t *svp;
85 
86 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
87 			VN_RELE(*vpp);
88 			if (svp == NULL)
89 				error = ENOSYS;
90 			else
91 				*vpp = svp;
92 		}
93 		VN_RELE(oldvp);
94 	} else {
95 		ASSERT(rvp->v_count > 1);
96 		VN_RELE(rvp);
97 	}
98 
99 	return (error);
100 }
101 
102 static int
103 lo_close(
104 	vnode_t *vp,
105 	int flag,
106 	int count,
107 	offset_t offset,
108 	struct cred *cr)
109 {
110 #ifdef LODEBUG
111 	lo_dprint(4, "lo_close vp %p realvp %p\n", vp, realvp(vp));
112 #endif
113 	vp = realvp(vp);
114 	return (VOP_CLOSE(vp, flag, count, offset, cr));
115 }
116 
117 static int
118 lo_read(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
119 	caller_context_t *ct)
120 {
121 #ifdef LODEBUG
122 	lo_dprint(4, "lo_read vp %p realvp %p\n", vp, realvp(vp));
123 #endif
124 	vp = realvp(vp);
125 	return (VOP_READ(vp, uiop, ioflag, cr, ct));
126 }
127 
128 static int
129 lo_write(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
130 	caller_context_t *ct)
131 {
132 #ifdef LODEBUG
133 	lo_dprint(4, "lo_write vp %p realvp %p\n", vp, realvp(vp));
134 #endif
135 	vp = realvp(vp);
136 	return (VOP_WRITE(vp, uiop, ioflag, cr, ct));
137 }
138 
139 static int
140 lo_ioctl(
141 	vnode_t *vp,
142 	int cmd,
143 	intptr_t arg,
144 	int flag,
145 	struct cred *cr,
146 	int *rvalp)
147 {
148 #ifdef LODEBUG
149 	lo_dprint(4, "lo_ioctl vp %p realvp %p\n", vp, realvp(vp));
150 #endif
151 	vp = realvp(vp);
152 	return (VOP_IOCTL(vp, cmd, arg, flag, cr, rvalp));
153 }
154 
155 static int
156 lo_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr)
157 {
158 	vp = realvp(vp);
159 	return (VOP_SETFL(vp, oflags, nflags, cr));
160 }
161 
162 static int
163 lo_getattr(
164 	vnode_t *vp,
165 	struct vattr *vap,
166 	int flags,
167 	struct cred *cr)
168 {
169 	int error;
170 
171 #ifdef LODEBUG
172 	lo_dprint(4, "lo_getattr vp %p realvp %p\n", vp, realvp(vp));
173 #endif
174 	if (error = VOP_GETATTR(realvp(vp), vap, flags, cr))
175 		return (error);
176 
177 	return (0);
178 }
179 
180 static int
181 lo_setattr(
182 	vnode_t *vp,
183 	struct vattr *vap,
184 	int flags,
185 	struct cred *cr,
186 	caller_context_t *ct)
187 {
188 #ifdef LODEBUG
189 	lo_dprint(4, "lo_setattr vp %p realvp %p\n", vp, realvp(vp));
190 #endif
191 	vp = realvp(vp);
192 	return (VOP_SETATTR(vp, vap, flags, cr, ct));
193 }
194 
195 static int
196 lo_access(vnode_t *vp, int mode, int flags, struct cred *cr)
197 {
198 #ifdef LODEBUG
199 	lo_dprint(4, "lo_access vp %p realvp %p\n", vp, realvp(vp));
200 #endif
201 	if (mode & VWRITE) {
202 		if (vp->v_type == VREG && vn_is_readonly(vp))
203 			return (EROFS);
204 	}
205 	vp = realvp(vp);
206 	return (VOP_ACCESS(vp, mode, flags, cr));
207 }
208 
209 static int
210 lo_fsync(vnode_t *vp, int syncflag, struct cred *cr)
211 {
212 #ifdef LODEBUG
213 	lo_dprint(4, "lo_fsync vp %p realvp %p\n", vp, realvp(vp));
214 #endif
215 	vp = realvp(vp);
216 	return (VOP_FSYNC(vp, syncflag, cr));
217 }
218 
219 /*ARGSUSED*/
220 static void
221 lo_inactive(vnode_t *vp, struct cred *cr)
222 {
223 #ifdef LODEBUG
224 	lo_dprint(4, "lo_inactive %p, realvp %p\n", vp, realvp(vp));
225 #endif
226 	freelonode(vtol(vp));
227 }
228 
229 /* ARGSUSED */
230 static int
231 lo_fid(vnode_t *vp, struct fid *fidp)
232 {
233 #ifdef LODEBUG
234 	lo_dprint(4, "lo_fid %p, realvp %p\n", vp, realvp(vp));
235 #endif
236 	vp = realvp(vp);
237 	return (VOP_FID(vp, fidp));
238 }
239 
240 /*
241  * Given a vnode of lofs type, lookup nm name and
242  * return a shadow vnode (of lofs type) of the
243  * real vnode found.
244  *
245  * Due to the nature of lofs, there is a potential
246  * looping in path traversal.
247  *
248  * starting from the mount point of an lofs;
249  * a loop is defined to be a traversal path
250  * where the mount point or the real vnode of
251  * the root of this lofs is encountered twice.
252  * Once at the start of traversal and second
253  * when the looping is found.
254  *
255  * When a loop is encountered, a shadow of the
256  * covered vnode is returned to stop the looping.
257  *
258  * This normally works, but with the advent of
259  * the new automounter, returning the shadow of the
260  * covered vnode (autonode, in this case) does not
261  * stop the loop.  Because further lookup on this
262  * lonode will cause the autonode to call lo_lookup()
263  * on the lonode covering it.
264  *
265  * example "/net/jurassic/net/jurassic" is a loop.
266  * returning the shadow of the autonode corresponding to
267  * "/net/jurassic/net/jurassic" will not terminate the
268  * loop.   To solve this problem we allow the loop to go
269  * through one more level component lookup.  Whichever
270  * directory is then looked up in "/net/jurassic/net/jurassic"
271  * the vnode returned is the vnode covered by the autonode
272  * "net" and this will terminate the loop.
273  *
274  * Lookup for dot dot has to be dealt with separately.
275  * It will be nice to have a "one size fits all" kind
276  * of solution, so that we don't have so many ifs statement
277  * in the lo_lookup() to handle dotdot.  But, since
278  * there are so many special cases to handle different
279  * kinds looping above, we need special codes to handle
280  * dotdot lookup as well.
281  */
282 static int
283 lo_lookup(
284 	vnode_t *dvp,
285 	char *nm,
286 	vnode_t **vpp,
287 	struct pathname *pnp,
288 	int flags,
289 	vnode_t *rdir,
290 	struct cred *cr)
291 {
292 	vnode_t *vp = NULL, *tvp = NULL, *nonlovp;
293 	int error, is_indirectloop;
294 	vnode_t *realdvp = realvp(dvp);
295 	struct loinfo *li = vtoli(dvp->v_vfsp);
296 	int looping = 0;
297 	int autoloop = 0;
298 	int doingdotdot = 0;
299 	int nosub = 0;
300 	int mkflag = 0;
301 
302 	/*
303 	 * If name is empty and no XATTR flags are set, then return
304 	 * dvp (empty name == lookup ".").  If an XATTR flag is set
305 	 * then we need to call VOP_LOOKUP to get the xattr dir.
306 	 */
307 	if (nm[0] == '\0' && ! (flags & (CREATE_XATTR_DIR|LOOKUP_XATTR))) {
308 		VN_HOLD(dvp);
309 		*vpp = dvp;
310 		return (0);
311 	}
312 
313 	if (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0') {
314 		doingdotdot++;
315 		/*
316 		 * Handle ".." out of mounted filesystem
317 		 */
318 		while ((realdvp->v_flag & VROOT) && realdvp != rootdir) {
319 			realdvp = realdvp->v_vfsp->vfs_vnodecovered;
320 			ASSERT(realdvp != NULL);
321 		}
322 	}
323 
324 	*vpp = NULL;	/* default(error) case */
325 
326 	/*
327 	 * Do the normal lookup
328 	 */
329 	if (error = VOP_LOOKUP(realdvp, nm, &vp, pnp, flags, rdir, cr)) {
330 		vp = NULL;
331 		goto out;
332 	}
333 
334 	/*
335 	 * We do this check here to avoid returning a stale file handle to the
336 	 * caller.
337 	 */
338 	if (nm[0] == '.' && nm[1] == '\0') {
339 		ASSERT(vp == realdvp);
340 		VN_HOLD(dvp);
341 		VN_RELE(vp);
342 		*vpp = dvp;
343 		return (0);
344 	}
345 
346 	if (doingdotdot) {
347 		if ((vtol(dvp))->lo_looping & LO_LOOPING) {
348 			vfs_t *vfsp;
349 
350 			error = vn_vfsrlock_wait(realdvp);
351 			if (error)
352 				goto out;
353 			vfsp = vn_mountedvfs(realdvp);
354 			/*
355 			 * In the standard case if the looping flag is set and
356 			 * performing dotdot we would be returning from a
357 			 * covered vnode, implying vfsp could not be null. The
358 			 * exceptions being if we have looping and overlay
359 			 * mounts or looping and covered file systems.
360 			 */
361 			if (vfsp == NULL) {
362 				/*
363 				 * Overlay mount or covered file system,
364 				 * so just make the shadow node.
365 				 */
366 				vn_vfsunlock(realdvp);
367 				*vpp = makelonode(vp, li, 0);
368 				(vtol(*vpp))->lo_looping |= LO_LOOPING;
369 				return (0);
370 			}
371 			/*
372 			 * When looping get the actual found vnode
373 			 * instead of the vnode covered.
374 			 * Here we have to hold the lock for realdvp
375 			 * since an unmount during the traversal to the
376 			 * root vnode would turn *vfsp into garbage
377 			 * which would be fatal.
378 			 */
379 			error = VFS_ROOT(vfsp, &tvp);
380 			vn_vfsunlock(realdvp);
381 
382 			if (error)
383 				goto out;
384 
385 			if ((tvp == li->li_rootvp) && (vp == realvp(tvp))) {
386 				/*
387 				 * we're back at the real vnode
388 				 * of the rootvp
389 				 *
390 				 * return the rootvp
391 				 * Ex: /mnt/mnt/..
392 				 * where / has been lofs-mounted
393 				 * onto /mnt.  Return the lofs
394 				 * node mounted at /mnt.
395 				 */
396 				*vpp = tvp;
397 				VN_RELE(vp);
398 				return (0);
399 			} else {
400 				/*
401 				 * We are returning from a covered
402 				 * node whose vfs_mountedhere is
403 				 * not pointing to vfs of the current
404 				 * root vnode.
405 				 * This is a condn where in we
406 				 * returned a covered node say Zc
407 				 * but Zc is not the cover of current
408 				 * root.
409 				 * i.e.., if X is the root vnode
410 				 * lookup(Zc,"..") is taking us to
411 				 * X.
412 				 * Ex: /net/X/net/X/Y
413 				 *
414 				 * If LO_AUTOLOOP (autofs/lofs looping detected)
415 				 * has been set then we are encountering the
416 				 * cover of Y (Y being any directory vnode
417 				 * under /net/X/net/X/).
418 				 * When performing a dotdot set the
419 				 * returned vp to the vnode covered
420 				 * by the mounted lofs, ie /net/X/net/X
421 				 */
422 				VN_RELE(tvp);
423 				if ((vtol(dvp))->lo_looping & LO_AUTOLOOP) {
424 					VN_RELE(vp);
425 					vp = li->li_rootvp;
426 					vp = vp->v_vfsp->vfs_vnodecovered;
427 					VN_HOLD(vp);
428 					*vpp = makelonode(vp, li, 0);
429 					(vtol(*vpp))->lo_looping |= LO_LOOPING;
430 					return (0);
431 				}
432 			}
433 		} else {
434 			/*
435 			 * No frills just make the shadow node.
436 			 */
437 			*vpp = makelonode(vp, li, 0);
438 			return (0);
439 		}
440 	}
441 
442 	nosub = (vtoli(dvp->v_vfsp)->li_flag & LO_NOSUB);
443 
444 	/*
445 	 * If this vnode is mounted on, then we
446 	 * traverse to the vnode which is the root of
447 	 * the mounted file system.
448 	 */
449 	if (!nosub && (error = traverse(&vp)))
450 		goto out;
451 
452 	/*
453 	 * Make a lnode for the real vnode.
454 	 */
455 	if (vp->v_type != VDIR || nosub) {
456 		*vpp = makelonode(vp, li, 0);
457 		if (IS_DEVVP(*vpp)) {
458 			vnode_t *svp;
459 
460 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
461 			VN_RELE(*vpp);
462 			if (svp == NULL)
463 				error = ENOSYS;
464 			else
465 				*vpp = svp;
466 		}
467 		return (error);
468 	}
469 
470 	/*
471 	 * if the found vnode (vp) is not of type lofs
472 	 * then we're just going to make a shadow of that
473 	 * vp and get out.
474 	 *
475 	 * If the found vnode (vp) is of lofs type, and
476 	 * we're not doing dotdot, check if we are
477 	 * looping.
478 	 */
479 	if (!doingdotdot && vfs_matchops(vp->v_vfsp, lo_vfsops)) {
480 		/*
481 		 * Check if we're looping, i.e.
482 		 * vp equals the root vp of the lofs, directly
483 		 * or indirectly, return the covered node.
484 		 */
485 
486 		if (!((vtol(dvp))->lo_looping & LO_LOOPING)) {
487 			if (vp == li->li_rootvp) {
488 				/*
489 				 * Direct looping condn.
490 				 * Ex:- X is / mounted directory so lookup of
491 				 * /X/X is a direct looping condn.
492 				 */
493 				tvp = vp;
494 				vp = vp->v_vfsp->vfs_vnodecovered;
495 				VN_HOLD(vp);
496 				VN_RELE(tvp);
497 				looping++;
498 			} else {
499 				/*
500 				 * Indirect looping can be defined as
501 				 * real lookup returning rootvp of the current
502 				 * tree in any level of recursion.
503 				 *
504 				 * This check is useful if there are multiple
505 				 * levels of lofs indirections. Suppose vnode X
506 				 * in the current lookup has as its real vnode
507 				 * another lofs node. Y = realvp(X) Y should be
508 				 * a lofs node for the check to continue or Y
509 				 * is not the rootvp of X.
510 				 * Ex:- say X and Y are two vnodes
511 				 * say real(Y) is X and real(X) is Z
512 				 * parent vnode for X and Y is Z
513 				 * lookup(Y,"path") say we are looking for Y
514 				 * again under Y and we have to return Yc.
515 				 * but the lookup of Y under Y doesnot return
516 				 * Y the root vnode again here is why.
517 				 * 1. lookup(Y,"path of Y") will go to
518 				 * 2. lookup(real(Y),"path of Y") and then to
519 				 * 3. lookup(real(X),"path of Y").
520 				 * and now what lookup level 1 sees is the
521 				 * outcome of 2 but the vnode Y is due to
522 				 * lookup(Z,"path of Y") so we have to skip
523 				 * intermediate levels to find if in any level
524 				 * there is a looping.
525 				 */
526 				is_indirectloop = 0;
527 				nonlovp = vp;
528 				while (
529 				    vfs_matchops(nonlovp->v_vfsp, lo_vfsops) &&
530 				    !(is_indirectloop)) {
531 					if (li->li_rootvp  == nonlovp) {
532 						is_indirectloop++;
533 						break;
534 					}
535 					nonlovp = realvp(nonlovp);
536 				}
537 
538 				if (is_indirectloop) {
539 					VN_RELE(vp);
540 					vp = nonlovp;
541 					vp = vp->v_vfsp->vfs_vnodecovered;
542 					VN_HOLD(vp);
543 					looping++;
544 				}
545 			}
546 		} else {
547 			/*
548 			 * come here only because of the interaction between
549 			 * the autofs and lofs.
550 			 *
551 			 * Lookup of "/net/X/net/X" will return a shadow of
552 			 * an autonode X_a which we call X_l.
553 			 *
554 			 * Lookup of anything under X_l, will trigger a call to
555 			 * auto_lookup(X_a,nm) which will eventually call
556 			 * lo_lookup(X_lr,nm) where X_lr is the root vnode of
557 			 * the current lofs.
558 			 *
559 			 * We come here only when we are called with X_l as dvp
560 			 * and look for something underneath.
561 			 *
562 			 * Now that an autofs/lofs looping condition has been
563 			 * identified any directory vnode contained within
564 			 * dvp will be set to the vnode covered by the
565 			 * mounted autofs. Thus all directories within dvp
566 			 * will appear empty hence teminating the looping.
567 			 * The LO_AUTOLOOP flag is set on the returned lonode
568 			 * to indicate the termination of the autofs/lofs
569 			 * looping. This is required for the correct behaviour
570 			 * when performing a dotdot.
571 			 */
572 			realdvp = realvp(dvp);
573 			while (vfs_matchops(realdvp->v_vfsp, lo_vfsops)) {
574 				realdvp = realvp(realdvp);
575 			}
576 
577 			error = VFS_ROOT(realdvp->v_vfsp, &tvp);
578 			if (error)
579 				goto out;
580 			/*
581 			 * tvp now contains the rootvp of the vfs of the
582 			 * real vnode of dvp. The directory vnode vp is set
583 			 * to the covered vnode to terminate looping. No
584 			 * distinction is made between any vp as all directory
585 			 * vnodes contained in dvp are returned as the covered
586 			 * vnode.
587 			 */
588 			VN_RELE(vp);
589 			vp = tvp;	/* possibly is an autonode */
590 
591 			/*
592 			 * Need to find the covered vnode
593 			 */
594 			if (vp->v_vfsp->vfs_vnodecovered == NULL) {
595 				/*
596 				 * We don't have a covered vnode so this isn't
597 				 * an autonode. To find the autonode simply
598 				 * find the vnode covered by the lofs rootvp.
599 				 */
600 				vp = li->li_rootvp;
601 				vp = vp->v_vfsp->vfs_vnodecovered;
602 				VN_RELE(tvp);
603 				error = VFS_ROOT(vp->v_vfsp, &tvp);
604 				if (error)
605 					goto out;
606 				vp = tvp;	/* now this is an autonode */
607 				if (vp->v_vfsp->vfs_vnodecovered == NULL) {
608 					/*
609 					 * Still can't find a covered vnode.
610 					 * Fail the lookup, or we'd loop.
611 					 */
612 					error = ENOENT;
613 					goto out;
614 				}
615 			}
616 			vp = vp->v_vfsp->vfs_vnodecovered;
617 			VN_HOLD(vp);
618 			VN_RELE(tvp);
619 			/*
620 			 * Force the creation of a new lnode even if the hash
621 			 * table contains a lnode that references this vnode.
622 			 */
623 			mkflag = LOF_FORCE;
624 			autoloop++;
625 		}
626 	}
627 	*vpp = makelonode(vp, li, mkflag);
628 
629 	if ((looping) ||
630 	    (((vtol(dvp))->lo_looping & LO_LOOPING) && !doingdotdot)) {
631 		(vtol(*vpp))->lo_looping |= LO_LOOPING;
632 	}
633 
634 	if (autoloop) {
635 		(vtol(*vpp))->lo_looping |= LO_AUTOLOOP;
636 	}
637 
638 out:
639 	if (error != 0 && vp != NULL)
640 		VN_RELE(vp);
641 #ifdef LODEBUG
642 	lo_dprint(4,
643 	"lo_lookup dvp %x realdvp %x nm '%s' newvp %x real vp %x error %d\n",
644 		dvp, realvp(dvp), nm, *vpp, vp, error);
645 #endif
646 	return (error);
647 }
648 
649 /*ARGSUSED*/
650 static int
651 lo_create(
652 	vnode_t *dvp,
653 	char *nm,
654 	struct vattr *va,
655 	enum vcexcl exclusive,
656 	int mode,
657 	vnode_t **vpp,
658 	struct cred *cr,
659 	int flag)
660 {
661 	int error;
662 	vnode_t *vp = NULL;
663 
664 #ifdef LODEBUG
665 	lo_dprint(4, "lo_create vp %p realvp %p\n", dvp, realvp(dvp));
666 #endif
667 	if (*nm == '\0') {
668 		ASSERT(vpp && dvp == *vpp);
669 		vp = realvp(*vpp);
670 	}
671 
672 	error = VOP_CREATE(realvp(dvp), nm, va, exclusive, mode, &vp, cr, flag);
673 	if (!error) {
674 		*vpp = makelonode(vp, vtoli(dvp->v_vfsp), 0);
675 		if (IS_DEVVP(*vpp)) {
676 			vnode_t *svp;
677 
678 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
679 			VN_RELE(*vpp);
680 			if (svp == NULL)
681 				error = ENOSYS;
682 			else
683 				*vpp = svp;
684 		}
685 	}
686 	return (error);
687 }
688 
689 static int
690 lo_remove(vnode_t *dvp, char *nm, struct cred *cr)
691 {
692 #ifdef LODEBUG
693 	lo_dprint(4, "lo_remove vp %p realvp %p\n", dvp, realvp(dvp));
694 #endif
695 	dvp = realvp(dvp);
696 	return (VOP_REMOVE(dvp, nm, cr));
697 }
698 
699 static int
700 lo_link(vnode_t *tdvp, vnode_t *vp, char *tnm, struct cred *cr)
701 {
702 	vnode_t *realvp;
703 
704 #ifdef LODEBUG
705 	lo_dprint(4, "lo_link vp %p realvp %p\n", vp, realvp(vp));
706 #endif
707 
708 	/*
709 	 * The source and destination vnodes may be in different lofs
710 	 * filesystems sharing the same underlying filesystem, so we need to
711 	 * make sure that the filesystem containing the source vnode is not
712 	 * mounted read-only (vn_link() has already checked the target vnode).
713 	 *
714 	 * In a situation such as:
715 	 *
716 	 * /data	- regular filesystem
717 	 * /foo		- lofs mount of /data/foo
718 	 * /bar		- read-only lofs mount of /data/bar
719 	 *
720 	 * This disallows a link from /bar/somefile to /foo/somefile,
721 	 * which would otherwise allow changes to somefile on the read-only
722 	 * mounted /bar.
723 	 */
724 
725 	if (vn_is_readonly(vp)) {
726 		return (EROFS);
727 	}
728 	while (vn_matchops(vp, lo_vnodeops)) {
729 		vp = realvp(vp);
730 	}
731 
732 	/*
733 	 * In the case where the source vnode is on another stacking
734 	 * filesystem (such as specfs), the loop above will
735 	 * terminate before finding the true underlying vnode.
736 	 *
737 	 * We use VOP_REALVP here to continue the search.
738 	 */
739 	if (VOP_REALVP(vp, &realvp) == 0)
740 		vp = realvp;
741 
742 	while (vn_matchops(tdvp, lo_vnodeops)) {
743 		tdvp = realvp(tdvp);
744 	}
745 	if (vp->v_vfsp != tdvp->v_vfsp)
746 		return (EXDEV);
747 	return (VOP_LINK(tdvp, vp, tnm, cr));
748 }
749 
750 static int
751 lo_rename(
752 	vnode_t *odvp,
753 	char *onm,
754 	vnode_t *ndvp,
755 	char *nnm,
756 	struct cred *cr)
757 {
758 	vnode_t *tnvp;
759 
760 #ifdef LODEBUG
761 	lo_dprint(4, "lo_rename vp %p realvp %p\n", odvp, realvp(odvp));
762 #endif
763 	/*
764 	 * If we are coming from a loop back mounted fs, that has been
765 	 * mounted in the same filesystem as where we want to move to,
766 	 * and that filesystem is read/write, but the lofs filesystem is
767 	 * read only, we don't want to allow a rename of the file. The
768 	 * vn_rename code checks to be sure the target is read/write already
769 	 * so that is not necessary here. However, consider the following
770 	 * example:
771 	 *		/ - regular root fs
772 	 *		/foo - directory in root
773 	 *		/foo/bar - file in foo directory(in root fs)
774 	 *		/baz - directory in root
775 	 *		mount -F lofs -o ro /foo /baz - all still in root
776 	 *			directory
777 	 * The fact that we mounted /foo on /baz read only should stop us
778 	 * from renaming the file /foo/bar /bar, but it doesn't since
779 	 * / is read/write. We are still renaming here since we are still
780 	 * in the same filesystem, it is just that we do not check to see
781 	 * if the filesystem we are coming from in this case is read only.
782 	 */
783 	if (odvp->v_vfsp->vfs_flag & VFS_RDONLY)
784 		return (EROFS);
785 	/*
786 	 * We need to make sure we're not trying to remove a mount point for a
787 	 * filesystem mounted on top of lofs, which only we know about.
788 	 */
789 	if (vn_matchops(ndvp, lo_vnodeops))	/* Not our problem. */
790 		goto rename;
791 	if (VOP_LOOKUP(ndvp, nnm, &tnvp, NULL, 0, NULL, cr) != 0)
792 		goto rename;
793 	if (tnvp->v_type != VDIR) {
794 		VN_RELE(tnvp);
795 		goto rename;
796 	}
797 	if (vn_mountedvfs(tnvp)) {
798 		VN_RELE(tnvp);
799 		return (EBUSY);
800 	}
801 	VN_RELE(tnvp);
802 rename:
803 	/*
804 	 * Since the case we're dealing with above can happen at any layer in
805 	 * the stack of lofs filesystems, we need to recurse down the stack,
806 	 * checking to see if there are any instances of a filesystem mounted on
807 	 * top of lofs. In order to keep on using the lofs version of
808 	 * VOP_RENAME(), we make sure that while the target directory is of type
809 	 * lofs, the source directory (the one used for getting the fs-specific
810 	 * version of VOP_RENAME()) is also of type lofs.
811 	 */
812 	if (vn_matchops(ndvp, lo_vnodeops)) {
813 		ndvp = realvp(ndvp);	/* Check the next layer */
814 	} else {
815 		/*
816 		 * We can go fast here
817 		 */
818 		while (vn_matchops(odvp, lo_vnodeops)) {
819 			odvp = realvp(odvp);
820 		}
821 		if (odvp->v_vfsp != ndvp->v_vfsp)
822 			return (EXDEV);
823 	}
824 	return (VOP_RENAME(odvp, onm, ndvp, nnm, cr));
825 }
826 
827 static int
828 lo_mkdir(
829 	vnode_t *dvp,
830 	char *nm,
831 	struct vattr *va,
832 	vnode_t **vpp,
833 	struct cred *cr)
834 {
835 	int error;
836 
837 #ifdef LODEBUG
838 	lo_dprint(4, "lo_mkdir vp %p realvp %p\n", dvp, realvp(dvp));
839 #endif
840 	error = VOP_MKDIR(realvp(dvp), nm, va, vpp, cr);
841 	if (!error)
842 		*vpp = makelonode(*vpp, vtoli(dvp->v_vfsp), 0);
843 	return (error);
844 }
845 
846 static int
847 lo_realvp(vnode_t *vp, vnode_t **vpp)
848 {
849 #ifdef LODEBUG
850 	lo_dprint(4, "lo_realvp %p\n", vp);
851 #endif
852 	while (vn_matchops(vp, lo_vnodeops))
853 		vp = realvp(vp);
854 
855 	if (VOP_REALVP(vp, vpp) != 0)
856 		*vpp = vp;
857 	return (0);
858 }
859 
860 static int
861 lo_rmdir(
862 	vnode_t *dvp,
863 	char *nm,
864 	vnode_t *cdir,
865 	struct cred *cr)
866 {
867 	vnode_t *rvp = cdir;
868 
869 #ifdef LODEBUG
870 	lo_dprint(4, "lo_rmdir vp %p realvp %p\n", dvp, realvp(dvp));
871 #endif
872 	/* if cdir is lofs vnode ptr get its real vnode ptr */
873 	if (vn_matchops(dvp, vn_getops(rvp)))
874 		(void) lo_realvp(cdir, &rvp);
875 	dvp = realvp(dvp);
876 	return (VOP_RMDIR(dvp, nm, rvp, cr));
877 }
878 
879 static int
880 lo_symlink(
881 	vnode_t *dvp,
882 	char *lnm,
883 	struct vattr *tva,
884 	char *tnm,
885 	struct cred *cr)
886 {
887 #ifdef LODEBUG
888 	lo_dprint(4, "lo_symlink vp %p realvp %p\n", dvp, realvp(dvp));
889 #endif
890 	dvp = realvp(dvp);
891 	return (VOP_SYMLINK(dvp, lnm, tva, tnm, cr));
892 }
893 
894 static int
895 lo_readlink(vnode_t *vp, struct uio *uiop, struct cred *cr)
896 {
897 	vp = realvp(vp);
898 	return (VOP_READLINK(vp, uiop, cr));
899 }
900 
901 static int
902 lo_readdir(vnode_t *vp, struct uio *uiop, struct cred *cr, int *eofp)
903 {
904 #ifdef LODEBUG
905 	lo_dprint(4, "lo_readdir vp %p realvp %p\n", vp, realvp(vp));
906 #endif
907 	vp = realvp(vp);
908 	return (VOP_READDIR(vp, uiop, cr, eofp));
909 }
910 
911 static int
912 lo_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
913 {
914 	vp = realvp(vp);
915 	return (VOP_RWLOCK(vp, write_lock, ct));
916 }
917 
918 static void
919 lo_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
920 {
921 	vp = realvp(vp);
922 	VOP_RWUNLOCK(vp, write_lock, ct);
923 }
924 
925 static int
926 lo_seek(vnode_t *vp, offset_t ooff, offset_t *noffp)
927 {
928 	vp = realvp(vp);
929 	return (VOP_SEEK(vp, ooff, noffp));
930 }
931 
932 static int
933 lo_cmp(vnode_t *vp1, vnode_t *vp2)
934 {
935 	while (vn_matchops(vp1, lo_vnodeops))
936 		vp1 = realvp(vp1);
937 	while (vn_matchops(vp2, lo_vnodeops))
938 		vp2 = realvp(vp2);
939 	return (VOP_CMP(vp1, vp2));
940 }
941 
942 static int
943 lo_frlock(
944 	vnode_t *vp,
945 	int cmd,
946 	struct flock64 *bfp,
947 	int flag,
948 	offset_t offset,
949 	struct flk_callback *flk_cbp,
950 	cred_t *cr)
951 {
952 	vp = realvp(vp);
953 	return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr));
954 }
955 
956 static int
957 lo_space(
958 	vnode_t *vp,
959 	int cmd,
960 	struct flock64 *bfp,
961 	int flag,
962 	offset_t offset,
963 	struct cred *cr,
964 	caller_context_t *ct)
965 {
966 	vp = realvp(vp);
967 	return (VOP_SPACE(vp, cmd, bfp, flag, offset, cr, ct));
968 }
969 
970 static int
971 lo_getpage(
972 	vnode_t *vp,
973 	offset_t off,
974 	size_t len,
975 	uint_t *prot,
976 	struct page *parr[],
977 	size_t psz,
978 	struct seg *seg,
979 	caddr_t addr,
980 	enum seg_rw rw,
981 	struct cred *cr)
982 {
983 	vp = realvp(vp);
984 	return (VOP_GETPAGE(vp, off, len, prot, parr, psz, seg, addr, rw, cr));
985 }
986 
987 static int
988 lo_putpage(vnode_t *vp, offset_t off, size_t len, int flags, struct cred *cr)
989 {
990 	vp = realvp(vp);
991 	return (VOP_PUTPAGE(vp, off, len, flags, cr));
992 }
993 
994 static int
995 lo_map(
996 	vnode_t *vp,
997 	offset_t off,
998 	struct as *as,
999 	caddr_t *addrp,
1000 	size_t len,
1001 	uchar_t prot,
1002 	uchar_t maxprot,
1003 	uint_t flags,
1004 	struct cred *cr)
1005 {
1006 	vp = realvp(vp);
1007 	return (VOP_MAP(vp, off, as, addrp, len, prot, maxprot, flags, cr));
1008 }
1009 
1010 static int
1011 lo_addmap(
1012 	vnode_t *vp,
1013 	offset_t off,
1014 	struct as *as,
1015 	caddr_t addr,
1016 	size_t len,
1017 	uchar_t prot,
1018 	uchar_t maxprot,
1019 	uint_t flags,
1020 	struct cred *cr)
1021 {
1022 	vp = realvp(vp);
1023 	return (VOP_ADDMAP(vp, off, as, addr, len, prot, maxprot, flags, cr));
1024 }
1025 
1026 static int
1027 lo_delmap(
1028 	vnode_t *vp,
1029 	offset_t off,
1030 	struct as *as,
1031 	caddr_t addr,
1032 	size_t len,
1033 	uint_t prot,
1034 	uint_t maxprot,
1035 	uint_t flags,
1036 	struct cred *cr)
1037 {
1038 	vp = realvp(vp);
1039 	return (VOP_DELMAP(vp, off, as, addr, len, prot, maxprot, flags, cr));
1040 }
1041 
1042 static int
1043 lo_poll(
1044 	vnode_t *vp,
1045 	short events,
1046 	int anyyet,
1047 	short *reventsp,
1048 	struct pollhead **phpp)
1049 {
1050 	vp = realvp(vp);
1051 	return (VOP_POLL(vp, events, anyyet, reventsp, phpp));
1052 }
1053 
1054 static int
1055 lo_dump(vnode_t *vp, caddr_t addr, int bn, int count)
1056 {
1057 	vp = realvp(vp);
1058 	return (VOP_DUMP(vp, addr, bn, count));
1059 }
1060 
1061 static int
1062 lo_pathconf(vnode_t *vp, int cmd, ulong_t *valp, struct cred *cr)
1063 {
1064 	vp = realvp(vp);
1065 	return (VOP_PATHCONF(vp, cmd, valp, cr));
1066 }
1067 
1068 static int
1069 lo_pageio(
1070 	vnode_t *vp,
1071 	struct page *pp,
1072 	u_offset_t io_off,
1073 	size_t io_len,
1074 	int flags,
1075 	cred_t *cr)
1076 {
1077 	vp = realvp(vp);
1078 	return (VOP_PAGEIO(vp, pp, io_off, io_len, flags, cr));
1079 }
1080 
1081 static void
1082 lo_dispose(vnode_t *vp, page_t *pp, int fl, int dn, cred_t *cr)
1083 {
1084 	vp = realvp(vp);
1085 	if (vp != NULL && !VN_ISKAS(vp))
1086 		VOP_DISPOSE(vp, pp, fl, dn, cr);
1087 }
1088 
1089 static int
1090 lo_setsecattr(vnode_t *vp, vsecattr_t *secattr, int flags, struct cred *cr)
1091 {
1092 	if (vn_is_readonly(vp))
1093 		return (EROFS);
1094 	vp = realvp(vp);
1095 	return (VOP_SETSECATTR(vp, secattr, flags, cr));
1096 }
1097 
1098 static int
1099 lo_getsecattr(vnode_t *vp, vsecattr_t *secattr, int flags, struct cred *cr)
1100 {
1101 	vp = realvp(vp);
1102 	return (VOP_GETSECATTR(vp, secattr, flags, cr));
1103 }
1104 
1105 static int
1106 lo_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr)
1107 {
1108 	vp = realvp(vp);
1109 	return (VOP_SHRLOCK(vp, cmd, shr, flag, cr));
1110 }
1111 
/*
 * Loopback vnode operations vector.
 *
 * lo_vnodeops is the ops pointer that the routines in this file compare
 * against (via vn_matchops()) to recognise lofs vnodes.  It is not
 * assigned in this file; presumably it is built from the template below
 * during filesystem initialisation -- confirm in the lofs vfs code.
 */

struct vnodeops *lo_vnodeops;

/*
 * Template pairing each vnode operation name with the lofs routine that
 * implements it.  The list is terminated by a NULL, NULL pair.  Entries
 * whose handler signature differs from the generic function pointer type
 * are cast to fs_generic_func_p.
 */
const fs_operation_def_t lo_vnodeops_template[] = {
	VOPNAME_OPEN, lo_open,
	VOPNAME_CLOSE, lo_close,
	VOPNAME_READ, lo_read,
	VOPNAME_WRITE, lo_write,
	VOPNAME_IOCTL, lo_ioctl,
	VOPNAME_SETFL, lo_setfl,
	VOPNAME_GETATTR, lo_getattr,
	VOPNAME_SETATTR, lo_setattr,
	VOPNAME_ACCESS, lo_access,
	VOPNAME_LOOKUP, lo_lookup,
	VOPNAME_CREATE, lo_create,
	VOPNAME_REMOVE, lo_remove,
	VOPNAME_LINK, lo_link,
	VOPNAME_RENAME, lo_rename,
	VOPNAME_MKDIR, lo_mkdir,
	VOPNAME_RMDIR, lo_rmdir,
	VOPNAME_READDIR, lo_readdir,
	VOPNAME_SYMLINK, lo_symlink,
	VOPNAME_READLINK, lo_readlink,
	VOPNAME_FSYNC, lo_fsync,
	VOPNAME_INACTIVE, (fs_generic_func_p) lo_inactive,
	VOPNAME_FID, lo_fid,
	VOPNAME_RWLOCK, lo_rwlock,
	VOPNAME_RWUNLOCK, (fs_generic_func_p) lo_rwunlock,
	VOPNAME_SEEK, lo_seek,
	VOPNAME_CMP, lo_cmp,
	VOPNAME_FRLOCK, lo_frlock,
	VOPNAME_SPACE, lo_space,
	VOPNAME_REALVP, lo_realvp,
	VOPNAME_GETPAGE, lo_getpage,
	VOPNAME_PUTPAGE, lo_putpage,
	VOPNAME_MAP, (fs_generic_func_p) lo_map,
	VOPNAME_ADDMAP, (fs_generic_func_p) lo_addmap,
	VOPNAME_DELMAP, lo_delmap,
	VOPNAME_POLL, (fs_generic_func_p) lo_poll,
	VOPNAME_DUMP, lo_dump,
	/* Dump control is not passed through; it maps to fs_error. */
	VOPNAME_DUMPCTL, fs_error,		/* XXX - why? */
	VOPNAME_PATHCONF, lo_pathconf,
	VOPNAME_PAGEIO, lo_pageio,
	VOPNAME_DISPOSE, (fs_generic_func_p) lo_dispose,
	VOPNAME_SETSECATTR, lo_setsecattr,
	VOPNAME_GETSECATTR, lo_getsecattr,
	VOPNAME_SHRLOCK, lo_shrlock,
	NULL, NULL
};
1164