xref: /titanic_44/usr/src/uts/common/fs/lofs/lofs_vnops.c (revision 4a16f9a6c1cc74aeed5ff36b4723c3e43bc67666)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/errno.h>
31 #include <sys/vnode.h>
32 #include <sys/vfs.h>
33 #include <sys/vfs_opreg.h>
34 #include <sys/uio.h>
35 #include <sys/cred.h>
36 #include <sys/pathname.h>
37 #include <sys/debug.h>
38 #include <sys/fs/lofs_node.h>
39 #include <sys/fs/lofs_info.h>
40 #include <fs/fs_subr.h>
41 #include <vm/as.h>
42 #include <vm/seg.h>
43 
/*
 * These are the vnode ops routines which implement the vnode interface to
 * the looped-back file system.  These routines just take their parameters
 * and then call the appropriate real vnode routine(s) to do the work.
 */
49 
50 static int
51 lo_open(vnode_t **vpp, int flag, struct cred *cr, caller_context_t *ct)
52 {
53 	vnode_t *vp = *vpp;
54 	vnode_t *rvp;
55 	vnode_t *oldvp;
56 	int error;
57 
58 #ifdef LODEBUG
59 	lo_dprint(4, "lo_open vp %p cnt=%d realvp %p cnt=%d\n",
60 	    vp, vp->v_count, realvp(vp), realvp(vp)->v_count);
61 #endif
62 
63 	oldvp = vp;
64 	vp = rvp = realvp(vp);
65 	/*
66 	 * Need to hold new reference to vp since VOP_OPEN() may
67 	 * decide to release it.
68 	 */
69 	VN_HOLD(vp);
70 	error = VOP_OPEN(&rvp, flag, cr, ct);
71 
72 	if (!error && rvp != vp) {
73 		/*
74 		 * the FS which we called should have released the
75 		 * new reference on vp
76 		 */
77 		*vpp = makelonode(rvp, vtoli(oldvp->v_vfsp), 0);
78 		if ((*vpp)->v_type == VDIR) {
79 			/*
80 			 * Copy over any looping flags to the new lnode.
81 			 */
82 			(vtol(*vpp))->lo_looping |= (vtol(oldvp))->lo_looping;
83 		}
84 		if (IS_DEVVP(*vpp)) {
85 			vnode_t *svp;
86 
87 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
88 			VN_RELE(*vpp);
89 			if (svp == NULL)
90 				error = ENOSYS;
91 			else
92 				*vpp = svp;
93 		}
94 		VN_RELE(oldvp);
95 	} else {
96 		ASSERT(rvp->v_count > 1);
97 		VN_RELE(rvp);
98 	}
99 
100 	return (error);
101 }
102 
103 static int
104 lo_close(
105 	vnode_t *vp,
106 	int flag,
107 	int count,
108 	offset_t offset,
109 	struct cred *cr,
110 	caller_context_t *ct)
111 {
112 #ifdef LODEBUG
113 	lo_dprint(4, "lo_close vp %p realvp %p\n", vp, realvp(vp));
114 #endif
115 	vp = realvp(vp);
116 	return (VOP_CLOSE(vp, flag, count, offset, cr, ct));
117 }
118 
119 static int
120 lo_read(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
121 	caller_context_t *ct)
122 {
123 #ifdef LODEBUG
124 	lo_dprint(4, "lo_read vp %p realvp %p\n", vp, realvp(vp));
125 #endif
126 	vp = realvp(vp);
127 	return (VOP_READ(vp, uiop, ioflag, cr, ct));
128 }
129 
130 static int
131 lo_write(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
132 	caller_context_t *ct)
133 {
134 #ifdef LODEBUG
135 	lo_dprint(4, "lo_write vp %p realvp %p\n", vp, realvp(vp));
136 #endif
137 	vp = realvp(vp);
138 	return (VOP_WRITE(vp, uiop, ioflag, cr, ct));
139 }
140 
141 static int
142 lo_ioctl(
143 	vnode_t *vp,
144 	int cmd,
145 	intptr_t arg,
146 	int flag,
147 	struct cred *cr,
148 	int *rvalp,
149 	caller_context_t *ct)
150 {
151 #ifdef LODEBUG
152 	lo_dprint(4, "lo_ioctl vp %p realvp %p\n", vp, realvp(vp));
153 #endif
154 	vp = realvp(vp);
155 	return (VOP_IOCTL(vp, cmd, arg, flag, cr, rvalp, ct));
156 }
157 
158 static int
159 lo_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, caller_context_t *ct)
160 {
161 	vp = realvp(vp);
162 	return (VOP_SETFL(vp, oflags, nflags, cr, ct));
163 }
164 
165 static int
166 lo_getattr(
167 	vnode_t *vp,
168 	struct vattr *vap,
169 	int flags,
170 	struct cred *cr,
171 	caller_context_t *ct)
172 {
173 	int error;
174 
175 #ifdef LODEBUG
176 	lo_dprint(4, "lo_getattr vp %p realvp %p\n", vp, realvp(vp));
177 #endif
178 	if (error = VOP_GETATTR(realvp(vp), vap, flags, cr, ct))
179 		return (error);
180 
181 	return (0);
182 }
183 
184 static int
185 lo_setattr(
186 	vnode_t *vp,
187 	struct vattr *vap,
188 	int flags,
189 	struct cred *cr,
190 	caller_context_t *ct)
191 {
192 #ifdef LODEBUG
193 	lo_dprint(4, "lo_setattr vp %p realvp %p\n", vp, realvp(vp));
194 #endif
195 	vp = realvp(vp);
196 	return (VOP_SETATTR(vp, vap, flags, cr, ct));
197 }
198 
199 static int
200 lo_access(
201 	vnode_t *vp,
202 	int mode,
203 	int flags,
204 	struct cred *cr,
205 	caller_context_t *ct)
206 {
207 #ifdef LODEBUG
208 	lo_dprint(4, "lo_access vp %p realvp %p\n", vp, realvp(vp));
209 #endif
210 	if (mode & VWRITE) {
211 		if (vp->v_type == VREG && vn_is_readonly(vp))
212 			return (EROFS);
213 	}
214 	vp = realvp(vp);
215 	return (VOP_ACCESS(vp, mode, flags, cr, ct));
216 }
217 
218 static int
219 lo_fsync(vnode_t *vp, int syncflag, struct cred *cr, caller_context_t *ct)
220 {
221 #ifdef LODEBUG
222 	lo_dprint(4, "lo_fsync vp %p realvp %p\n", vp, realvp(vp));
223 #endif
224 	vp = realvp(vp);
225 	return (VOP_FSYNC(vp, syncflag, cr, ct));
226 }
227 
228 /*ARGSUSED*/
229 static void
230 lo_inactive(vnode_t *vp, struct cred *cr, caller_context_t *ct)
231 {
232 #ifdef LODEBUG
233 	lo_dprint(4, "lo_inactive %p, realvp %p\n", vp, realvp(vp));
234 #endif
235 	freelonode(vtol(vp));
236 }
237 
238 /* ARGSUSED */
239 static int
240 lo_fid(vnode_t *vp, struct fid *fidp, caller_context_t *ct)
241 {
242 #ifdef LODEBUG
243 	lo_dprint(4, "lo_fid %p, realvp %p\n", vp, realvp(vp));
244 #endif
245 	vp = realvp(vp);
246 	return (VOP_FID(vp, fidp, ct));
247 }
248 
249 /*
250  * Given a vnode of lofs type, lookup nm name and
251  * return a shadow vnode (of lofs type) of the
252  * real vnode found.
253  *
254  * Due to the nature of lofs, there is a potential
255  * looping in path traversal.
256  *
257  * starting from the mount point of an lofs;
258  * a loop is defined to be a traversal path
259  * where the mount point or the real vnode of
260  * the root of this lofs is encountered twice.
261  * Once at the start of traversal and second
262  * when the looping is found.
263  *
264  * When a loop is encountered, a shadow of the
265  * covered vnode is returned to stop the looping.
266  *
267  * This normally works, but with the advent of
268  * the new automounter, returning the shadow of the
269  * covered vnode (autonode, in this case) does not
270  * stop the loop.  Because further lookup on this
271  * lonode will cause the autonode to call lo_lookup()
272  * on the lonode covering it.
273  *
274  * example "/net/jurassic/net/jurassic" is a loop.
275  * returning the shadow of the autonode corresponding to
276  * "/net/jurassic/net/jurassic" will not terminate the
277  * loop.   To solve this problem we allow the loop to go
278  * through one more level component lookup.  Whichever
279  * directory is then looked up in "/net/jurassic/net/jurassic"
280  * the vnode returned is the vnode covered by the autonode
281  * "net" and this will terminate the loop.
282  *
283  * Lookup for dot dot has to be dealt with separately.
284  * It will be nice to have a "one size fits all" kind
285  * of solution, so that we don't have so many ifs statement
286  * in the lo_lookup() to handle dotdot.  But, since
287  * there are so many special cases to handle different
288  * kinds looping above, we need special codes to handle
289  * dotdot lookup as well.
290  */
291 static int
292 lo_lookup(
293 	vnode_t *dvp,
294 	char *nm,
295 	vnode_t **vpp,
296 	struct pathname *pnp,
297 	int flags,
298 	vnode_t *rdir,
299 	struct cred *cr,
300 	caller_context_t *ct,
301 	int *direntflags,
302 	pathname_t *realpnp)
303 {
304 	vnode_t *vp = NULL, *tvp = NULL, *nonlovp;
305 	int error, is_indirectloop;
306 	vnode_t *realdvp = realvp(dvp);
307 	struct loinfo *li = vtoli(dvp->v_vfsp);
308 	int looping = 0;
309 	int autoloop = 0;
310 	int doingdotdot = 0;
311 	int nosub = 0;
312 	int mkflag = 0;
313 
314 	/*
315 	 * If name is empty and no XATTR flags are set, then return
316 	 * dvp (empty name == lookup ".").  If an XATTR flag is set
317 	 * then we need to call VOP_LOOKUP to get the xattr dir.
318 	 */
319 	if (nm[0] == '\0' && ! (flags & (CREATE_XATTR_DIR|LOOKUP_XATTR))) {
320 		VN_HOLD(dvp);
321 		*vpp = dvp;
322 		return (0);
323 	}
324 
325 	if (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0') {
326 		doingdotdot++;
327 		/*
328 		 * Handle ".." out of mounted filesystem
329 		 */
330 		while ((realdvp->v_flag & VROOT) && realdvp != rootdir) {
331 			realdvp = realdvp->v_vfsp->vfs_vnodecovered;
332 			ASSERT(realdvp != NULL);
333 		}
334 	}
335 
336 	*vpp = NULL;	/* default(error) case */
337 
338 	/*
339 	 * Do the normal lookup
340 	 */
341 	if (error = VOP_LOOKUP(realdvp, nm, &vp, pnp, flags, rdir, cr,
342 	    ct, direntflags, realpnp)) {
343 		vp = NULL;
344 		goto out;
345 	}
346 
347 	/*
348 	 * We do this check here to avoid returning a stale file handle to the
349 	 * caller.
350 	 */
351 	if (nm[0] == '.' && nm[1] == '\0') {
352 		ASSERT(vp == realdvp);
353 		VN_HOLD(dvp);
354 		VN_RELE(vp);
355 		*vpp = dvp;
356 		return (0);
357 	}
358 
359 	if (doingdotdot) {
360 		if ((vtol(dvp))->lo_looping & LO_LOOPING) {
361 			vfs_t *vfsp;
362 
363 			error = vn_vfsrlock_wait(realdvp);
364 			if (error)
365 				goto out;
366 			vfsp = vn_mountedvfs(realdvp);
367 			/*
368 			 * In the standard case if the looping flag is set and
369 			 * performing dotdot we would be returning from a
370 			 * covered vnode, implying vfsp could not be null. The
371 			 * exceptions being if we have looping and overlay
372 			 * mounts or looping and covered file systems.
373 			 */
374 			if (vfsp == NULL) {
375 				/*
376 				 * Overlay mount or covered file system,
377 				 * so just make the shadow node.
378 				 */
379 				vn_vfsunlock(realdvp);
380 				*vpp = makelonode(vp, li, 0);
381 				(vtol(*vpp))->lo_looping |= LO_LOOPING;
382 				return (0);
383 			}
384 			/*
385 			 * When looping get the actual found vnode
386 			 * instead of the vnode covered.
387 			 * Here we have to hold the lock for realdvp
388 			 * since an unmount during the traversal to the
389 			 * root vnode would turn *vfsp into garbage
390 			 * which would be fatal.
391 			 */
392 			error = VFS_ROOT(vfsp, &tvp);
393 			vn_vfsunlock(realdvp);
394 
395 			if (error)
396 				goto out;
397 
398 			if ((tvp == li->li_rootvp) && (vp == realvp(tvp))) {
399 				/*
400 				 * we're back at the real vnode
401 				 * of the rootvp
402 				 *
403 				 * return the rootvp
404 				 * Ex: /mnt/mnt/..
405 				 * where / has been lofs-mounted
406 				 * onto /mnt.  Return the lofs
407 				 * node mounted at /mnt.
408 				 */
409 				*vpp = tvp;
410 				VN_RELE(vp);
411 				return (0);
412 			} else {
413 				/*
414 				 * We are returning from a covered
415 				 * node whose vfs_mountedhere is
416 				 * not pointing to vfs of the current
417 				 * root vnode.
418 				 * This is a condn where in we
419 				 * returned a covered node say Zc
420 				 * but Zc is not the cover of current
421 				 * root.
422 				 * i.e.., if X is the root vnode
423 				 * lookup(Zc,"..") is taking us to
424 				 * X.
425 				 * Ex: /net/X/net/X/Y
426 				 *
427 				 * If LO_AUTOLOOP (autofs/lofs looping detected)
428 				 * has been set then we are encountering the
429 				 * cover of Y (Y being any directory vnode
430 				 * under /net/X/net/X/).
431 				 * When performing a dotdot set the
432 				 * returned vp to the vnode covered
433 				 * by the mounted lofs, ie /net/X/net/X
434 				 */
435 				VN_RELE(tvp);
436 				if ((vtol(dvp))->lo_looping & LO_AUTOLOOP) {
437 					VN_RELE(vp);
438 					vp = li->li_rootvp;
439 					vp = vp->v_vfsp->vfs_vnodecovered;
440 					VN_HOLD(vp);
441 					*vpp = makelonode(vp, li, 0);
442 					(vtol(*vpp))->lo_looping |= LO_LOOPING;
443 					return (0);
444 				}
445 			}
446 		} else {
447 			/*
448 			 * No frills just make the shadow node.
449 			 */
450 			*vpp = makelonode(vp, li, 0);
451 			return (0);
452 		}
453 	}
454 
455 	nosub = (vtoli(dvp->v_vfsp)->li_flag & LO_NOSUB);
456 
457 	/*
458 	 * If this vnode is mounted on, then we
459 	 * traverse to the vnode which is the root of
460 	 * the mounted file system.
461 	 */
462 	if (!nosub && (error = traverse(&vp)))
463 		goto out;
464 
465 	/*
466 	 * Make a lnode for the real vnode.
467 	 */
468 	if (vp->v_type != VDIR || nosub) {
469 		*vpp = makelonode(vp, li, 0);
470 		if (IS_DEVVP(*vpp)) {
471 			vnode_t *svp;
472 
473 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
474 			VN_RELE(*vpp);
475 			if (svp == NULL)
476 				error = ENOSYS;
477 			else
478 				*vpp = svp;
479 		}
480 		return (error);
481 	}
482 
483 	/*
484 	 * if the found vnode (vp) is not of type lofs
485 	 * then we're just going to make a shadow of that
486 	 * vp and get out.
487 	 *
488 	 * If the found vnode (vp) is of lofs type, and
489 	 * we're not doing dotdot, check if we are
490 	 * looping.
491 	 */
492 	if (!doingdotdot && vfs_matchops(vp->v_vfsp, lo_vfsops)) {
493 		/*
494 		 * Check if we're looping, i.e.
495 		 * vp equals the root vp of the lofs, directly
496 		 * or indirectly, return the covered node.
497 		 */
498 
499 		if (!((vtol(dvp))->lo_looping & LO_LOOPING)) {
500 			if (vp == li->li_rootvp) {
501 				/*
502 				 * Direct looping condn.
503 				 * Ex:- X is / mounted directory so lookup of
504 				 * /X/X is a direct looping condn.
505 				 */
506 				tvp = vp;
507 				vp = vp->v_vfsp->vfs_vnodecovered;
508 				VN_HOLD(vp);
509 				VN_RELE(tvp);
510 				looping++;
511 			} else {
512 				/*
513 				 * Indirect looping can be defined as
514 				 * real lookup returning rootvp of the current
515 				 * tree in any level of recursion.
516 				 *
517 				 * This check is useful if there are multiple
518 				 * levels of lofs indirections. Suppose vnode X
519 				 * in the current lookup has as its real vnode
520 				 * another lofs node. Y = realvp(X) Y should be
521 				 * a lofs node for the check to continue or Y
522 				 * is not the rootvp of X.
523 				 * Ex:- say X and Y are two vnodes
524 				 * say real(Y) is X and real(X) is Z
525 				 * parent vnode for X and Y is Z
526 				 * lookup(Y,"path") say we are looking for Y
527 				 * again under Y and we have to return Yc.
528 				 * but the lookup of Y under Y doesnot return
529 				 * Y the root vnode again here is why.
530 				 * 1. lookup(Y,"path of Y") will go to
531 				 * 2. lookup(real(Y),"path of Y") and then to
532 				 * 3. lookup(real(X),"path of Y").
533 				 * and now what lookup level 1 sees is the
534 				 * outcome of 2 but the vnode Y is due to
535 				 * lookup(Z,"path of Y") so we have to skip
536 				 * intermediate levels to find if in any level
537 				 * there is a looping.
538 				 */
539 				is_indirectloop = 0;
540 				nonlovp = vp;
541 				while (
542 				    vfs_matchops(nonlovp->v_vfsp, lo_vfsops) &&
543 				    !(is_indirectloop)) {
544 					if (li->li_rootvp  == nonlovp) {
545 						is_indirectloop++;
546 						break;
547 					}
548 					nonlovp = realvp(nonlovp);
549 				}
550 
551 				if (is_indirectloop) {
552 					VN_RELE(vp);
553 					vp = nonlovp;
554 					vp = vp->v_vfsp->vfs_vnodecovered;
555 					VN_HOLD(vp);
556 					looping++;
557 				}
558 			}
559 		} else {
560 			/*
561 			 * come here only because of the interaction between
562 			 * the autofs and lofs.
563 			 *
564 			 * Lookup of "/net/X/net/X" will return a shadow of
565 			 * an autonode X_a which we call X_l.
566 			 *
567 			 * Lookup of anything under X_l, will trigger a call to
568 			 * auto_lookup(X_a,nm) which will eventually call
569 			 * lo_lookup(X_lr,nm) where X_lr is the root vnode of
570 			 * the current lofs.
571 			 *
572 			 * We come here only when we are called with X_l as dvp
573 			 * and look for something underneath.
574 			 *
575 			 * Now that an autofs/lofs looping condition has been
576 			 * identified any directory vnode contained within
577 			 * dvp will be set to the vnode covered by the
578 			 * mounted autofs. Thus all directories within dvp
579 			 * will appear empty hence teminating the looping.
580 			 * The LO_AUTOLOOP flag is set on the returned lonode
581 			 * to indicate the termination of the autofs/lofs
582 			 * looping. This is required for the correct behaviour
583 			 * when performing a dotdot.
584 			 */
585 			realdvp = realvp(dvp);
586 			while (vfs_matchops(realdvp->v_vfsp, lo_vfsops)) {
587 				realdvp = realvp(realdvp);
588 			}
589 
590 			error = VFS_ROOT(realdvp->v_vfsp, &tvp);
591 			if (error)
592 				goto out;
593 			/*
594 			 * tvp now contains the rootvp of the vfs of the
595 			 * real vnode of dvp. The directory vnode vp is set
596 			 * to the covered vnode to terminate looping. No
597 			 * distinction is made between any vp as all directory
598 			 * vnodes contained in dvp are returned as the covered
599 			 * vnode.
600 			 */
601 			VN_RELE(vp);
602 			vp = tvp;	/* possibly is an autonode */
603 
604 			/*
605 			 * Need to find the covered vnode
606 			 */
607 			if (vp->v_vfsp->vfs_vnodecovered == NULL) {
608 				/*
609 				 * We don't have a covered vnode so this isn't
610 				 * an autonode. To find the autonode simply
611 				 * find the vnode covered by the lofs rootvp.
612 				 */
613 				vp = li->li_rootvp;
614 				vp = vp->v_vfsp->vfs_vnodecovered;
615 				VN_RELE(tvp);
616 				error = VFS_ROOT(vp->v_vfsp, &tvp);
617 				if (error)
618 					goto out;
619 				vp = tvp;	/* now this is an autonode */
620 				if (vp->v_vfsp->vfs_vnodecovered == NULL) {
621 					/*
622 					 * Still can't find a covered vnode.
623 					 * Fail the lookup, or we'd loop.
624 					 */
625 					error = ENOENT;
626 					goto out;
627 				}
628 			}
629 			vp = vp->v_vfsp->vfs_vnodecovered;
630 			VN_HOLD(vp);
631 			VN_RELE(tvp);
632 			/*
633 			 * Force the creation of a new lnode even if the hash
634 			 * table contains a lnode that references this vnode.
635 			 */
636 			mkflag = LOF_FORCE;
637 			autoloop++;
638 		}
639 	}
640 	*vpp = makelonode(vp, li, mkflag);
641 
642 	if ((looping) ||
643 	    (((vtol(dvp))->lo_looping & LO_LOOPING) && !doingdotdot)) {
644 		(vtol(*vpp))->lo_looping |= LO_LOOPING;
645 	}
646 
647 	if (autoloop) {
648 		(vtol(*vpp))->lo_looping |= LO_AUTOLOOP;
649 	}
650 
651 out:
652 	if (error != 0 && vp != NULL)
653 		VN_RELE(vp);
654 #ifdef LODEBUG
655 	lo_dprint(4,
656 	"lo_lookup dvp %x realdvp %x nm '%s' newvp %x real vp %x error %d\n",
657 	    dvp, realvp(dvp), nm, *vpp, vp, error);
658 #endif
659 	return (error);
660 }
661 
662 /*ARGSUSED*/
663 static int
664 lo_create(
665 	vnode_t *dvp,
666 	char *nm,
667 	struct vattr *va,
668 	enum vcexcl exclusive,
669 	int mode,
670 	vnode_t **vpp,
671 	struct cred *cr,
672 	int flag,
673 	caller_context_t *ct,
674 	vsecattr_t *vsecp)
675 {
676 	int error;
677 	vnode_t *vp = NULL;
678 
679 #ifdef LODEBUG
680 	lo_dprint(4, "lo_create vp %p realvp %p\n", dvp, realvp(dvp));
681 #endif
682 	if (*nm == '\0') {
683 		ASSERT(vpp && dvp == *vpp);
684 		vp = realvp(*vpp);
685 	}
686 
687 	error = VOP_CREATE(realvp(dvp), nm, va, exclusive, mode, &vp, cr, flag,
688 	    ct, vsecp);
689 	if (!error) {
690 		*vpp = makelonode(vp, vtoli(dvp->v_vfsp), 0);
691 		if (IS_DEVVP(*vpp)) {
692 			vnode_t *svp;
693 
694 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
695 			VN_RELE(*vpp);
696 			if (svp == NULL)
697 				error = ENOSYS;
698 			else
699 				*vpp = svp;
700 		}
701 	}
702 	return (error);
703 }
704 
705 static int
706 lo_remove(
707 	vnode_t *dvp,
708 	char *nm,
709 	struct cred *cr,
710 	caller_context_t *ct,
711 	int flags)
712 {
713 #ifdef LODEBUG
714 	lo_dprint(4, "lo_remove vp %p realvp %p\n", dvp, realvp(dvp));
715 #endif
716 	dvp = realvp(dvp);
717 	return (VOP_REMOVE(dvp, nm, cr, ct, flags));
718 }
719 
720 static int
721 lo_link(
722 	vnode_t *tdvp,
723 	vnode_t *vp,
724 	char *tnm,
725 	struct cred *cr,
726 	caller_context_t *ct,
727 	int flags)
728 {
729 	vnode_t *realvp;
730 
731 #ifdef LODEBUG
732 	lo_dprint(4, "lo_link vp %p realvp %p\n", vp, realvp(vp));
733 #endif
734 
735 	/*
736 	 * The source and destination vnodes may be in different lofs
737 	 * filesystems sharing the same underlying filesystem, so we need to
738 	 * make sure that the filesystem containing the source vnode is not
739 	 * mounted read-only (vn_link() has already checked the target vnode).
740 	 *
741 	 * In a situation such as:
742 	 *
743 	 * /data	- regular filesystem
744 	 * /foo		- lofs mount of /data/foo
745 	 * /bar		- read-only lofs mount of /data/bar
746 	 *
747 	 * This disallows a link from /bar/somefile to /foo/somefile,
748 	 * which would otherwise allow changes to somefile on the read-only
749 	 * mounted /bar.
750 	 */
751 
752 	if (vn_is_readonly(vp)) {
753 		return (EROFS);
754 	}
755 	while (vn_matchops(vp, lo_vnodeops)) {
756 		vp = realvp(vp);
757 	}
758 
759 	/*
760 	 * In the case where the source vnode is on another stacking
761 	 * filesystem (such as specfs), the loop above will
762 	 * terminate before finding the true underlying vnode.
763 	 *
764 	 * We use VOP_REALVP here to continue the search.
765 	 */
766 	if (VOP_REALVP(vp, &realvp, ct) == 0)
767 		vp = realvp;
768 
769 	while (vn_matchops(tdvp, lo_vnodeops)) {
770 		tdvp = realvp(tdvp);
771 	}
772 	if (vp->v_vfsp != tdvp->v_vfsp)
773 		return (EXDEV);
774 	return (VOP_LINK(tdvp, vp, tnm, cr, ct, flags));
775 }
776 
777 static int
778 lo_rename(
779 	vnode_t *odvp,
780 	char *onm,
781 	vnode_t *ndvp,
782 	char *nnm,
783 	struct cred *cr,
784 	caller_context_t *ct,
785 	int flags)
786 {
787 	vnode_t *tnvp;
788 
789 #ifdef LODEBUG
790 	lo_dprint(4, "lo_rename vp %p realvp %p\n", odvp, realvp(odvp));
791 #endif
792 	/*
793 	 * If we are coming from a loop back mounted fs, that has been
794 	 * mounted in the same filesystem as where we want to move to,
795 	 * and that filesystem is read/write, but the lofs filesystem is
796 	 * read only, we don't want to allow a rename of the file. The
797 	 * vn_rename code checks to be sure the target is read/write already
798 	 * so that is not necessary here. However, consider the following
799 	 * example:
800 	 *		/ - regular root fs
801 	 *		/foo - directory in root
802 	 *		/foo/bar - file in foo directory(in root fs)
803 	 *		/baz - directory in root
804 	 *		mount -F lofs -o ro /foo /baz - all still in root
805 	 *			directory
806 	 * The fact that we mounted /foo on /baz read only should stop us
807 	 * from renaming the file /foo/bar /bar, but it doesn't since
808 	 * / is read/write. We are still renaming here since we are still
809 	 * in the same filesystem, it is just that we do not check to see
810 	 * if the filesystem we are coming from in this case is read only.
811 	 */
812 	if (odvp->v_vfsp->vfs_flag & VFS_RDONLY)
813 		return (EROFS);
814 	/*
815 	 * We need to make sure we're not trying to remove a mount point for a
816 	 * filesystem mounted on top of lofs, which only we know about.
817 	 */
818 	if (vn_matchops(ndvp, lo_vnodeops))	/* Not our problem. */
819 		goto rename;
820 
821 	/*
822 	 * XXXci - Once case-insensitive behavior is implemented, it should
823 	 * be added here.
824 	 */
825 	if (VOP_LOOKUP(ndvp, nnm, &tnvp, NULL, 0, NULL, cr,
826 	    ct, NULL, NULL) != 0)
827 		goto rename;
828 	if (tnvp->v_type != VDIR) {
829 		VN_RELE(tnvp);
830 		goto rename;
831 	}
832 	if (vn_mountedvfs(tnvp)) {
833 		VN_RELE(tnvp);
834 		return (EBUSY);
835 	}
836 	VN_RELE(tnvp);
837 rename:
838 	/*
839 	 * Since the case we're dealing with above can happen at any layer in
840 	 * the stack of lofs filesystems, we need to recurse down the stack,
841 	 * checking to see if there are any instances of a filesystem mounted on
842 	 * top of lofs. In order to keep on using the lofs version of
843 	 * VOP_RENAME(), we make sure that while the target directory is of type
844 	 * lofs, the source directory (the one used for getting the fs-specific
845 	 * version of VOP_RENAME()) is also of type lofs.
846 	 */
847 	if (vn_matchops(ndvp, lo_vnodeops)) {
848 		ndvp = realvp(ndvp);	/* Check the next layer */
849 	} else {
850 		/*
851 		 * We can go fast here
852 		 */
853 		while (vn_matchops(odvp, lo_vnodeops)) {
854 			odvp = realvp(odvp);
855 		}
856 		if (odvp->v_vfsp != ndvp->v_vfsp)
857 			return (EXDEV);
858 	}
859 	return (VOP_RENAME(odvp, onm, ndvp, nnm, cr, ct, flags));
860 }
861 
862 static int
863 lo_mkdir(
864 	vnode_t *dvp,
865 	char *nm,
866 	struct vattr *va,
867 	vnode_t **vpp,
868 	struct cred *cr,
869 	caller_context_t *ct,
870 	int flags,
871 	vsecattr_t *vsecp)
872 {
873 	int error;
874 
875 #ifdef LODEBUG
876 	lo_dprint(4, "lo_mkdir vp %p realvp %p\n", dvp, realvp(dvp));
877 #endif
878 	error = VOP_MKDIR(realvp(dvp), nm, va, vpp, cr, ct, flags, vsecp);
879 	if (!error)
880 		*vpp = makelonode(*vpp, vtoli(dvp->v_vfsp), 0);
881 	return (error);
882 }
883 
884 static int
885 lo_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
886 {
887 #ifdef LODEBUG
888 	lo_dprint(4, "lo_realvp %p\n", vp);
889 #endif
890 	while (vn_matchops(vp, lo_vnodeops))
891 		vp = realvp(vp);
892 
893 	if (VOP_REALVP(vp, vpp, ct) != 0)
894 		*vpp = vp;
895 	return (0);
896 }
897 
898 static int
899 lo_rmdir(
900 	vnode_t *dvp,
901 	char *nm,
902 	vnode_t *cdir,
903 	struct cred *cr,
904 	caller_context_t *ct,
905 	int flags)
906 {
907 	vnode_t *rvp = cdir;
908 
909 #ifdef LODEBUG
910 	lo_dprint(4, "lo_rmdir vp %p realvp %p\n", dvp, realvp(dvp));
911 #endif
912 	/* if cdir is lofs vnode ptr get its real vnode ptr */
913 	if (vn_matchops(dvp, vn_getops(rvp)))
914 		(void) lo_realvp(cdir, &rvp, ct);
915 	dvp = realvp(dvp);
916 	return (VOP_RMDIR(dvp, nm, rvp, cr, ct, flags));
917 }
918 
919 static int
920 lo_symlink(
921 	vnode_t *dvp,
922 	char *lnm,
923 	struct vattr *tva,
924 	char *tnm,
925 	struct cred *cr,
926 	caller_context_t *ct,
927 	int flags)
928 {
929 #ifdef LODEBUG
930 	lo_dprint(4, "lo_symlink vp %p realvp %p\n", dvp, realvp(dvp));
931 #endif
932 	dvp = realvp(dvp);
933 	return (VOP_SYMLINK(dvp, lnm, tva, tnm, cr, ct, flags));
934 }
935 
936 static int
937 lo_readlink(
938 	vnode_t *vp,
939 	struct uio *uiop,
940 	struct cred *cr,
941 	caller_context_t *ct)
942 {
943 	vp = realvp(vp);
944 	return (VOP_READLINK(vp, uiop, cr, ct));
945 }
946 
947 static int
948 lo_readdir(
949 	vnode_t *vp,
950 	struct uio *uiop,
951 	struct cred *cr,
952 	int *eofp,
953 	caller_context_t *ct,
954 	int flags)
955 {
956 #ifdef LODEBUG
957 	lo_dprint(4, "lo_readdir vp %p realvp %p\n", vp, realvp(vp));
958 #endif
959 	vp = realvp(vp);
960 	return (VOP_READDIR(vp, uiop, cr, eofp, ct, flags));
961 }
962 
963 static int
964 lo_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
965 {
966 	vp = realvp(vp);
967 	return (VOP_RWLOCK(vp, write_lock, ct));
968 }
969 
970 static void
971 lo_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
972 {
973 	vp = realvp(vp);
974 	VOP_RWUNLOCK(vp, write_lock, ct);
975 }
976 
977 static int
978 lo_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
979 {
980 	vp = realvp(vp);
981 	return (VOP_SEEK(vp, ooff, noffp, ct));
982 }
983 
984 static int
985 lo_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
986 {
987 	while (vn_matchops(vp1, lo_vnodeops))
988 		vp1 = realvp(vp1);
989 	while (vn_matchops(vp2, lo_vnodeops))
990 		vp2 = realvp(vp2);
991 	return (VOP_CMP(vp1, vp2, ct));
992 }
993 
994 static int
995 lo_frlock(
996 	vnode_t *vp,
997 	int cmd,
998 	struct flock64 *bfp,
999 	int flag,
1000 	offset_t offset,
1001 	struct flk_callback *flk_cbp,
1002 	cred_t *cr,
1003 	caller_context_t *ct)
1004 {
1005 	vp = realvp(vp);
1006 	return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1007 }
1008 
1009 static int
1010 lo_space(
1011 	vnode_t *vp,
1012 	int cmd,
1013 	struct flock64 *bfp,
1014 	int flag,
1015 	offset_t offset,
1016 	struct cred *cr,
1017 	caller_context_t *ct)
1018 {
1019 	vp = realvp(vp);
1020 	return (VOP_SPACE(vp, cmd, bfp, flag, offset, cr, ct));
1021 }
1022 
1023 static int
1024 lo_getpage(
1025 	vnode_t *vp,
1026 	offset_t off,
1027 	size_t len,
1028 	uint_t *prot,
1029 	struct page *parr[],
1030 	size_t psz,
1031 	struct seg *seg,
1032 	caddr_t addr,
1033 	enum seg_rw rw,
1034 	struct cred *cr,
1035 	caller_context_t *ct)
1036 {
1037 	vp = realvp(vp);
1038 	return (VOP_GETPAGE(vp, off, len, prot, parr, psz, seg, addr, rw, cr,
1039 	    ct));
1040 }
1041 
1042 static int
1043 lo_putpage(
1044 	vnode_t *vp,
1045 	offset_t off,
1046 	size_t len,
1047 	int flags,
1048 	struct cred *cr,
1049 	caller_context_t *ct)
1050 {
1051 	vp = realvp(vp);
1052 	return (VOP_PUTPAGE(vp, off, len, flags, cr, ct));
1053 }
1054 
1055 static int
1056 lo_map(
1057 	vnode_t *vp,
1058 	offset_t off,
1059 	struct as *as,
1060 	caddr_t *addrp,
1061 	size_t len,
1062 	uchar_t prot,
1063 	uchar_t maxprot,
1064 	uint_t flags,
1065 	struct cred *cr,
1066 	caller_context_t *ct)
1067 {
1068 	vp = realvp(vp);
1069 	return (VOP_MAP(vp, off, as, addrp, len, prot, maxprot, flags, cr, ct));
1070 }
1071 
1072 static int
1073 lo_addmap(
1074 	vnode_t *vp,
1075 	offset_t off,
1076 	struct as *as,
1077 	caddr_t addr,
1078 	size_t len,
1079 	uchar_t prot,
1080 	uchar_t maxprot,
1081 	uint_t flags,
1082 	struct cred *cr,
1083 	caller_context_t *ct)
1084 {
1085 	vp = realvp(vp);
1086 	return (VOP_ADDMAP(vp, off, as, addr, len, prot, maxprot, flags, cr,
1087 	    ct));
1088 }
1089 
1090 static int
1091 lo_delmap(
1092 	vnode_t *vp,
1093 	offset_t off,
1094 	struct as *as,
1095 	caddr_t addr,
1096 	size_t len,
1097 	uint_t prot,
1098 	uint_t maxprot,
1099 	uint_t flags,
1100 	struct cred *cr,
1101 	caller_context_t *ct)
1102 {
1103 	vp = realvp(vp);
1104 	return (VOP_DELMAP(vp, off, as, addr, len, prot, maxprot, flags, cr,
1105 	    ct));
1106 }
1107 
1108 static int
1109 lo_poll(
1110 	vnode_t *vp,
1111 	short events,
1112 	int anyyet,
1113 	short *reventsp,
1114 	struct pollhead **phpp,
1115 	caller_context_t *ct)
1116 {
1117 	vp = realvp(vp);
1118 	return (VOP_POLL(vp, events, anyyet, reventsp, phpp, ct));
1119 }
1120 
1121 static int
1122 lo_dump(vnode_t *vp, caddr_t addr, offset_t bn, offset_t count,
1123     caller_context_t *ct)
1124 {
1125 	vp = realvp(vp);
1126 	return (VOP_DUMP(vp, addr, bn, count, ct));
1127 }
1128 
1129 static int
1130 lo_pathconf(
1131 	vnode_t *vp,
1132 	int cmd,
1133 	ulong_t *valp,
1134 	struct cred *cr,
1135 	caller_context_t *ct)
1136 {
1137 	vp = realvp(vp);
1138 	return (VOP_PATHCONF(vp, cmd, valp, cr, ct));
1139 }
1140 
1141 static int
1142 lo_pageio(
1143 	vnode_t *vp,
1144 	struct page *pp,
1145 	u_offset_t io_off,
1146 	size_t io_len,
1147 	int flags,
1148 	cred_t *cr,
1149 	caller_context_t *ct)
1150 {
1151 	vp = realvp(vp);
1152 	return (VOP_PAGEIO(vp, pp, io_off, io_len, flags, cr, ct));
1153 }
1154 
1155 static void
1156 lo_dispose(
1157 	vnode_t *vp,
1158 	page_t *pp,
1159 	int fl,
1160 	int dn,
1161 	cred_t *cr,
1162 	caller_context_t *ct)
1163 {
1164 	vp = realvp(vp);
1165 	if (vp != NULL && !VN_ISKAS(vp))
1166 		VOP_DISPOSE(vp, pp, fl, dn, cr, ct);
1167 }
1168 
1169 static int
1170 lo_setsecattr(
1171 	vnode_t *vp,
1172 	vsecattr_t *secattr,
1173 	int flags,
1174 	struct cred *cr,
1175 	caller_context_t *ct)
1176 {
1177 	if (vn_is_readonly(vp))
1178 		return (EROFS);
1179 	vp = realvp(vp);
1180 	return (VOP_SETSECATTR(vp, secattr, flags, cr, ct));
1181 }
1182 
1183 static int
1184 lo_getsecattr(
1185 	vnode_t *vp,
1186 	vsecattr_t *secattr,
1187 	int flags,
1188 	struct cred *cr,
1189 	caller_context_t *ct)
1190 {
1191 	vp = realvp(vp);
1192 	return (VOP_GETSECATTR(vp, secattr, flags, cr, ct));
1193 }
1194 
1195 static int
1196 lo_shrlock(
1197 	vnode_t *vp,
1198 	int cmd,
1199 	struct shrlock *shr,
1200 	int flag,
1201 	cred_t *cr,
1202 	caller_context_t *ct)
1203 {
1204 	vp = realvp(vp);
1205 	return (VOP_SHRLOCK(vp, cmd, shr, flag, cr, ct));
1206 }
1207 
/*
 * Loopback vnode operations vector.
 *
 * lo_vnodeops is the operations vector the routines in this file compare
 * against (via vn_matchops()) to recognise lofs vnodes; it is not
 * assigned in this file.
 *
 * lo_vnodeops_template pairs each vnode operation name with the lofs
 * routine above that implements it, and is terminated by a NULL/NULL
 * entry.  VOPNAME_DUMPCTL is the one operation not passed through to the
 * real vnode: it is wired to fs_error.
 */

struct vnodeops *lo_vnodeops;

const fs_operation_def_t lo_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = lo_open },
	VOPNAME_CLOSE,		{ .vop_close = lo_close },
	VOPNAME_READ,		{ .vop_read = lo_read },
	VOPNAME_WRITE,		{ .vop_write = lo_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = lo_ioctl },
	VOPNAME_SETFL,		{ .vop_setfl = lo_setfl },
	VOPNAME_GETATTR,	{ .vop_getattr = lo_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = lo_setattr },
	VOPNAME_ACCESS,		{ .vop_access = lo_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = lo_lookup },
	VOPNAME_CREATE,		{ .vop_create = lo_create },
	VOPNAME_REMOVE,		{ .vop_remove = lo_remove },
	VOPNAME_LINK,		{ .vop_link = lo_link },
	VOPNAME_RENAME,		{ .vop_rename = lo_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = lo_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = lo_rmdir },
	VOPNAME_READDIR,	{ .vop_readdir = lo_readdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = lo_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = lo_readlink },
	VOPNAME_FSYNC,		{ .vop_fsync = lo_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = lo_inactive },
	VOPNAME_FID,		{ .vop_fid = lo_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = lo_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = lo_rwunlock },
	VOPNAME_SEEK,		{ .vop_seek = lo_seek },
	VOPNAME_CMP,		{ .vop_cmp = lo_cmp },
	VOPNAME_FRLOCK,		{ .vop_frlock = lo_frlock },
	VOPNAME_SPACE,		{ .vop_space = lo_space },
	VOPNAME_REALVP,		{ .vop_realvp = lo_realvp },
	VOPNAME_GETPAGE,	{ .vop_getpage = lo_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = lo_putpage },
	VOPNAME_MAP,		{ .vop_map = lo_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = lo_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = lo_delmap },
	VOPNAME_POLL,		{ .vop_poll = lo_poll },
	VOPNAME_DUMP,		{ .vop_dump = lo_dump },
	VOPNAME_DUMPCTL,	{ .error = fs_error },	/* XXX - why? */
	VOPNAME_PATHCONF,	{ .vop_pathconf = lo_pathconf },
	VOPNAME_PAGEIO,		{ .vop_pageio = lo_pageio },
	VOPNAME_DISPOSE,	{ .vop_dispose = lo_dispose },
	VOPNAME_SETSECATTR,	{ .vop_setsecattr = lo_setsecattr },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = lo_getsecattr },
	VOPNAME_SHRLOCK,	{ .vop_shrlock = lo_shrlock },
	/* Terminating entry. */
	NULL,			NULL
};
1260