xref: /titanic_50/usr/src/uts/common/fs/lofs/lofs_vnops.c (revision fea9cb91bd8e12d84069b4dab1268363668b4bff)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/errno.h>
32 #include <sys/vnode.h>
33 #include <sys/vfs.h>
34 #include <sys/uio.h>
35 #include <sys/cred.h>
36 #include <sys/pathname.h>
37 #include <sys/debug.h>
38 #include <sys/fs/lofs_node.h>
39 #include <sys/fs/lofs_info.h>
40 #include <fs/fs_subr.h>
41 #include <vm/as.h>
42 #include <vm/seg.h>
43 
/*
 * Non-zero when the loinfo attached to vp's vfs has the LO_ZONEDEVFS
 * bit set in li_flag.  The operations in this file use it to refuse
 * modifications (EACCES) and to substitute the lofs fsid in getattr.
 */
#define	IS_ZONEDEVFS(vp) \
	(vtoli((vp)->v_vfsp)->li_flag & LO_ZONEDEVFS)
46 
/*
 * These are the vnode ops routines which implement the vnode interface to
 * the looped-back file system.  These routines just take their parameters
 * and then call the appropriate real vnode routine(s) to do the work.
 */
52 
53 static int
54 lo_open(vnode_t **vpp, int flag, struct cred *cr)
55 {
56 	vnode_t *vp = *vpp;
57 	vnode_t *rvp;
58 	vnode_t *oldvp;
59 	int error;
60 
61 #ifdef LODEBUG
62 	lo_dprint(4, "lo_open vp %p cnt=%d realvp %p cnt=%d\n",
63 		vp, vp->v_count, realvp(vp), realvp(vp)->v_count);
64 #endif
65 
66 	oldvp = vp;
67 	vp = rvp = realvp(vp);
68 	/*
69 	 * Need to hold new reference to vp since VOP_OPEN() may
70 	 * decide to release it.
71 	 */
72 	VN_HOLD(vp);
73 	error = VOP_OPEN(&rvp, flag, cr);
74 
75 	if (!error && rvp != vp) {
76 		/*
77 		 * the FS which we called should have released the
78 		 * new reference on vp
79 		 */
80 		*vpp = makelonode(rvp, vtoli(oldvp->v_vfsp), 0);
81 		if (IS_DEVVP(*vpp)) {
82 			vnode_t *svp;
83 
84 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
85 			VN_RELE(*vpp);
86 			if (svp == NULL)
87 				error = ENOSYS;
88 			else
89 				*vpp = svp;
90 		}
91 		VN_RELE(oldvp);
92 	} else {
93 		ASSERT(rvp->v_count > 1);
94 		VN_RELE(rvp);
95 	}
96 
97 	return (error);
98 }
99 
100 static int
101 lo_close(
102 	vnode_t *vp,
103 	int flag,
104 	int count,
105 	offset_t offset,
106 	struct cred *cr)
107 {
108 #ifdef LODEBUG
109 	lo_dprint(4, "lo_close vp %p realvp %p\n", vp, realvp(vp));
110 #endif
111 	vp = realvp(vp);
112 	return (VOP_CLOSE(vp, flag, count, offset, cr));
113 }
114 
115 static int
116 lo_read(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
117 	caller_context_t *ct)
118 {
119 #ifdef LODEBUG
120 	lo_dprint(4, "lo_read vp %p realvp %p\n", vp, realvp(vp));
121 #endif
122 	vp = realvp(vp);
123 	return (VOP_READ(vp, uiop, ioflag, cr, ct));
124 }
125 
126 static int
127 lo_write(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
128 	caller_context_t *ct)
129 {
130 #ifdef LODEBUG
131 	lo_dprint(4, "lo_write vp %p realvp %p\n", vp, realvp(vp));
132 #endif
133 	vp = realvp(vp);
134 	return (VOP_WRITE(vp, uiop, ioflag, cr, ct));
135 }
136 
137 static int
138 lo_ioctl(
139 	vnode_t *vp,
140 	int cmd,
141 	intptr_t arg,
142 	int flag,
143 	struct cred *cr,
144 	int *rvalp)
145 {
146 #ifdef LODEBUG
147 	lo_dprint(4, "lo_ioctl vp %p realvp %p\n", vp, realvp(vp));
148 #endif
149 	vp = realvp(vp);
150 	return (VOP_IOCTL(vp, cmd, arg, flag, cr, rvalp));
151 }
152 
153 static int
154 lo_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr)
155 {
156 	vp = realvp(vp);
157 	return (VOP_SETFL(vp, oflags, nflags, cr));
158 }
159 
160 static int
161 lo_getattr(
162 	vnode_t *vp,
163 	struct vattr *vap,
164 	int flags,
165 	struct cred *cr)
166 {
167 	int error;
168 
169 #ifdef LODEBUG
170 	lo_dprint(4, "lo_getattr vp %p realvp %p\n", vp, realvp(vp));
171 #endif
172 	if (error = VOP_GETATTR(realvp(vp), vap, flags, cr))
173 		return (error);
174 
175 	/*
176 	 * In zonedevfs mode, we pull a nasty trick; we make sure that
177 	 * the dev_t does *not* reflect the underlying device, so that
178 	 * no renames can occur to or from the /dev hierarchy.
179 	 */
180 	if (IS_ZONEDEVFS(vp)) {
181 		vap->va_fsid = expldev(vp->v_vfsp->vfs_fsid.val[0]);
182 	}
183 
184 	return (0);
185 }
186 
187 static int
188 lo_setattr(
189 	vnode_t *vp,
190 	struct vattr *vap,
191 	int flags,
192 	struct cred *cr,
193 	caller_context_t *ct)
194 {
195 #ifdef LODEBUG
196 	lo_dprint(4, "lo_setattr vp %p realvp %p\n", vp, realvp(vp));
197 #endif
198 	if (IS_ZONEDEVFS(vp) && !IS_DEVVP(vp)) {
199 		return (EACCES);
200 	}
201 	vp = realvp(vp);
202 	return (VOP_SETATTR(vp, vap, flags, cr, ct));
203 }
204 
205 static int
206 lo_access(vnode_t *vp, int mode, int flags, struct cred *cr)
207 {
208 #ifdef LODEBUG
209 	lo_dprint(4, "lo_access vp %p realvp %p\n", vp, realvp(vp));
210 #endif
211 	if (mode & VWRITE) {
212 		if (vp->v_type == VREG && vn_is_readonly(vp))
213 			return (EROFS);
214 		if (IS_ZONEDEVFS(vp) && !IS_DEVVP(vp))
215 			return (EACCES);
216 	}
217 	vp = realvp(vp);
218 	return (VOP_ACCESS(vp, mode, flags, cr));
219 }
220 
221 static int
222 lo_fsync(vnode_t *vp, int syncflag, struct cred *cr)
223 {
224 #ifdef LODEBUG
225 	lo_dprint(4, "lo_fsync vp %p realvp %p\n", vp, realvp(vp));
226 #endif
227 	vp = realvp(vp);
228 	return (VOP_FSYNC(vp, syncflag, cr));
229 }
230 
231 /*ARGSUSED*/
232 static void
233 lo_inactive(vnode_t *vp, struct cred *cr)
234 {
235 #ifdef LODEBUG
236 	lo_dprint(4, "lo_inactive %p, realvp %p\n", vp, realvp(vp));
237 #endif
238 	freelonode(vtol(vp));
239 }
240 
241 /* ARGSUSED */
242 static int
243 lo_fid(vnode_t *vp, struct fid *fidp)
244 {
245 #ifdef LODEBUG
246 	lo_dprint(4, "lo_fid %p, realvp %p\n", vp, realvp(vp));
247 #endif
248 	vp = realvp(vp);
249 	return (VOP_FID(vp, fidp));
250 }
251 
/*
 * Given a vnode of lofs type, look up the name nm and
 * return a shadow vnode (of lofs type) of the
 * real vnode found.
 *
 * Due to the nature of lofs, there is a potential
 * for looping in path traversal.
 *
 * Starting from the mount point of an lofs,
 * a loop is defined to be a traversal path
 * where the mount point or the real vnode of
 * the root of this lofs is encountered twice:
 * once at the start of traversal and a second time
 * when the looping is found.
 *
 * When a loop is encountered, a shadow of the
 * covered vnode is returned to stop the looping.
 *
 * This normally works, but with the advent of
 * the new automounter, returning the shadow of the
 * covered vnode (autonode, in this case) does not
 * stop the loop, because further lookup on this
 * lonode will cause the autonode to call lo_lookup()
 * on the lonode covering it.
 *
 * For example, "/net/jurassic/net/jurassic" is a loop.
 * Returning the shadow of the autonode corresponding to
 * "/net/jurassic/net/jurassic" will not terminate the
 * loop.  To solve this problem we allow the loop to go
 * through one more level of component lookup.  Whichever
 * directory is then looked up in "/net/jurassic/net/jurassic",
 * the vnode returned is the vnode covered by the autonode
 * "net" and this will terminate the loop.
 *
 * Lookup for dot dot has to be dealt with separately.
 * It would be nice to have a "one size fits all" kind
 * of solution, so that we don't have so many if statements
 * in lo_lookup() to handle dotdot.  But, since
 * there are so many special cases to handle the different
 * kinds of looping above, we need special code to handle
 * dotdot lookup as well.
 */
294 static int
295 lo_lookup(
296 	vnode_t *dvp,
297 	char *nm,
298 	vnode_t **vpp,
299 	struct pathname *pnp,
300 	int flags,
301 	vnode_t *rdir,
302 	struct cred *cr)
303 {
304 	vnode_t *vp = NULL, *tvp = NULL, *nonlovp;
305 	int error, is_indirectloop;
306 	vnode_t *realdvp = realvp(dvp);
307 	struct loinfo *li = vtoli(dvp->v_vfsp);
308 	int looping = 0;
309 	int autoloop = 0;
310 	int doingdotdot = 0;
311 	int nosub = 0;
312 	int mkflag = 0;
313 
314 	/*
315 	 * If name is empty and no XATTR flags are set, then return
316 	 * dvp (empty name == lookup ".").  If an XATTR flag is set
317 	 * then we need to call VOP_LOOKUP to get the xattr dir.
318 	 */
319 	if (nm[0] == '\0' && ! (flags & (CREATE_XATTR_DIR|LOOKUP_XATTR))) {
320 		VN_HOLD(dvp);
321 		*vpp = dvp;
322 		return (0);
323 	}
324 
325 	if (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0') {
326 		doingdotdot++;
327 		/*
328 		 * Handle ".." out of mounted filesystem
329 		 */
330 		while ((realdvp->v_flag & VROOT) && realdvp != rootdir) {
331 			realdvp = realdvp->v_vfsp->vfs_vnodecovered;
332 			ASSERT(realdvp != NULL);
333 		}
334 	}
335 
336 	*vpp = NULL;	/* default(error) case */
337 
338 	/*
339 	 * Do the normal lookup
340 	 */
341 	if (error = VOP_LOOKUP(realdvp, nm, &vp, pnp, flags, rdir, cr)) {
342 		vp = NULL;
343 		goto out;
344 	}
345 
346 	/*
347 	 * We do this check here to avoid returning a stale file handle to the
348 	 * caller.
349 	 */
350 	if (nm[0] == '.' && nm[1] == '\0') {
351 		ASSERT(vp == realdvp);
352 		VN_HOLD(dvp);
353 		VN_RELE(vp);
354 		*vpp = dvp;
355 		return (0);
356 	}
357 
358 	if (doingdotdot) {
359 		if ((vtol(dvp))->lo_looping & LO_LOOPING) {
360 			vfs_t *vfsp;
361 
362 			error = vn_vfsrlock_wait(realdvp);
363 			if (error)
364 				goto out;
365 			vfsp = vn_mountedvfs(realdvp);
366 			/*
367 			 * In the standard case if the looping flag is set and
368 			 * performing dotdot we would be returning from a
369 			 * covered vnode, implying vfsp could not be null. The
370 			 * exceptions being if we have looping and overlay
371 			 * mounts or looping and covered file systems.
372 			 */
373 			if (vfsp == NULL) {
374 				/*
375 				 * Overlay mount or covered file system,
376 				 * so just make the shadow node.
377 				 */
378 				vn_vfsunlock(realdvp);
379 				*vpp = makelonode(vp, li, 0);
380 				(vtol(*vpp))->lo_looping |= LO_LOOPING;
381 				return (0);
382 			}
383 			/*
384 			 * When looping get the actual found vnode
385 			 * instead of the vnode covered.
386 			 * Here we have to hold the lock for realdvp
387 			 * since an unmount during the traversal to the
388 			 * root vnode would turn *vfsp into garbage
389 			 * which would be fatal.
390 			 */
391 			error = VFS_ROOT(vfsp, &tvp);
392 			vn_vfsunlock(realdvp);
393 
394 			if (error)
395 				goto out;
396 
397 			if ((tvp == li->li_rootvp) && (vp == realvp(tvp))) {
398 				/*
399 				 * we're back at the real vnode
400 				 * of the rootvp
401 				 *
402 				 * return the rootvp
403 				 * Ex: /mnt/mnt/..
404 				 * where / has been lofs-mounted
405 				 * onto /mnt.  Return the lofs
406 				 * node mounted at /mnt.
407 				 */
408 				*vpp = tvp;
409 				VN_RELE(vp);
410 				return (0);
411 			} else {
412 				/*
413 				 * We are returning from a covered
414 				 * node whose vfs_mountedhere is
415 				 * not pointing to vfs of the current
416 				 * root vnode.
417 				 * This is a condn where in we
418 				 * returned a covered node say Zc
419 				 * but Zc is not the cover of current
420 				 * root.
421 				 * i.e.., if X is the root vnode
422 				 * lookup(Zc,"..") is taking us to
423 				 * X.
424 				 * Ex: /net/X/net/X/Y
425 				 *
426 				 * If LO_AUTOLOOP (autofs/lofs looping detected)
427 				 * has been set then we are encountering the
428 				 * cover of Y (Y being any directory vnode
429 				 * under /net/X/net/X/).
430 				 * When performing a dotdot set the
431 				 * returned vp to the vnode covered
432 				 * by the mounted lofs, ie /net/X/net/X
433 				 */
434 				VN_RELE(tvp);
435 				if ((vtol(dvp))->lo_looping & LO_AUTOLOOP) {
436 					VN_RELE(vp);
437 					vp = li->li_rootvp;
438 					vp = vp->v_vfsp->vfs_vnodecovered;
439 					VN_HOLD(vp);
440 					*vpp = makelonode(vp, li, 0);
441 					(vtol(*vpp))->lo_looping |= LO_LOOPING;
442 					return (0);
443 				}
444 			}
445 		} else {
446 			/*
447 			 * No frills just make the shadow node.
448 			 */
449 			*vpp = makelonode(vp, li, 0);
450 			return (0);
451 		}
452 	}
453 
454 	nosub = (vtoli(dvp->v_vfsp)->li_flag & LO_NOSUB);
455 
456 	/*
457 	 * If this vnode is mounted on, then we
458 	 * traverse to the vnode which is the root of
459 	 * the mounted file system.
460 	 */
461 	if (!nosub && (error = traverse(&vp)))
462 		goto out;
463 
464 	/*
465 	 * Make a lnode for the real vnode.
466 	 */
467 	if (vp->v_type != VDIR || nosub) {
468 		*vpp = makelonode(vp, li, 0);
469 		if (IS_DEVVP(*vpp)) {
470 			vnode_t *svp;
471 
472 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
473 			VN_RELE(*vpp);
474 			if (svp == NULL)
475 				error = ENOSYS;
476 			else
477 				*vpp = svp;
478 		}
479 		return (error);
480 	}
481 
482 	/*
483 	 * if the found vnode (vp) is not of type lofs
484 	 * then we're just going to make a shadow of that
485 	 * vp and get out.
486 	 *
487 	 * If the found vnode (vp) is of lofs type, and
488 	 * we're not doing dotdot, check if we are
489 	 * looping.
490 	 */
491 	if (!doingdotdot && vfs_matchops(vp->v_vfsp, lo_vfsops)) {
492 		/*
493 		 * Check if we're looping, i.e.
494 		 * vp equals the root vp of the lofs, directly
495 		 * or indirectly, return the covered node.
496 		 */
497 
498 		if (!((vtol(dvp))->lo_looping & LO_LOOPING)) {
499 			if (vp == li->li_rootvp) {
500 				/*
501 				 * Direct looping condn.
502 				 * Ex:- X is / mounted directory so lookup of
503 				 * /X/X is a direct looping condn.
504 				 */
505 				tvp = vp;
506 				vp = vp->v_vfsp->vfs_vnodecovered;
507 				VN_HOLD(vp);
508 				VN_RELE(tvp);
509 				looping++;
510 			} else {
511 				/*
512 				 * Indirect looping can be defined as
513 				 * real lookup returning rootvp of the current
514 				 * tree in any level of recursion.
515 				 *
516 				 * This check is useful if there are multiple
517 				 * levels of lofs indirections. Suppose vnode X
518 				 * in the current lookup has as its real vnode
519 				 * another lofs node. Y = realvp(X) Y should be
520 				 * a lofs node for the check to continue or Y
521 				 * is not the rootvp of X.
522 				 * Ex:- say X and Y are two vnodes
523 				 * say real(Y) is X and real(X) is Z
524 				 * parent vnode for X and Y is Z
525 				 * lookup(Y,"path") say we are looking for Y
526 				 * again under Y and we have to return Yc.
527 				 * but the lookup of Y under Y doesnot return
528 				 * Y the root vnode again here is why.
529 				 * 1. lookup(Y,"path of Y") will go to
530 				 * 2. lookup(real(Y),"path of Y") and then to
531 				 * 3. lookup(real(X),"path of Y").
532 				 * and now what lookup level 1 sees is the
533 				 * outcome of 2 but the vnode Y is due to
534 				 * lookup(Z,"path of Y") so we have to skip
535 				 * intermediate levels to find if in any level
536 				 * there is a looping.
537 				 */
538 				is_indirectloop = 0;
539 				nonlovp = vp;
540 				while (
541 				    vfs_matchops(nonlovp->v_vfsp, lo_vfsops) &&
542 				    !(is_indirectloop)) {
543 					if (li->li_rootvp  == nonlovp) {
544 						is_indirectloop++;
545 						break;
546 					}
547 					nonlovp = realvp(nonlovp);
548 				}
549 
550 				if (is_indirectloop) {
551 					VN_RELE(vp);
552 					vp = nonlovp;
553 					vp = vp->v_vfsp->vfs_vnodecovered;
554 					VN_HOLD(vp);
555 					looping++;
556 				}
557 			}
558 		} else {
559 			/*
560 			 * come here only because of the interaction between
561 			 * the autofs and lofs.
562 			 *
563 			 * Lookup of "/net/X/net/X" will return a shadow of
564 			 * an autonode X_a which we call X_l.
565 			 *
566 			 * Lookup of anything under X_l, will trigger a call to
567 			 * auto_lookup(X_a,nm) which will eventually call
568 			 * lo_lookup(X_lr,nm) where X_lr is the root vnode of
569 			 * the current lofs.
570 			 *
571 			 * We come here only when we are called with X_l as dvp
572 			 * and look for something underneath.
573 			 *
574 			 * Now that an autofs/lofs looping condition has been
575 			 * identified any directory vnode contained within
576 			 * dvp will be set to the vnode covered by the
577 			 * mounted autofs. Thus all directories within dvp
578 			 * will appear empty hence teminating the looping.
579 			 * The LO_AUTOLOOP flag is set on the returned lonode
580 			 * to indicate the termination of the autofs/lofs
581 			 * looping. This is required for the correct behaviour
582 			 * when performing a dotdot.
583 			 */
584 			realdvp = realvp(dvp);
585 			while (vfs_matchops(realdvp->v_vfsp, lo_vfsops)) {
586 				realdvp = realvp(realdvp);
587 			}
588 
589 			error = VFS_ROOT(realdvp->v_vfsp, &tvp);
590 			if (error)
591 				goto out;
592 			/*
593 			 * tvp now contains the rootvp of the vfs of the
594 			 * real vnode of dvp. The directory vnode vp is set
595 			 * to the covered vnode to terminate looping. No
596 			 * distinction is made between any vp as all directory
597 			 * vnodes contained in dvp are returned as the covered
598 			 * vnode.
599 			 */
600 			VN_RELE(vp);
601 			vp = tvp;	/* this is an autonode */
602 
603 			/*
604 			 * Need to find the covered vnode
605 			 */
606 			vp = vp->v_vfsp->vfs_vnodecovered;
607 			ASSERT(vp);
608 			VN_HOLD(vp);
609 			VN_RELE(tvp);
610 			/*
611 			 * Force the creation of a new lnode even if the hash
612 			 * table contains a lnode that references this vnode.
613 			 */
614 			mkflag = LOF_FORCE;
615 			autoloop++;
616 		}
617 	}
618 	*vpp = makelonode(vp, li, mkflag);
619 
620 	if ((looping) ||
621 	    (((vtol(dvp))->lo_looping & LO_LOOPING) && !doingdotdot)) {
622 		(vtol(*vpp))->lo_looping |= LO_LOOPING;
623 	}
624 
625 	if (autoloop) {
626 		(vtol(*vpp))->lo_looping |= LO_AUTOLOOP;
627 	}
628 
629 out:
630 	if (error != 0 && vp != NULL)
631 		VN_RELE(vp);
632 #ifdef LODEBUG
633 	lo_dprint(4,
634 	"lo_lookup dvp %x realdvp %x nm '%s' newvp %x real vp %x error %d\n",
635 		dvp, realvp(dvp), nm, *vpp, vp, error);
636 #endif
637 	return (error);
638 }
639 
640 /*ARGSUSED*/
641 static int
642 lo_create(
643 	vnode_t *dvp,
644 	char *nm,
645 	struct vattr *va,
646 	enum vcexcl exclusive,
647 	int mode,
648 	vnode_t **vpp,
649 	struct cred *cr,
650 	int flag)
651 {
652 	int error;
653 	vnode_t *vp = NULL;
654 	vnode_t *tvp = NULL;
655 
656 #ifdef LODEBUG
657 	lo_dprint(4, "lo_create vp %p realvp %p\n", dvp, realvp(dvp));
658 #endif
659 	if (*nm == '\0') {
660 		ASSERT(vpp && dvp == *vpp);
661 		vp = realvp(*vpp);
662 	}
663 
664 	if (IS_ZONEDEVFS(dvp)) {
665 
666 		/*
667 		 * In the case of an exclusive create, *vpp will not
668 		 * be populated.  We must check to see if the file exists.
669 		 */
670 		if ((exclusive == EXCL) && (*nm != '\0')) {
671 			(void) VOP_LOOKUP(dvp, nm, &tvp, NULL, 0, NULL, cr);
672 		}
673 
674 		/* Is this truly a create?  If so, fail */
675 		if ((*vpp == NULL) && (tvp == NULL))
676 			return (EACCES);
677 
678 		if (tvp != NULL)
679 			VN_RELE(tvp);
680 
681 		/* Is this an open of a non-special for writing?  If so, fail */
682 		if (*vpp != NULL && (mode & VWRITE) && !IS_DEVVP(*vpp))
683 			return (EACCES);
684 	}
685 
686 	error = VOP_CREATE(realvp(dvp), nm, va, exclusive, mode, &vp, cr, flag);
687 	if (!error) {
688 		*vpp = makelonode(vp, vtoli(dvp->v_vfsp), 0);
689 		if (IS_DEVVP(*vpp)) {
690 			vnode_t *svp;
691 
692 			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
693 			VN_RELE(*vpp);
694 			if (svp == NULL)
695 				error = ENOSYS;
696 			else
697 				*vpp = svp;
698 		}
699 	}
700 	return (error);
701 }
702 
703 static int
704 lo_remove(vnode_t *dvp, char *nm, struct cred *cr)
705 {
706 #ifdef LODEBUG
707 	lo_dprint(4, "lo_remove vp %p realvp %p\n", dvp, realvp(dvp));
708 #endif
709 	if (IS_ZONEDEVFS(dvp))
710 		return (EACCES);
711 	dvp = realvp(dvp);
712 	return (VOP_REMOVE(dvp, nm, cr));
713 }
714 
715 static int
716 lo_link(vnode_t *tdvp, vnode_t *vp, char *tnm, struct cred *cr)
717 {
718 #ifdef LODEBUG
719 	lo_dprint(4, "lo_link vp %p realvp %p\n", vp, realvp(vp));
720 #endif
721 	while (vn_matchops(vp, lo_vnodeops)) {
722 		if (IS_ZONEDEVFS(vp))
723 			return (EACCES);
724 		vp = realvp(vp);
725 	}
726 	while (vn_matchops(tdvp, lo_vnodeops)) {
727 		if (IS_ZONEDEVFS(tdvp))
728 			return (EACCES);
729 		tdvp = realvp(tdvp);
730 	}
731 	if (vp->v_vfsp != tdvp->v_vfsp)
732 		return (EXDEV);
733 	return (VOP_LINK(tdvp, vp, tnm, cr));
734 }
735 
736 static int
737 lo_rename(
738 	vnode_t *odvp,
739 	char *onm,
740 	vnode_t *ndvp,
741 	char *nnm,
742 	struct cred *cr)
743 {
744 	vnode_t *tnvp;
745 
746 #ifdef LODEBUG
747 	lo_dprint(4, "lo_rename vp %p realvp %p\n", odvp, realvp(odvp));
748 #endif
749 	if (IS_ZONEDEVFS(odvp))
750 		return (EACCES);
751 	/*
752 	 * We need to make sure we're not trying to remove a mount point for a
753 	 * filesystem mounted on top of lofs, which only we know about.
754 	 */
755 	if (vn_matchops(ndvp, lo_vnodeops))	/* Not our problem. */
756 		goto rename;
757 	if (VOP_LOOKUP(ndvp, nnm, &tnvp, NULL, 0, NULL, cr) != 0)
758 		goto rename;
759 	if (tnvp->v_type != VDIR) {
760 		VN_RELE(tnvp);
761 		goto rename;
762 	}
763 	if (vn_mountedvfs(tnvp)) {
764 		VN_RELE(tnvp);
765 		return (EBUSY);
766 	}
767 	VN_RELE(tnvp);
768 rename:
769 	/*
770 	 * Since the case we're dealing with above can happen at any layer in
771 	 * the stack of lofs filesystems, we need to recurse down the stack,
772 	 * checking to see if there are any instances of a filesystem mounted on
773 	 * top of lofs. In order to keep on using the lofs version of
774 	 * VOP_RENAME(), we make sure that while the target directory is of type
775 	 * lofs, the source directory (the one used for getting the fs-specific
776 	 * version of VOP_RENAME()) is also of type lofs.
777 	 */
778 	if (vn_matchops(ndvp, lo_vnodeops)) {
779 		if (IS_ZONEDEVFS(ndvp))
780 			return (EACCES);
781 		ndvp = realvp(ndvp);	/* Check the next layer */
782 	} else {
783 		/*
784 		 * We can go fast here
785 		 */
786 		while (vn_matchops(odvp, lo_vnodeops)) {
787 			if (IS_ZONEDEVFS(odvp))
788 				return (EACCES);
789 			odvp = realvp(odvp);
790 		}
791 		if (odvp->v_vfsp != ndvp->v_vfsp)
792 			return (EXDEV);
793 	}
794 	return (VOP_RENAME(odvp, onm, ndvp, nnm, cr));
795 }
796 
797 static int
798 lo_mkdir(
799 	vnode_t *dvp,
800 	char *nm,
801 	struct vattr *va,
802 	vnode_t **vpp,
803 	struct cred *cr)
804 {
805 	int error;
806 
807 #ifdef LODEBUG
808 	lo_dprint(4, "lo_mkdir vp %p realvp %p\n", dvp, realvp(dvp));
809 #endif
810 	if (IS_ZONEDEVFS(dvp))
811 		return (EACCES);
812 	error = VOP_MKDIR(realvp(dvp), nm, va, vpp, cr);
813 	if (!error)
814 		*vpp = makelonode(*vpp, vtoli(dvp->v_vfsp), 0);
815 	return (error);
816 }
817 
818 static int
819 lo_realvp(vnode_t *vp, vnode_t **vpp)
820 {
821 #ifdef LODEBUG
822 	lo_dprint(4, "lo_realvp %p\n", vp);
823 #endif
824 	while (vn_matchops(vp, lo_vnodeops))
825 		vp = realvp(vp);
826 
827 	if (VOP_REALVP(vp, vpp) != 0)
828 		*vpp = vp;
829 	return (0);
830 }
831 
832 static int
833 lo_rmdir(
834 	vnode_t *dvp,
835 	char *nm,
836 	vnode_t *cdir,
837 	struct cred *cr)
838 {
839 	vnode_t *rvp = cdir;
840 
841 #ifdef LODEBUG
842 	lo_dprint(4, "lo_rmdir vp %p realvp %p\n", dvp, realvp(dvp));
843 #endif
844 	if (IS_ZONEDEVFS(dvp))
845 		return (EACCES);
846 	/* if cdir is lofs vnode ptr get its real vnode ptr */
847 	if (vn_matchops(dvp, vn_getops(rvp)))
848 		(void) lo_realvp(cdir, &rvp);
849 	dvp = realvp(dvp);
850 	return (VOP_RMDIR(dvp, nm, rvp, cr));
851 }
852 
853 static int
854 lo_symlink(
855 	vnode_t *dvp,
856 	char *lnm,
857 	struct vattr *tva,
858 	char *tnm,
859 	struct cred *cr)
860 {
861 #ifdef LODEBUG
862 	lo_dprint(4, "lo_symlink vp %p realvp %p\n", dvp, realvp(dvp));
863 #endif
864 	if (IS_ZONEDEVFS(dvp))
865 		return (EACCES);
866 	dvp = realvp(dvp);
867 	return (VOP_SYMLINK(dvp, lnm, tva, tnm, cr));
868 }
869 
870 static int
871 lo_readlink(vnode_t *vp, struct uio *uiop, struct cred *cr)
872 {
873 	vp = realvp(vp);
874 	return (VOP_READLINK(vp, uiop, cr));
875 }
876 
877 static int
878 lo_readdir(vnode_t *vp, struct uio *uiop, struct cred *cr, int *eofp)
879 {
880 #ifdef LODEBUG
881 	lo_dprint(4, "lo_readdir vp %p realvp %p\n", vp, realvp(vp));
882 #endif
883 	vp = realvp(vp);
884 	return (VOP_READDIR(vp, uiop, cr, eofp));
885 }
886 
887 static int
888 lo_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
889 {
890 	vp = realvp(vp);
891 	return (VOP_RWLOCK(vp, write_lock, ct));
892 }
893 
894 static void
895 lo_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
896 {
897 	vp = realvp(vp);
898 	VOP_RWUNLOCK(vp, write_lock, ct);
899 }
900 
901 static int
902 lo_seek(vnode_t *vp, offset_t ooff, offset_t *noffp)
903 {
904 	vp = realvp(vp);
905 	return (VOP_SEEK(vp, ooff, noffp));
906 }
907 
908 static int
909 lo_cmp(vnode_t *vp1, vnode_t *vp2)
910 {
911 	while (vn_matchops(vp1, lo_vnodeops))
912 		vp1 = realvp(vp1);
913 	while (vn_matchops(vp2, lo_vnodeops))
914 		vp2 = realvp(vp2);
915 	return (VOP_CMP(vp1, vp2));
916 }
917 
918 static int
919 lo_frlock(
920 	vnode_t *vp,
921 	int cmd,
922 	struct flock64 *bfp,
923 	int flag,
924 	offset_t offset,
925 	struct flk_callback *flk_cbp,
926 	cred_t *cr)
927 {
928 	vp = realvp(vp);
929 	return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr));
930 }
931 
932 static int
933 lo_space(
934 	vnode_t *vp,
935 	int cmd,
936 	struct flock64 *bfp,
937 	int flag,
938 	offset_t offset,
939 	struct cred *cr,
940 	caller_context_t *ct)
941 {
942 	vp = realvp(vp);
943 	return (VOP_SPACE(vp, cmd, bfp, flag, offset, cr, ct));
944 }
945 
946 static int
947 lo_getpage(
948 	vnode_t *vp,
949 	offset_t off,
950 	size_t len,
951 	uint_t *prot,
952 	struct page *parr[],
953 	size_t psz,
954 	struct seg *seg,
955 	caddr_t addr,
956 	enum seg_rw rw,
957 	struct cred *cr)
958 {
959 	vp = realvp(vp);
960 	return (VOP_GETPAGE(vp, off, len, prot, parr, psz, seg, addr, rw, cr));
961 }
962 
963 static int
964 lo_putpage(vnode_t *vp, offset_t off, size_t len, int flags, struct cred *cr)
965 {
966 	vp = realvp(vp);
967 	return (VOP_PUTPAGE(vp, off, len, flags, cr));
968 }
969 
970 static int
971 lo_map(
972 	vnode_t *vp,
973 	offset_t off,
974 	struct as *as,
975 	caddr_t *addrp,
976 	size_t len,
977 	uchar_t prot,
978 	uchar_t maxprot,
979 	uint_t flags,
980 	struct cred *cr)
981 {
982 	vp = realvp(vp);
983 	return (VOP_MAP(vp, off, as, addrp, len, prot, maxprot, flags, cr));
984 }
985 
986 static int
987 lo_addmap(
988 	vnode_t *vp,
989 	offset_t off,
990 	struct as *as,
991 	caddr_t addr,
992 	size_t len,
993 	uchar_t prot,
994 	uchar_t maxprot,
995 	uint_t flags,
996 	struct cred *cr)
997 {
998 	vp = realvp(vp);
999 	return (VOP_ADDMAP(vp, off, as, addr, len, prot, maxprot, flags, cr));
1000 }
1001 
1002 static int
1003 lo_delmap(
1004 	vnode_t *vp,
1005 	offset_t off,
1006 	struct as *as,
1007 	caddr_t addr,
1008 	size_t len,
1009 	uint_t prot,
1010 	uint_t maxprot,
1011 	uint_t flags,
1012 	struct cred *cr)
1013 {
1014 	vp = realvp(vp);
1015 	return (VOP_DELMAP(vp, off, as, addr, len, prot, maxprot, flags, cr));
1016 }
1017 
1018 static int
1019 lo_poll(
1020 	vnode_t *vp,
1021 	short events,
1022 	int anyyet,
1023 	short *reventsp,
1024 	struct pollhead **phpp)
1025 {
1026 	vp = realvp(vp);
1027 	return (VOP_POLL(vp, events, anyyet, reventsp, phpp));
1028 }
1029 
1030 static int
1031 lo_dump(vnode_t *vp, caddr_t addr, int bn, int count)
1032 {
1033 	vp = realvp(vp);
1034 	return (VOP_DUMP(vp, addr, bn, count));
1035 }
1036 
1037 static int
1038 lo_pathconf(vnode_t *vp, int cmd, ulong_t *valp, struct cred *cr)
1039 {
1040 	vp = realvp(vp);
1041 	return (VOP_PATHCONF(vp, cmd, valp, cr));
1042 }
1043 
1044 static int
1045 lo_pageio(
1046 	vnode_t *vp,
1047 	struct page *pp,
1048 	u_offset_t io_off,
1049 	size_t io_len,
1050 	int flags,
1051 	cred_t *cr)
1052 {
1053 	vp = realvp(vp);
1054 	return (VOP_PAGEIO(vp, pp, io_off, io_len, flags, cr));
1055 }
1056 
1057 static void
1058 lo_dispose(vnode_t *vp, page_t *pp, int fl, int dn, cred_t *cr)
1059 {
1060 	vp = realvp(vp);
1061 	if (vp != NULL && vp != &kvp)
1062 		VOP_DISPOSE(vp, pp, fl, dn, cr);
1063 }
1064 
1065 static int
1066 lo_setsecattr(vnode_t *vp, vsecattr_t *secattr, int flags, struct cred *cr)
1067 {
1068 	if (vn_is_readonly(vp))
1069 		return (EROFS);
1070 	vp = realvp(vp);
1071 	return (VOP_SETSECATTR(vp, secattr, flags, cr));
1072 }
1073 
1074 static int
1075 lo_getsecattr(vnode_t *vp, vsecattr_t *secattr, int flags, struct cred *cr)
1076 {
1077 	vp = realvp(vp);
1078 	return (VOP_GETSECATTR(vp, secattr, flags, cr));
1079 }
1080 
1081 static int
1082 lo_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr)
1083 {
1084 	vp = realvp(vp);
1085 	return (VOP_SHRLOCK(vp, cmd, shr, flag, cr));
1086 }
1087 
/*
 * Loopback vnode operations vector.
 *
 * lo_vnodeops is the ops vector that the routines above compare
 * against (via vn_matchops()) to recognise lofs vnodes.  It is not
 * initialised in this file.
 *
 * lo_vnodeops_template pairs each operation name with the lofs routine
 * that implements it.  NOTE(review): presumably the vector is built
 * from this template at file system initialisation time, and the
 * trailing NULL, NULL pair marks the end of the table -- confirm
 * against the lofs vfs init code.
 */

struct vnodeops *lo_vnodeops;

const fs_operation_def_t lo_vnodeops_template[] = {
	VOPNAME_OPEN, lo_open,
	VOPNAME_CLOSE, lo_close,
	VOPNAME_READ, lo_read,
	VOPNAME_WRITE, lo_write,
	VOPNAME_IOCTL, lo_ioctl,
	VOPNAME_SETFL, lo_setfl,
	VOPNAME_GETATTR, lo_getattr,
	VOPNAME_SETATTR, lo_setattr,
	VOPNAME_ACCESS, lo_access,
	VOPNAME_LOOKUP, lo_lookup,
	VOPNAME_CREATE, lo_create,
	VOPNAME_REMOVE, lo_remove,
	VOPNAME_LINK, lo_link,
	VOPNAME_RENAME, lo_rename,
	VOPNAME_MKDIR, lo_mkdir,
	VOPNAME_RMDIR, lo_rmdir,
	VOPNAME_READDIR, lo_readdir,
	VOPNAME_SYMLINK, lo_symlink,
	VOPNAME_READLINK, lo_readlink,
	VOPNAME_FSYNC, lo_fsync,
	VOPNAME_INACTIVE, (fs_generic_func_p) lo_inactive,
	VOPNAME_FID, lo_fid,
	VOPNAME_RWLOCK, lo_rwlock,
	VOPNAME_RWUNLOCK, (fs_generic_func_p) lo_rwunlock,
	VOPNAME_SEEK, lo_seek,
	VOPNAME_CMP, lo_cmp,
	VOPNAME_FRLOCK, lo_frlock,
	VOPNAME_SPACE, lo_space,
	VOPNAME_REALVP, lo_realvp,
	VOPNAME_GETPAGE, lo_getpage,
	VOPNAME_PUTPAGE, lo_putpage,
	VOPNAME_MAP, (fs_generic_func_p) lo_map,
	VOPNAME_ADDMAP, (fs_generic_func_p) lo_addmap,
	VOPNAME_DELMAP, lo_delmap,
	VOPNAME_POLL, (fs_generic_func_p) lo_poll,
	VOPNAME_DUMP, lo_dump,
	/* Not forwarded: the generic fs_error routine is installed. */
	VOPNAME_DUMPCTL, fs_error,		/* XXX - why? */
	VOPNAME_PATHCONF, lo_pathconf,
	VOPNAME_PAGEIO, lo_pageio,
	VOPNAME_DISPOSE, (fs_generic_func_p) lo_dispose,
	VOPNAME_SETSECATTR, lo_setsecattr,
	VOPNAME_GETSECATTR, lo_getsecattr,
	VOPNAME_SHRLOCK, lo_shrlock,
	NULL, NULL
};
1140