xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c (revision fbd1c0dae6f4a2ccc2ce0527c7f19d3dd5ea90b8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
22  * Use is subject to license terms.
23  */
24 
25 #pragma ident	"%Z%%M%	%I%	%E% SMI"
26 
27 #include <sys/systm.h>
28 
29 #include <nfs/nfs.h>
30 #include <nfs/export.h>
31 #include <sys/cmn_err.h>
32 
33 /*
34  * A version of VOP_FID that deals with a remote VOP_FID for nfs.
35  * If vp is an nfs node, nfs4_fid() returns EREMOTE, nfs3_fid() and nfs_fid()
36  * returns the filehandle of vp as its fid. When nfs uses fid to set the
37  * exportinfo filehandle template, a remote nfs filehandle would be too big for
38  * the fid of the exported directory. This routine remaps the value of the
39  * attribute va_nodeid of vp to be the fid of vp, so that the fid can fit.
40  *
41  * We need this fid mainly for setting up NFSv4 server namespace where an
42  * nfs filesystem is also part of it. Thus, need to be able to setup a pseudo
43  * exportinfo for an nfs node.
44  *
45  * e.g. mount an ufs filesystem on an nfs filesystem, then share the ufs
46  *      filesystem. (like exporting a local disk from a "diskless" client)
47  */
48 int
49 vop_fid_pseudo(vnode_t *vp, fid_t *fidp)
50 {
51 	struct vattr va;
52 	int error;
53 
54 	error = VOP_FID(vp, fidp);
55 
56 	/*
57 	 * XXX nfs4_fid() does nothing and returns EREMOTE.
58 	 * XXX nfs3_fid()/nfs_fid() returns nfs filehandle as its fid
59 	 * which has a bigger length than local fid.
60 	 * NFS_FH4MAXDATA is the size of
61 	 * fhandle4_t.fh_xdata[NFS_FH4MAXDATA].
62 	 *
63 	 * Note: nfs[2,3,4]_fid() only gets called for diskless clients.
64 	 */
65 	if (error == EREMOTE ||
66 	    (error == 0 && fidp->fid_len > NFS_FH4MAXDATA)) {
67 
68 		va.va_mask = AT_NODEID;
69 		error = VOP_GETATTR(vp, &va, 0, CRED());
70 		if (error)
71 			return (error);
72 
73 		fidp->fid_len = sizeof (va.va_nodeid);
74 		bcopy(&va.va_nodeid, fidp->fid_data, fidp->fid_len);
75 		return (0);
76 	}
77 
78 	return (error);
79 }
80 
81 /*
82  * Get an nfsv4 vnode of the given fid from the visible list of an
83  * nfs filesystem or get the exi_vp if it is the root node.
84  */
85 int
86 nfs4_vget_pseudo(struct exportinfo *exi, vnode_t **vpp, fid_t *fidp)
87 {
88 	fid_t exp_fid;
89 	struct exp_visible *visp;
90 	int error;
91 
92 	/* check if the given fid is in the visible list */
93 
94 	for (visp = exi->exi_visible; visp; visp = visp->vis_next) {
95 		if (EQFID(fidp, &visp->vis_fid)) {
96 			VN_HOLD(visp->vis_vp);
97 			*vpp = visp->vis_vp;
98 			return (0);
99 		}
100 	}
101 
102 	/* check if the given fid is the same as the exported node */
103 
104 	bzero(&exp_fid, sizeof (exp_fid));
105 	exp_fid.fid_len = MAXFIDSZ;
106 	error = vop_fid_pseudo(exi->exi_vp, &exp_fid);
107 	if (error)
108 		return (error);
109 
110 	if (EQFID(fidp, &exp_fid)) {
111 		VN_HOLD(exi->exi_vp);
112 		*vpp = exi->exi_vp;
113 		return (0);
114 	}
115 
116 	return (ENOENT);
117 }
118 
119 /*
120  * Create a pseudo export entry
121  *
122  * This is an export entry that's created as the
123  * side-effect of a "real" export.  As a part of
124  * a real export, the pathname to the export is
125  * checked to see if all the directory components
126  * are accessible via an NFSv4 client, i.e. are
127  * exported.  If tree_climb() finds an unexported
128  * mountpoint along the path, then it calls this
129  * function to export it.
130  *
131  * This pseudo export differs from a real export
132  * in restriction on simple. read-only access,
133  * and the addition of a "visible" list of directories.
134  * A real export may have a visible list if it is a root of
135  * a file system and at least one of its subtree resides in
136  * a different file system is shared.
137  *
138  * A visible list is per file system. It resides in the exportinfo
139  * for the pseudo node (VROOT) and it could reside in a real export
140  * of a VROOT node.
141  */
142 int
143 pseudo_exportfs(vnode_t *vp, struct exp_visible *vis_head,
144 					struct exportdata *exdata)
145 {
146 	struct exportinfo *exi;
147 	struct exportdata *kex;
148 	fid_t fid;
149 	fsid_t fsid;
150 	int error;
151 	char *pseudo;
152 
153 	ASSERT(RW_WRITE_HELD(&exported_lock));
154 
155 	/*
156 	 * Get the vfs id
157 	 */
158 	bzero(&fid, sizeof (fid));
159 	fid.fid_len = MAXFIDSZ;
160 	error = vop_fid_pseudo(vp, &fid);
161 	if (error) {
162 		/*
163 		 * If VOP_FID returns ENOSPC then the fid supplied
164 		 * is too small.  For now we simply return EREMOTE.
165 		 */
166 		if (error == ENOSPC)
167 			error = EREMOTE;
168 		return (error);
169 	}
170 
171 	fsid = vp->v_vfsp->vfs_fsid;
172 	exi = kmem_zalloc(sizeof (*exi), KM_SLEEP);
173 	exi->exi_fsid = fsid;
174 	exi->exi_fid = fid;
175 	exi->exi_vp = vp;
176 	VN_HOLD(exi->exi_vp);
177 	exi->exi_visible = vis_head;
178 	exi->exi_count = 1;
179 	exi->exi_volatile_dev = (vfssw[vp->v_vfsp->vfs_fstype].vsw_flag &
180 				VSW_VOLATILEDEV) ? 1 : 0;
181 	mutex_init(&exi->exi_lock, NULL, MUTEX_DEFAULT, NULL);
182 
183 	/*
184 	 * Build up the template fhandle
185 	 */
186 	exi->exi_fh.fh_fsid = fsid;
187 	ASSERT(exi->exi_fid.fid_len <= sizeof (exi->exi_fh.fh_xdata));
188 	exi->exi_fh.fh_xlen = exi->exi_fid.fid_len;
189 	bcopy(exi->exi_fid.fid_data, exi->exi_fh.fh_xdata,
190 	    exi->exi_fid.fid_len);
191 	exi->exi_fh.fh_len = sizeof (exi->exi_fh.fh_data);
192 
193 	kex = &exi->exi_export;
194 	kex->ex_flags = EX_PSEUDO;
195 
196 	/* Set up a generic pathname */
197 
198 	pseudo = "(pseudo)";
199 	kex->ex_pathlen = strlen(pseudo);
200 	kex->ex_path = kmem_alloc(kex->ex_pathlen + 1, KM_SLEEP);
201 	(void) strcpy(kex->ex_path, pseudo);
202 
203 	/* Transfer the secinfo data from exdata to this new pseudo node */
204 	if (exdata)
205 		srv_secinfo_exp2pseu(&exi->exi_export, exdata);
206 
207 	/*
208 	 * Initialize auth cache lock
209 	 */
210 	rw_init(&exi->exi_cache_lock, NULL, RW_DEFAULT, NULL);
211 
212 	/*
213 	 * Insert the new entry at the front of the export list
214 	 */
215 	export_link(exi);
216 
217 	return (0);
218 }
219 
220 /*
221  * Free a list of visible directories
222  */
223 void
224 free_visible(struct exp_visible *head)
225 {
226 	struct exp_visible *visp, *next;
227 
228 	for (visp = head; visp; visp = next) {
229 		if (visp->vis_vp != NULL)
230 			VN_RELE(visp->vis_vp);
231 		next = visp->vis_next;
232 		kmem_free(visp, sizeof (*visp));
233 	}
234 }
235 
236 /*
237  * Add a list of visible directories to a pseudo exportfs.
238  *
239  * When we export a new directory we need to add a new
240  * path segment through the pseudofs to reach the new
241  * directory. This new path is reflected in a list of
242  * directories added to the "visible" list.
243  *
244  * Here there are two lists of visible fids: one hanging off the
245  * pseudo exportinfo, and the one we want to add.  It's possible
246  * that the two lists share a common path segment
247  * and have some common directories.  We need to combine
248  * the lists so there's no duplicate entries. Where a common
249  * path component is found, the vis_count field is bumped.
250  *
251  * When the addition is complete, the supplied list is freed.
252  */
253 
254 static void
255 more_visible(struct exportinfo *exi, struct exp_visible *vis_head)
256 {
257 	struct exp_visible *vp1, *vp2;
258 	struct exp_visible *tail, *new;
259 	int found;
260 
261 	/*
262 	 * If exportinfo doesn't already have a visible
263 	 * list just assign the entire supplied list.
264 	 */
265 	if (exi->exi_visible == NULL) {
266 		exi->exi_visible = vis_head;
267 		return;
268 	}
269 
270 	/*
271 	 * The outer loop traverses the supplied list.
272 	 */
273 	for (vp1 = vis_head; vp1; vp1 = vp1->vis_next) {
274 
275 		/*
276 		 * Given an element from the list to be added,
277 		 * search the exportinfo visible list looking for a match.
278 		 * If a match is found, increment the reference count.
279 		 */
280 		found = 0;
281 
282 		for (vp2 = exi->exi_visible; vp2; vp2 = vp2->vis_next) {
283 
284 			tail = vp2;
285 
286 			if (EQFID(&vp1->vis_fid, &vp2->vis_fid)) {
287 				found = 1;
288 				vp2->vis_count++;
289 				VN_RELE(vp1->vis_vp);
290 				vp1->vis_vp = NULL;
291 
292 				/*
293 				 * If the visible struct we want to add
294 				 * (vp1) has vis_exported set to 1, then
295 				 * the matching visible struct we just found
296 				 * must also have it's vis_exported field
297 				 * set to 1.
298 				 *
299 				 * For example, if /export/home was shared
300 				 * (and a UFS mountpoint), then "export" and
301 				 * "home" would each have visible structs in
302 				 * the root pseudo exportinfo. The vis_exported
303 				 * for home would be 1, and vis_exported for
304 				 * export would be 0.  Now, if /export was
305 				 * also shared, more_visible would find the
306 				 * existing visible struct for export, and
307 				 * see that vis_exported was 0.  The code
308 				 * below will set it to 1.
309 				 *
310 				 * vp1 is from vis list passed in (vis_head)
311 				 * vp2 is from vis list on pseudo exportinfo
312 				 */
313 				if (vp1->vis_exported && !vp2->vis_exported)
314 					vp2->vis_exported = 1;
315 				break;
316 			}
317 		}
318 
319 		/* If not found - add to the end of the list */
320 		if (! found) {
321 			new = kmem_zalloc(sizeof (*new), KM_SLEEP);
322 			*new = *vp1;
323 			tail->vis_next = new;
324 			new->vis_next = NULL;
325 			vp1->vis_vp = NULL;
326 		}
327 	}
328 
329 	/*
330 	 * Throw away the path list. vis_vp pointers in vis_head list
331 	 * are either VN_RELEed or reassigned, and are set to NULL.
332 	 * There is no need to VN_RELE in free_visible for this vis_head.
333 	 */
334 	free_visible(vis_head);
335 }
336 
337 /*
338  * Remove a list of visible directories from the pseudo exportfs.
339  *
340  * When we unexport a directory, we have to remove path
341  * components from the visible list in the pseudo exportfs
342  * entry.  The supplied visible list contains the fids of the path
343  * to the unexported directory.  The visible list of the export
344  * is checked against this list any matching fids have their
345  * reference count decremented.  If a reference count drops to
346  * zero, then it means no paths now use this directory, so its
347  * fid can be removed from the visible list.
348  *
349  * When the last path is removed, the visible list will be null.
350  */
351 static void
352 less_visible(struct exportinfo *exi, struct exp_visible *vis_head)
353 {
354 	struct exp_visible *vp1, *vp2;
355 	struct exp_visible *prev, *next;
356 
357 	/*
358 	 * The outer loop traverses the supplied list.
359 	 */
360 	for (vp1 = vis_head; vp1; vp1 = vp1->vis_next) {
361 
362 		/*
363 		 * Given an element from the list to be removed,
364 		 * search the exportinfo list looking for a match.
365 		 * If a match is found, decrement the reference
366 		 * count and drop the element if the count drops
367 		 * to zero.
368 		 */
369 		for (vp2 = exi->exi_visible, prev = NULL; vp2; vp2 = next) {
370 
371 			next = vp2->vis_next;
372 
373 			if (EQFID(&vp1->vis_fid, &vp2->vis_fid)) {
374 
375 				/*
376 				 * Decrement the ref count.
377 				 * Remove the entry if it's zero.
378 				 */
379 				if (--vp2->vis_count <= 0) {
380 					if (prev == NULL)
381 						exi->exi_visible = next;
382 					else
383 						prev->vis_next = next;
384 
385 					VN_RELE(vp2->vis_vp);
386 					kmem_free(vp2, sizeof (*vp1));
387 				} else {
388 					/*
389 					 * If we're here, then the vp2 will
390 					 * remain in the vis list.  If the
391 					 * vis entry corresponds to the object
392 					 * being unshared, then vis_exported
393 					 * needs to be set to 0.
394 					 *
395 					 * vp1 is a node from caller's list
396 					 * vp2 is node from exportinfo's list
397 					 *
398 					 * Only 1 node in the caller's list
399 					 * will have vis_exported set to 1,
400 					 * and it corresponds to the obj being
401 					 * unshared.  It should always be the
402 					 * last element of the caller's list.
403 					 */
404 					if (vp1->vis_exported &&
405 					    vp2->vis_exported) {
406 						vp2->vis_exported = 0;
407 					}
408 				}
409 
410 				break;
411 			}
412 
413 			prev = vp2;
414 		}
415 	}
416 
417 	free_visible(vis_head);
418 }
419 
420 /*
421  * This function checks the path to a new export to
422  * check whether all the pathname components are
423  * exported. It works by climbing the file tree one
424  * component at a time via "..", crossing mountpoints
425  * if necessary until an export entry is found, or the
426  * system root is reached.
427  *
428  * If an unexported mountpoint is found, then
429  * a new pseudo export is added and the pathname from
430  * the mountpoint down to the export is added to the
431  * visible list for the new pseudo export.  If an existing
432  * pseudo export is found, then the pathname is added
433  * to its visible list.
434  *
435  * Note that there's some tests for exportdir.
436  * The exportinfo entry that's passed as a parameter
437  * is that of the real export and exportdir is set
438  * for this case.
439  *
440  * Here is an example of a possible setup:
441  *
442  * () - a new fs; fs mount point
443  * EXPORT - a real exported node
444  * PSEUDO - a pseudo node
445  * vis - visible list
446  * f# - security flavor#
447  * (f#) - security flavor# propagated from its descendants
448  * "" - covered vnode
449  *
450  *
451  *                 /
452  *                 |
453  *                 (a) PSEUDO (f1,f2)
454  *                 |   vis: b,b,"c","n"
455  *                 |
456  *                 b
457  *        ---------|------------------
458  *        |                          |
459  *        (c) EXPORT,f1(f2)          (n) PSEUDO (f1,f2)
460  *        |   vis: "e","d"           |   vis: m,m,,p,q,"o"
461  *        |                          |
462  *  ------------------          -------------------
463  *  |        |        |         |                  |
464  *  (d)      (e)      f         m EXPORT,f1(f2)    p
465  *  EXPORT   EXPORT             |                  |
466  *  f1       f2                 |                  |
467  *           |                  |                  |
468  *           j                 (o) EXPORT,f2       q EXPORT f2
469  *
470  */
471 int
472 treeclimb_export(struct exportinfo *exip)
473 {
474 	vnode_t *dvp, *vp;
475 	fid_t fid;
476 	int error;
477 	int exportdir;
478 	struct exportinfo *exi = NULL;
479 	struct exp_visible *visp;
480 	struct exp_visible *vis_head = NULL;
481 	struct vattr va;
482 
483 	ASSERT(RW_WRITE_HELD(&exported_lock));
484 
485 	vp = exip->exi_vp;
486 	VN_HOLD(vp);
487 	exportdir = 1;
488 
489 	for (;;) {
490 
491 		bzero(&fid, sizeof (fid));
492 		fid.fid_len = MAXFIDSZ;
493 		error = vop_fid_pseudo(vp, &fid);
494 		if (error)
495 			break;
496 
497 		if (! exportdir) {
498 			/*
499 			 * Check if this exportroot is a VROOT dir.  If so,
500 			 * then attach the pseudonodes.  If not, then
501 			 * continue .. traversal until we hit a VROOT
502 			 * export (pseudo or real).
503 			 */
504 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
505 			if (exi != NULL && vp->v_flag & VROOT) {
506 				/*
507 				 * Found an export info
508 				 *
509 				 * Extend the list of visible
510 				 * directories whether it's a pseudo
511 				 * or a real export.
512 				 */
513 				more_visible(exi, vis_head);
514 				vis_head = NULL;
515 				break;	/* and climb no further */
516 			}
517 		}
518 
519 		/*
520 		 * If at the root of the filesystem, need
521 		 * to traverse across the mountpoint
522 		 * and continue the climb on the mounted-on
523 		 * filesystem.
524 		 */
525 		if (vp->v_flag & VROOT) {
526 
527 			if (! exportdir) {
528 				/*
529 				 * Found the root directory of a filesystem
530 				 * that isn't exported.  Need to export
531 				 * this as a pseudo export so that an NFS v4
532 				 * client can do lookups in it.
533 				 */
534 				error = pseudo_exportfs(vp, vis_head, NULL);
535 				if (error)
536 					break;
537 				vis_head = NULL;
538 			}
539 
540 			if (VN_CMP(vp, rootdir)) {
541 				/* at system root */
542 				break;
543 			}
544 
545 			vp = untraverse(vp);
546 			exportdir = 0;
547 			continue;
548 		}
549 
550 		/*
551 		 * Do a getattr to obtain the nodeid (inode num)
552 		 * for this vnode.
553 		 */
554 		va.va_mask = AT_NODEID;
555 		error = VOP_GETATTR(vp, &va, 0, CRED());
556 		if (error)
557 			break;
558 
559 		/*
560 		 *  Add this directory fid to visible list
561 		 */
562 		visp = kmem_alloc(sizeof (*visp), KM_SLEEP);
563 		VN_HOLD(vp);
564 		visp->vis_vp = vp;
565 		visp->vis_fid = fid;		/* structure copy */
566 		visp->vis_ino = va.va_nodeid;
567 		visp->vis_count = 1;
568 		visp->vis_exported = exportdir;
569 		visp->vis_next = vis_head;
570 		vis_head = visp;
571 
572 		/*
573 		 * Now, do a ".." to find parent dir of vp.
574 		 */
575 		error = VOP_LOOKUP(vp, "..", &dvp, NULL, 0, NULL, CRED());
576 
577 		if (error == ENOTDIR && exportdir) {
578 			dvp = exip->exi_dvp;
579 			ASSERT(dvp != NULL);
580 			VN_HOLD(dvp);
581 			error = 0;
582 		}
583 
584 		if (error)
585 			break;
586 
587 		exportdir = 0;
588 		VN_RELE(vp);
589 		vp = dvp;
590 	}
591 
592 	VN_RELE(vp);
593 	return (error);
594 }
595 
596 /*
597  * Walk up the tree looking for pseudo export entries.
598  *
599  * If a pseudo export is found, remove the path we've
600  * climbed from its visible list. If the visible list
601  * still has entries after the removal, then we can stop.
602  * If it becomes null, then remove the pseudo export entry
603  * and carry on up the tree to see if there's any more.
604  */
605 int
606 treeclimb_unexport(struct exportinfo *exip)
607 {
608 	vnode_t *dvp, *vp;
609 	fid_t fid;
610 	int error = 0;
611 	int exportdir;
612 	struct exportinfo *exi = NULL;
613 	struct exp_visible *vis_head = NULL, *visp;
614 
615 	ASSERT(RW_WRITE_HELD(&exported_lock));
616 
617 	exportdir = 1;
618 	vp = exip->exi_vp;
619 	VN_HOLD(vp);
620 
621 	for (;;) {
622 
623 		bzero(&fid, sizeof (fid));
624 		fid.fid_len = MAXFIDSZ;
625 		error = vop_fid_pseudo(vp, &fid);
626 		if (error)
627 			break;
628 
629 		if (! exportdir) {
630 
631 			/*
632 			 * We need to use checkexport4() here because it
633 			 * doesn't acquire exported_lock and it doesn't
634 			 * manipulate exi_count.
635 			 *
636 			 * Remove directories from the visible
637 			 * list that are unique to the path
638 			 * for this export.  (Only VROOT exportinfos
639 			 * have can have visible entries).
640 			 */
641 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
642 			if (exi != NULL && (vp->v_flag & VROOT)) {
643 
644 				less_visible(exi, vis_head);
645 				vis_head = NULL;
646 
647 				/*
648 				 * If the visible list has entries
649 				 * or if it's a real export, then
650 				 * there's no need to keep climbing.
651 				 */
652 				if (exi->exi_visible || ! PSEUDO(exi))
653 					break;
654 
655 				/*
656 				 * Otherwise, we have a pseudo export
657 				 * with an empty list (no exports below
658 				 * it) so we must remove and continue
659 				 * the climb to remove its name from
660 				 * the parent export.
661 				 */
662 				error = export_unlink(&vp->v_vfsp->vfs_fsid,
663 						&fid, vp, NULL);
664 				if (error)
665 					break;
666 
667 				exi_rele(exi);
668 			}
669 		}
670 
671 		/*
672 		 * If at the root of the filesystem, need
673 		 * to traverse across the mountpoint
674 		 * and continue the climb on the mounted-on
675 		 * filesystem.
676 		 */
677 		if (vp->v_flag & VROOT) {
678 			if (VN_CMP(vp, rootdir)) {
679 				/* at system root */
680 				break;
681 			}
682 			vp = untraverse(vp);
683 			exportdir = 0;
684 			continue;
685 		}
686 
687 		/*
688 		 *  Add this directory fid to path list
689 		 */
690 		visp = kmem_alloc(sizeof (*visp), KM_SLEEP);
691 		VN_HOLD(vp);
692 		visp->vis_vp = vp;
693 		visp->vis_fid = fid;		/* structure copy */
694 		visp->vis_ino = 0;
695 		visp->vis_count = 1;
696 		visp->vis_exported = exportdir;
697 		visp->vis_next = vis_head;
698 		vis_head = visp;
699 
700 		/*
701 		 * Do a ".." to find parent dir of vp.
702 		 */
703 		error = VOP_LOOKUP(vp, "..", &dvp, NULL, 0, NULL, CRED());
704 
705 		if (error == ENOTDIR && exportdir) {
706 			dvp = exip->exi_dvp;
707 			ASSERT(dvp != NULL);
708 			VN_HOLD(dvp);
709 			error = 0;
710 		}
711 		if (error)
712 			break;
713 
714 		exportdir = 0;
715 		VN_RELE(vp);
716 		vp = dvp;
717 	}
718 
719 	VN_RELE(vp);
720 	return (error);
721 }
722 
723 
724 /*
725  * Traverse backward across mountpoint from the
726  * root vnode of a filesystem to its mounted-on
727  * vnode.
728  */
729 vnode_t *
730 untraverse(vnode_t *vp)
731 {
732 	vnode_t *tvp, *nextvp;
733 
734 	tvp = vp;
735 	for (;;) {
736 		if (! (tvp->v_flag & VROOT))
737 			break;
738 
739 		/* lock vfs to prevent unmount of this vfs */
740 		vfs_lock_wait(tvp->v_vfsp);
741 
742 		if ((nextvp = tvp->v_vfsp->vfs_vnodecovered) == NULL) {
743 			vfs_unlock(tvp->v_vfsp);
744 			break;
745 		}
746 
747 		/*
748 		 * Hold nextvp to prevent unmount.  After unlock vfs and
749 		 * rele tvp, any number of overlays could be unmounted.
750 		 * Putting a hold on vfs_vnodecovered will only allow
751 		 * tvp's vfs to be unmounted. Of course if caller placed
752 		 * extra hold on vp before calling untraverse, the following
753 		 * hold would not be needed.  Since prev actions of caller
754 		 * are unknown, we need to hold here just to be safe.
755 		 */
756 		VN_HOLD(nextvp);
757 		vfs_unlock(tvp->v_vfsp);
758 		VN_RELE(tvp);
759 		tvp = nextvp;
760 	}
761 
762 	return (tvp);
763 }
764 
765 /*
766  * Given an exportinfo, climb up to find the exportinfo for the VROOT
767  * of the filesystem.
768  *
769  * e.g.         /
770  *              |
771  *              a (VROOT) pseudo-exportinfo
772  *		|
773  *		b
774  *		|
775  *		c  #share /a/b/c
776  *		|
777  *		d
778  *
779  * where c is in the same filesystem as a.
780  * So, get_root_export(*exportinfo_for_c) returns exportinfo_for_a
781  *
782  * If d is shared, then c will be put into a's visible list.
783  * Note: visible list is per filesystem and is attached to the
784  * VROOT exportinfo.
785  */
786 struct exportinfo *
787 get_root_export(struct exportinfo *exip)
788 {
789 	vnode_t *dvp, *vp;
790 	fid_t fid;
791 	struct exportinfo *exi = exip;
792 	int error;
793 
794 	vp = exi->exi_vp;
795 	VN_HOLD(vp);
796 
797 	for (;;) {
798 
799 		if (vp->v_flag & VROOT) {
800 			ASSERT(exi != NULL);
801 			break;
802 		}
803 
804 		/*
805 		 * Now, do a ".." to find parent dir of vp.
806 		 */
807 		error = VOP_LOOKUP(vp, "..", &dvp, NULL, 0, NULL, CRED());
808 
809 		if (error) {
810 			exi = NULL;
811 			break;
812 		}
813 
814 		VN_RELE(vp);
815 		vp = dvp;
816 
817 		bzero(&fid, sizeof (fid));
818 		fid.fid_len = MAXFIDSZ;
819 		error = vop_fid_pseudo(vp, &fid);
820 		if (error) {
821 			exi = NULL;
822 			break;
823 		}
824 
825 		exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
826 	}
827 
828 	VN_RELE(vp);
829 	return (exi);
830 }
831 
832 /*
833  * Return true if the supplied vnode has a sub-directory exported.
834  */
835 int
836 has_visible(struct exportinfo *exi, vnode_t *vp)
837 {
838 	struct exp_visible *visp;
839 	fid_t fid;
840 	bool_t vp_is_exported;
841 
842 	vp_is_exported = VN_CMP(vp,  exi->exi_vp);
843 
844 	/*
845 	 * An exported root vnode has a sub-dir shared if it has a visible list.
846 	 * i.e. if it does not have a visible list, then there is no node in
847 	 * this filesystem leads to any other shared node.
848 	 */
849 	if (vp_is_exported && (vp->v_flag & VROOT))
850 		return (exi->exi_visible ? 1 : 0);
851 
852 	/*
853 	 * Only the exportinfo of a fs root node may have a visible list.
854 	 * Either it is a pseudo root node, or a real exported root node.
855 	 */
856 	if ((exi = get_root_export(exi)) == NULL) {
857 		return (0);
858 	}
859 
860 	if (!exi->exi_visible)
861 		return (0);
862 
863 	/* Get the fid of the vnode */
864 	bzero(&fid, sizeof (fid));
865 	fid.fid_len = MAXFIDSZ;
866 	if (vop_fid_pseudo(vp, &fid) != 0) {
867 		return (0);
868 	}
869 
870 	/*
871 	 * See if vp is in the visible list of the root node exportinfo.
872 	 */
873 	for (visp = exi->exi_visible; visp; visp = visp->vis_next) {
874 		if (EQFID(&fid, &visp->vis_fid)) {
875 			/*
876 			 * If vp is an exported non-root node with only 1 path
877 			 * count (for itself), it indicates no sub-dir shared
878 			 * using this vp as a path.
879 			 */
880 			if (vp_is_exported && visp->vis_count < 2)
881 				break;
882 
883 			return (1);
884 		}
885 	}
886 
887 	return (0);
888 }
889 
890 /*
891  * Returns true if the supplied vnode is visible
892  * in this export.  If vnode is visible, return
893  * vis_exported in expseudo.
894  */
895 int
896 nfs_visible(struct exportinfo *exi, vnode_t *vp, int *expseudo)
897 {
898 	struct exp_visible *visp;
899 	fid_t fid;
900 
901 	/*
902 	 * First check to see if vp is export root.
903 	 *
904 	 * A pseudo export root can never be exported
905 	 * (it would be a real export then); however,
906 	 * it is always visible.  If a pseudo root object
907 	 * was exported by server admin, then the entire
908 	 * pseudo exportinfo (and all visible entries) would
909 	 * be destroyed.  A pseudo exportinfo only exists
910 	 * to provide access to real (descendant) export(s).
911 	 *
912 	 * Previously, rootdir was special cased here; however,
913 	 * the export root special case handles the rootdir
914 	 * case also.
915 	 */
916 	if (VN_CMP(vp, exi->exi_vp)) {
917 		*expseudo = 0;
918 		return (1);
919 	}
920 
921 	/*
922 	 * Only a PSEUDO node has a visible list or an exported VROOT
923 	 * node may have a visible list.
924 	 */
925 	if (! PSEUDO(exi) && (exi = get_root_export(exi)) == NULL) {
926 		*expseudo = 0;
927 		return (0);
928 	}
929 
930 	/* Get the fid of the vnode */
931 
932 	bzero(&fid, sizeof (fid));
933 	fid.fid_len = MAXFIDSZ;
934 	if (vop_fid_pseudo(vp, &fid) != 0) {
935 		*expseudo = 0;
936 		return (0);
937 	}
938 
939 	/*
940 	 * We can't trust VN_CMP() above because of LOFS.
941 	 * Even though VOP_CMP will do the right thing for LOFS
942 	 * objects, VN_CMP will short circuit out early when the
943 	 * vnode ops ptrs are different.  Just in case we're dealing
944 	 * with LOFS, compare exi_fid/fsid here.
945 	 *
946 	 * expseudo is not set because this is not an export
947 	 */
948 	if (EQFID(&exi->exi_fid, &fid) &&
949 	    EQFSID(&exi->exi_fsid, &vp->v_vfsp->vfs_fsid)) {
950 		*expseudo = 0;
951 		return (1);
952 	}
953 
954 
955 	/* See if it matches any fid in the visible list */
956 
957 	for (visp = exi->exi_visible; visp; visp = visp->vis_next) {
958 		if (EQFID(&fid, &visp->vis_fid)) {
959 			*expseudo = visp->vis_exported;
960 			return (1);
961 		}
962 	}
963 
964 	*expseudo = 0;
965 
966 	return (0);
967 }
968 
969 /*
970  * Returns true if the supplied vnode is the
971  * directory of an export point.
972  */
973 int
974 nfs_exported(struct exportinfo *exi, vnode_t *vp)
975 {
976 	struct exp_visible *visp;
977 	fid_t fid;
978 
979 	/*
980 	 * First check to see if vp is the export root
981 	 * This check required for the case of lookup ..
982 	 * where .. is a V_ROOT vnode and a pseudo exportroot.
983 	 * Pseudo export root objects do not have an entry
984 	 * in the visible list even though every V_ROOT
985 	 * pseudonode is visible.  It is safe to compare
986 	 * vp here because pseudo_exportfs put a hold on
987 	 * it when exi_vp was initialized.
988 	 *
989 	 * Note: VN_CMP() won't match for LOFS shares, but they're
990 	 * handled below w/EQFID/EQFSID.
991 	 */
992 	if (VN_CMP(vp, exi->exi_vp))
993 		return (1);
994 
995 	/* Get the fid of the vnode */
996 
997 	bzero(&fid, sizeof (fid));
998 	fid.fid_len = MAXFIDSZ;
999 	if (vop_fid_pseudo(vp, &fid) != 0)
1000 		return (0);
1001 
1002 	if (EQFID(&fid, &exi->exi_fid) &&
1003 	    EQFSID(&vp->v_vfsp->vfs_fsid, &exi->exi_fsid)) {
1004 		return (1);
1005 	}
1006 
1007 	/* See if it matches any fid in the visible list */
1008 
1009 	for (visp = exi->exi_visible; visp; visp = visp->vis_next) {
1010 		if (EQFID(&fid, &visp->vis_fid))
1011 			return (visp->vis_exported);
1012 	}
1013 
1014 	return (0);
1015 }
1016 
1017 /*
1018  * Returns true if the supplied inode is visible
1019  * in this export.  This function is used by
1020  * readdir which uses inode numbers from the
1021  * directory.
1022  *
1023  * NOTE: this code does not match inode number for ".",
1024  * but it isn't required because NFS4 server rddir
1025  * skips . and .. entries.
1026  */
1027 int
1028 nfs_visible_inode(struct exportinfo *exi, ino64_t ino, int *expseudo)
1029 {
1030 	struct exp_visible *visp;
1031 
1032 	/*
1033 	 * Only a PSEUDO node has a visible list or an exported VROOT
1034 	 * node may have a visible list.
1035 	 */
1036 	if (! PSEUDO(exi) && (exi = get_root_export(exi)) == NULL) {
1037 		*expseudo = 0;
1038 		return (0);
1039 	}
1040 
1041 	for (visp = exi->exi_visible; visp; visp = visp->vis_next)
1042 		if ((u_longlong_t)ino == visp->vis_ino) {
1043 			*expseudo = visp->vis_exported;
1044 			return (1);
1045 		}
1046 
1047 	*expseudo = 0;
1048 	return (0);
1049 }
1050