xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4_stub_vnops.c (revision 354507029a42e4bcb1ea64fc4685f2bfd4792db8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Support for ephemeral mounts, e.g. mirror-mounts. These mounts are
29  * triggered from a "stub" rnode via a special set of vnodeops.
30  */
31 
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/time.h>
37 #include <sys/vnode.h>
38 #include <sys/vfs.h>
39 #include <sys/vfs_opreg.h>
40 #include <sys/file.h>
41 #include <sys/filio.h>
42 #include <sys/uio.h>
43 #include <sys/buf.h>
44 #include <sys/mman.h>
45 #include <sys/pathname.h>
46 #include <sys/dirent.h>
47 #include <sys/debug.h>
48 #include <sys/vmsystm.h>
49 #include <sys/fcntl.h>
50 #include <sys/flock.h>
51 #include <sys/swap.h>
52 #include <sys/errno.h>
53 #include <sys/strsubr.h>
54 #include <sys/sysmacros.h>
55 #include <sys/kmem.h>
56 #include <sys/mount.h>
57 #include <sys/cmn_err.h>
58 #include <sys/pathconf.h>
59 #include <sys/utsname.h>
60 #include <sys/dnlc.h>
61 #include <sys/acl.h>
62 #include <sys/systeminfo.h>
63 #include <sys/policy.h>
64 #include <sys/sdt.h>
65 #include <sys/list.h>
66 #include <sys/stat.h>
67 #include <sys/mntent.h>
68 
69 #include <rpc/types.h>
70 #include <rpc/auth.h>
71 #include <rpc/clnt.h>
72 
73 #include <nfs/nfs.h>
74 #include <nfs/nfs_clnt.h>
75 #include <nfs/nfs_acl.h>
76 #include <nfs/lm.h>
77 #include <nfs/nfs4.h>
78 #include <nfs/nfs4_kprot.h>
79 #include <nfs/rnode4.h>
80 #include <nfs/nfs4_clnt.h>
81 
82 #include <vm/hat.h>
83 #include <vm/as.h>
84 #include <vm/page.h>
85 #include <vm/pvn.h>
86 #include <vm/seg.h>
87 #include <vm/seg_map.h>
88 #include <vm/seg_kpm.h>
89 #include <vm/seg_vn.h>
90 
91 #include <fs/fs_subr.h>
92 
93 #include <sys/ddi.h>
94 #include <sys/int_fmtio.h>
95 
96 #include <sys/sunddi.h>
97 
98 /*
99  * The automatic unmounter thread stuff!
100  */
101 static int nfs4_trigger_thread_timer = 20;	/* in seconds */
102 
103 /*
104  * Just a default....
105  */
106 static uint_t nfs4_trigger_mount_to = 240;
107 
108 typedef struct nfs4_trigger_globals {
109 	kmutex_t		ntg_forest_lock;
110 	uint_t			ntg_mount_to;
111 	int			ntg_thread_started;
112 	nfs4_ephemeral_tree_t	*ntg_forest;
113 } nfs4_trigger_globals_t;
114 
115 kmutex_t	nfs4_ephemeral_thread_lock;
116 
117 zone_key_t	nfs4_ephemeral_key = ZONE_KEY_UNINITIALIZED;
118 
119 static void	nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *);
120 
/*
 * Per-server data for an ephemeral mount.  Depending on the type of
 * ephemeral mount, the contents are either duplicated from a servinfo4_t
 * or hand-crafted.
 *
 * This structure is meant to carry only ephemeral mount-type specific
 * data, so that it can be handed to nfs4_trigger_nargs_create().
 */
typedef struct ephemeral_servinfo {
	char			*esi_hostname;	/* private copy of hostname */
	char			*esi_netname;
	char			*esi_path;
	int			esi_path_len;
	int			esi_mount_flags; /* NFSMNT_* mount flags */
	struct netbuf		*esi_addr;	/* private copy of address */
	struct netbuf		*esi_syncaddr;
	struct knetconfig	*esi_knconf;
} ephemeral_servinfo_t;
139 
140 /*
141  * Collect together the mount-type specific and generic data args.
142  */
143 typedef struct domount_args {
144 	ephemeral_servinfo_t	*dma_esi;
145 	char			*dma_hostlist; /* comma-sep. for RO failover */
146 	struct nfs_args		*dma_nargs;
147 } domount_args_t;
148 
149 
150 /*
151  * The vnode ops functions for a trigger stub vnode
152  */
153 static int nfs4_trigger_open(vnode_t **, int, cred_t *, caller_context_t *);
154 static int nfs4_trigger_getattr(vnode_t *, struct vattr *, int, cred_t *,
155     caller_context_t *);
156 static int nfs4_trigger_setattr(vnode_t *, struct vattr *, int, cred_t *,
157     caller_context_t *);
158 static int nfs4_trigger_access(vnode_t *, int, int, cred_t *,
159     caller_context_t *);
160 static int nfs4_trigger_readlink(vnode_t *, struct uio *, cred_t *,
161     caller_context_t *);
162 static int nfs4_trigger_lookup(vnode_t *, char *, vnode_t **,
163     struct pathname *, int, vnode_t *, cred_t *, caller_context_t *,
164     int *, pathname_t *);
165 static int nfs4_trigger_create(vnode_t *, char *, struct vattr *,
166     enum vcexcl, int, vnode_t **, cred_t *, int, caller_context_t *,
167     vsecattr_t *);
168 static int nfs4_trigger_remove(vnode_t *, char *, cred_t *, caller_context_t *,
169     int);
170 static int nfs4_trigger_link(vnode_t *, vnode_t *, char *, cred_t *,
171     caller_context_t *, int);
172 static int nfs4_trigger_rename(vnode_t *, char *, vnode_t *, char *,
173     cred_t *, caller_context_t *, int);
174 static int nfs4_trigger_mkdir(vnode_t *, char *, struct vattr *,
175     vnode_t **, cred_t *, caller_context_t *, int, vsecattr_t *vsecp);
176 static int nfs4_trigger_rmdir(vnode_t *, char *, vnode_t *, cred_t *,
177     caller_context_t *, int);
178 static int nfs4_trigger_symlink(vnode_t *, char *, struct vattr *, char *,
179     cred_t *, caller_context_t *, int);
180 static int nfs4_trigger_cmp(vnode_t *, vnode_t *, caller_context_t *);
181 
182 /*
183  * Regular NFSv4 vnodeops that we need to reference directly
184  */
185 extern int	nfs4_getattr(vnode_t *, struct vattr *, int, cred_t *,
186 		    caller_context_t *);
187 extern void	nfs4_inactive(vnode_t *, cred_t *, caller_context_t *);
188 extern int	nfs4_rwlock(vnode_t *, int, caller_context_t *);
189 extern void	nfs4_rwunlock(vnode_t *, int, caller_context_t *);
190 extern int	nfs4_lookup(vnode_t *, char *, vnode_t **,
191 		    struct pathname *, int, vnode_t *, cred_t *,
192 		    caller_context_t *, int *, pathname_t *);
193 extern int	nfs4_pathconf(vnode_t *, int, ulong_t *, cred_t *,
194 		    caller_context_t *);
195 extern int	nfs4_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
196 		    caller_context_t *);
197 extern int	nfs4_fid(vnode_t *, fid_t *, caller_context_t *);
198 extern int	nfs4_realvp(vnode_t *, vnode_t **, caller_context_t *);
199 
200 static int	nfs4_trigger_mount(vnode_t *, vnode_t **);
201 static int	nfs4_trigger_domount(vnode_t *, domount_args_t *, vfs_t **,
202     cred_t *);
203 static domount_args_t  *nfs4_trigger_domount_args_create(vnode_t *);
204 static void	nfs4_trigger_domount_args_destroy(domount_args_t *dma,
205     vnode_t *vp);
206 static ephemeral_servinfo_t *nfs4_trigger_esi_create(vnode_t *, servinfo4_t *);
207 static void	nfs4_trigger_esi_destroy(ephemeral_servinfo_t *, vnode_t *);
208 static ephemeral_servinfo_t *nfs4_trigger_esi_create_mirrormount(vnode_t *,
209     servinfo4_t *);
210 static struct nfs_args 	*nfs4_trigger_nargs_create(mntinfo4_t *, servinfo4_t *,
211     ephemeral_servinfo_t *);
212 static void	nfs4_trigger_nargs_destroy(struct nfs_args *);
213 static char	*nfs4_trigger_create_mntopts(vfs_t *);
214 static void	nfs4_trigger_destroy_mntopts(char *);
215 static int 	nfs4_trigger_add_mntopt(char *, char *, vfs_t *);
216 static enum clnt_stat nfs4_trigger_ping_server(servinfo4_t *, int);
217 
218 extern int	umount2_engine(vfs_t *, int, cred_t *, int);
219 
220 
221 vnodeops_t *nfs4_trigger_vnodeops;
222 
223 /*
224  * These are the vnodeops that we must define for stub vnodes.
225  *
226  *
227  * Many of the VOPs defined for NFSv4 do not need to be defined here,
228  * for various reasons. This will result in the VFS default function being
229  * used:
230  *
231  * - These VOPs require a previous VOP_OPEN to have occurred. That will have
232  *   lost the reference to the stub vnode, meaning these should not be called:
233  *       close, read, write, ioctl, readdir, seek.
234  *
235  * - These VOPs are meaningless for vnodes without data pages. Since the
236  *   stub vnode is of type VDIR, these should not be called:
237  *       space, getpage, putpage, map, addmap, delmap, pageio, fsync.
238  *
239  * - These VOPs are otherwise not applicable, and should not be called:
240  *       dump, setsecattr.
241  *
242  *
243  * These VOPs we do not want to define, but nor do we want the VFS default
244  * action. Instead, we specify the VFS error function, with fs_error(), but
245  * note that fs_error() is not actually called. Instead it results in the
246  * use of the error function defined for the particular VOP, in vn_ops_table[]:
247  *
248  * -   frlock, dispose, shrlock.
249  *
250  *
251  * These VOPs we define to use the corresponding regular NFSv4 vnodeop.
252  * NOTE: if any of these ops involve an OTW call with the stub FH, then
253  * that call must be wrapped with save_mnt_secinfo()/check_mnt_secinfo()
254  * to protect the security data in the servinfo4_t for the "parent"
255  * filesystem that contains the stub.
256  *
257  * - These VOPs should not trigger a mount, so that "ls -l" does not:
258  *       pathconf, getsecattr.
259  *
260  * - These VOPs would not make sense to trigger:
261  *       inactive, rwlock, rwunlock, fid, realvp.
262  */
263 const fs_operation_def_t nfs4_trigger_vnodeops_template[] = {
264 	VOPNAME_OPEN,		{ .vop_open = nfs4_trigger_open },
265 	VOPNAME_GETATTR,	{ .vop_getattr = nfs4_trigger_getattr },
266 	VOPNAME_SETATTR,	{ .vop_setattr = nfs4_trigger_setattr },
267 	VOPNAME_ACCESS,		{ .vop_access = nfs4_trigger_access },
268 	VOPNAME_LOOKUP,		{ .vop_lookup = nfs4_trigger_lookup },
269 	VOPNAME_CREATE,		{ .vop_create = nfs4_trigger_create },
270 	VOPNAME_REMOVE,		{ .vop_remove = nfs4_trigger_remove },
271 	VOPNAME_LINK,		{ .vop_link = nfs4_trigger_link },
272 	VOPNAME_RENAME,		{ .vop_rename = nfs4_trigger_rename },
273 	VOPNAME_MKDIR,		{ .vop_mkdir = nfs4_trigger_mkdir },
274 	VOPNAME_RMDIR,		{ .vop_rmdir = nfs4_trigger_rmdir },
275 	VOPNAME_SYMLINK,	{ .vop_symlink = nfs4_trigger_symlink },
276 	VOPNAME_READLINK,	{ .vop_readlink = nfs4_trigger_readlink },
277 	VOPNAME_INACTIVE, 	{ .vop_inactive = nfs4_inactive },
278 	VOPNAME_FID,		{ .vop_fid = nfs4_fid },
279 	VOPNAME_RWLOCK,		{ .vop_rwlock = nfs4_rwlock },
280 	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = nfs4_rwunlock },
281 	VOPNAME_REALVP,		{ .vop_realvp = nfs4_realvp },
282 	VOPNAME_GETSECATTR,	{ .vop_getsecattr = nfs4_getsecattr },
283 	VOPNAME_PATHCONF,	{ .vop_pathconf = nfs4_pathconf },
284 	VOPNAME_FRLOCK,		{ .error = fs_error },
285 	VOPNAME_DISPOSE,	{ .error = fs_error },
286 	VOPNAME_SHRLOCK,	{ .error = fs_error },
287 	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
288 	NULL, NULL
289 };
290 
291 static void
292 nfs4_ephemeral_tree_incr(nfs4_ephemeral_tree_t *net)
293 {
294 	ASSERT(mutex_owned(&net->net_cnt_lock));
295 	net->net_refcnt++;
296 	ASSERT(net->net_refcnt != 0);
297 }
298 
299 static void
300 nfs4_ephemeral_tree_hold(nfs4_ephemeral_tree_t *net)
301 {
302 	mutex_enter(&net->net_cnt_lock);
303 	nfs4_ephemeral_tree_incr(net);
304 	mutex_exit(&net->net_cnt_lock);
305 }
306 
307 /*
308  * We need a safe way to decrement the refcnt whilst the
309  * lock is being held.
310  */
311 static void
312 nfs4_ephemeral_tree_decr(nfs4_ephemeral_tree_t *net)
313 {
314 	ASSERT(mutex_owned(&net->net_cnt_lock));
315 	ASSERT(net->net_refcnt != 0);
316 	net->net_refcnt--;
317 }
318 
319 static void
320 nfs4_ephemeral_tree_rele(nfs4_ephemeral_tree_t *net)
321 {
322 	mutex_enter(&net->net_cnt_lock);
323 	nfs4_ephemeral_tree_decr(net);
324 	mutex_exit(&net->net_cnt_lock);
325 }
326 
327 /*
328  * Trigger ops for stub vnodes; for mirror mounts, etc.
329  *
330  * The general idea is that a "triggering" op will first call
331  * nfs4_trigger_mount(), which will find out whether a mount has already
332  * been triggered.
333  *
334  * If it has, then nfs4_trigger_mount() sets newvp to the root vnode
335  * of the covering vfs.
336  *
337  * If a mount has not yet been triggered, nfs4_trigger_mount() will do so,
338  * and again set newvp, as above.
339  *
340  * The triggering op may then re-issue the VOP by calling it on newvp.
341  *
342  * Note that some ops may perform custom action, and may or may not need
343  * to trigger a mount.
344  *
345  * Some ops need to call the regular NFSv4 vnodeop for a stub vnode. We
346  * obviously can't do this with VOP_<whatever>, since it's a stub vnode
347  * and that would just recurse. Instead, we call the v4 op directly,
348  * by name.  This is OK, since we know that the vnode is for NFSv4,
349  * otherwise it couldn't be a stub.
350  *
351  */
352 
353 static int
354 nfs4_trigger_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
355 {
356 	int error;
357 	vnode_t *newvp;
358 
359 	error = nfs4_trigger_mount(*vpp, &newvp);
360 	if (error)
361 		return (error);
362 
363 	/* Release the stub vnode, as we're losing the reference to it */
364 	VN_RELE(*vpp);
365 
366 	/* Give the caller the root vnode of the newly-mounted fs */
367 	*vpp = newvp;
368 
369 	/* return with VN_HELD(newvp) */
370 	return (VOP_OPEN(vpp, flag, cr, ct));
371 }
372 
373 /*
374  * For the majority of cases, nfs4_trigger_getattr() will not trigger
375  * a mount. However, if ATTR_TRIGGER is set, we are being informed
376  * that we need to force the mount before we attempt to determine
377  * the attributes. The intent is an atomic operation for security
378  * testing.
379  */
380 static int
381 nfs4_trigger_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
382     caller_context_t *ct)
383 {
384 	int error;
385 
386 	if (flags & ATTR_TRIGGER) {
387 		vnode_t	*newvp;
388 
389 		error = nfs4_trigger_mount(vp, &newvp);
390 		if (error)
391 			return (error);
392 
393 		error = VOP_GETATTR(newvp, vap, flags, cr, ct);
394 		VN_RELE(newvp);
395 	} else {
396 		error = nfs4_getattr(vp, vap, flags, cr, ct);
397 	}
398 
399 	return (error);
400 }
401 
402 static int
403 nfs4_trigger_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
404 		caller_context_t *ct)
405 {
406 	int error;
407 	vnode_t *newvp;
408 
409 	error = nfs4_trigger_mount(vp, &newvp);
410 	if (error)
411 		return (error);
412 
413 	error = VOP_SETATTR(newvp, vap, flags, cr, ct);
414 	VN_RELE(newvp);
415 
416 	return (error);
417 }
418 
419 static int
420 nfs4_trigger_access(vnode_t *vp, int mode, int flags, cred_t *cr,
421     caller_context_t *ct)
422 {
423 	int error;
424 	vnode_t *newvp;
425 
426 	error = nfs4_trigger_mount(vp, &newvp);
427 	if (error)
428 		return (error);
429 
430 	error = VOP_ACCESS(newvp, mode, flags, cr, ct);
431 	VN_RELE(newvp);
432 
433 	return (error);
434 }
435 
436 static int
437 nfs4_trigger_lookup(vnode_t *dvp, char *nm, vnode_t **vpp,
438     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr,
439     caller_context_t *ct, int *deflags, pathname_t *rpnp)
440 {
441 	int error;
442 	vnode_t *newdvp;
443 	rnode4_t *drp = VTOR4(dvp);
444 
445 	ASSERT(RP_ISSTUB(drp));
446 
447 	/* for now, we only support mirror-mounts */
448 	ASSERT(RP_ISSTUB_MIRRORMOUNT(drp));
449 
450 	/*
451 	 * It's not legal to lookup ".." for an fs root, so we mustn't pass
452 	 * that up. Instead, pass onto the regular op, regardless of whether
453 	 * we've triggered a mount.
454 	 */
455 	if (strcmp(nm, "..") == 0)
456 		return (nfs4_lookup(dvp, nm, vpp, pnp, flags, rdir, cr,
457 		    ct, deflags, rpnp));
458 
459 	error = nfs4_trigger_mount(dvp, &newdvp);
460 	if (error)
461 		return (error);
462 
463 	error = VOP_LOOKUP(newdvp, nm, vpp, pnp, flags, rdir, cr, ct,
464 	    deflags, rpnp);
465 	VN_RELE(newdvp);
466 
467 	return (error);
468 }
469 
470 static int
471 nfs4_trigger_create(vnode_t *dvp, char *nm, struct vattr *va,
472     enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr,
473     int flags, caller_context_t *ct, vsecattr_t *vsecp)
474 {
475 	int error;
476 	vnode_t *newdvp;
477 
478 	error = nfs4_trigger_mount(dvp, &newdvp);
479 	if (error)
480 		return (error);
481 
482 	error = VOP_CREATE(newdvp, nm, va, exclusive, mode, vpp, cr,
483 	    flags, ct, vsecp);
484 	VN_RELE(newdvp);
485 
486 	return (error);
487 }
488 
489 static int
490 nfs4_trigger_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
491     int flags)
492 {
493 	int error;
494 	vnode_t *newdvp;
495 
496 	error = nfs4_trigger_mount(dvp, &newdvp);
497 	if (error)
498 		return (error);
499 
500 	error = VOP_REMOVE(newdvp, nm, cr, ct, flags);
501 	VN_RELE(newdvp);
502 
503 	return (error);
504 }
505 
506 static int
507 nfs4_trigger_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr,
508     caller_context_t *ct, int flags)
509 {
510 	int error;
511 	vnode_t *newtdvp;
512 
513 	error = nfs4_trigger_mount(tdvp, &newtdvp);
514 	if (error)
515 		return (error);
516 
517 	/*
518 	 * We don't check whether svp is a stub. Let the NFSv4 code
519 	 * detect that error, and return accordingly.
520 	 */
521 	error = VOP_LINK(newtdvp, svp, tnm, cr, ct, flags);
522 	VN_RELE(newtdvp);
523 
524 	return (error);
525 }
526 
527 static int
528 nfs4_trigger_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
529     cred_t *cr, caller_context_t *ct, int flags)
530 {
531 	int error;
532 	vnode_t *newsdvp;
533 	rnode4_t *tdrp = VTOR4(tdvp);
534 
535 	/*
536 	 * We know that sdvp is a stub, otherwise we would not be here.
537 	 *
538 	 * If tdvp is also be a stub, there are two possibilities: it
539 	 * is either the same stub as sdvp [i.e. VN_CMP(sdvp, tdvp)]
540 	 * or it is a different stub [!VN_CMP(sdvp, tdvp)].
541 	 *
542 	 * In the former case, just trigger sdvp, and treat tdvp as
543 	 * though it were not a stub.
544 	 *
545 	 * In the latter case, it might be a different stub for the
546 	 * same server fs as sdvp, or for a different server fs.
547 	 * Regardless, from the client perspective this would still
548 	 * be a cross-filesystem rename, and should not be allowed,
549 	 * so return EXDEV, without triggering either mount.
550 	 */
551 	if (RP_ISSTUB(tdrp) && !VN_CMP(sdvp, tdvp))
552 		return (EXDEV);
553 
554 	error = nfs4_trigger_mount(sdvp, &newsdvp);
555 	if (error)
556 		return (error);
557 
558 	error = VOP_RENAME(newsdvp, snm, tdvp, tnm, cr, ct, flags);
559 
560 	VN_RELE(newsdvp);
561 
562 	return (error);
563 }
564 
565 /* ARGSUSED */
566 static int
567 nfs4_trigger_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp,
568     cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
569 {
570 	int error;
571 	vnode_t *newdvp;
572 
573 	error = nfs4_trigger_mount(dvp, &newdvp);
574 	if (error)
575 		return (error);
576 
577 	error = VOP_MKDIR(newdvp, nm, va, vpp, cr, ct, flags, vsecp);
578 	VN_RELE(newdvp);
579 
580 	return (error);
581 }
582 
583 static int
584 nfs4_trigger_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
585     caller_context_t *ct, int flags)
586 {
587 	int error;
588 	vnode_t *newdvp;
589 
590 	error = nfs4_trigger_mount(dvp, &newdvp);
591 	if (error)
592 		return (error);
593 
594 	error = VOP_RMDIR(newdvp, nm, cdir, cr, ct, flags);
595 	VN_RELE(newdvp);
596 
597 	return (error);
598 }
599 
600 static int
601 nfs4_trigger_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm,
602     cred_t *cr, caller_context_t *ct, int flags)
603 {
604 	int error;
605 	vnode_t *newdvp;
606 
607 	error = nfs4_trigger_mount(dvp, &newdvp);
608 	if (error)
609 		return (error);
610 
611 	error = VOP_SYMLINK(newdvp, lnm, tva, tnm, cr, ct, flags);
612 	VN_RELE(newdvp);
613 
614 	return (error);
615 }
616 
617 static int
618 nfs4_trigger_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr,
619     caller_context_t *ct)
620 {
621 	int error;
622 	vnode_t *newvp;
623 
624 	error = nfs4_trigger_mount(vp, &newvp);
625 	if (error)
626 		return (error);
627 
628 	error = VOP_READLINK(newvp, uiop, cr, ct);
629 	VN_RELE(newvp);
630 
631 	return (error);
632 }
633 
634 /* end of trigger vnode ops */
635 
636 
637 /*
638  * Mount upon a trigger vnode; for mirror-mounts, etc.
639  *
640  * The mount may have already occurred, via another thread. If not,
641  * assemble the location information - which may require fetching - and
642  * perform the mount.
643  *
644  * Sets newvp to be the root of the fs that is now covering vp. Note
645  * that we return with VN_HELD(*newvp).
646  *
647  * The caller is responsible for passing the VOP onto the covering fs.
648  */
649 static int
650 nfs4_trigger_mount(vnode_t *vp, vnode_t **newvpp)
651 {
652 	int			 error;
653 	vfs_t			*vfsp;
654 	rnode4_t		*rp = VTOR4(vp);
655 	mntinfo4_t		*mi = VTOMI4(vp);
656 	domount_args_t		*dma;
657 
658 	nfs4_ephemeral_tree_t	*net;
659 
660 	bool_t			must_unlock = FALSE;
661 	bool_t			is_building = FALSE;
662 
663 	cred_t			*zcred;
664 
665 	nfs4_trigger_globals_t	*ntg;
666 
667 	zone_t			*zone = curproc->p_zone;
668 
669 	ASSERT(RP_ISSTUB(rp));
670 
671 	/* for now, we only support mirror-mounts */
672 	ASSERT(RP_ISSTUB_MIRRORMOUNT(rp));
673 
674 	*newvpp = NULL;
675 
676 	/*
677 	 * Has the mount already occurred?
678 	 */
679 	error = vn_vfsrlock_wait(vp);
680 	if (error)
681 		goto done;
682 	vfsp = vn_mountedvfs(vp);
683 	if (vfsp != NULL) {
684 		/* the mount has already occurred */
685 		error = VFS_ROOT(vfsp, newvpp);
686 		if (!error) {
687 			/* need to update the reference time  */
688 			mutex_enter(&mi->mi_lock);
689 			if (mi->mi_ephemeral)
690 				mi->mi_ephemeral->ne_ref_time =
691 				    gethrestime_sec();
692 			mutex_exit(&mi->mi_lock);
693 		}
694 
695 		vn_vfsunlock(vp);
696 		goto done;
697 	}
698 	vn_vfsunlock(vp);
699 
700 	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
701 	ASSERT(ntg != NULL);
702 
703 	mutex_enter(&mi->mi_lock);
704 
705 	/*
706 	 * We need to lock down the ephemeral tree.
707 	 */
708 	if (mi->mi_ephemeral_tree == NULL) {
709 		net = kmem_zalloc(sizeof (*net), KM_SLEEP);
710 		mutex_init(&net->net_tree_lock, NULL, MUTEX_DEFAULT, NULL);
711 		mutex_init(&net->net_cnt_lock, NULL, MUTEX_DEFAULT, NULL);
712 		net->net_refcnt = 1;
713 		net->net_status = NFS4_EPHEMERAL_TREE_BUILDING;
714 		is_building = TRUE;
715 
716 		/*
717 		 * We need to add it to the zone specific list for
718 		 * automatic unmounting and harvesting of deadwood.
719 		 */
720 		mutex_enter(&ntg->ntg_forest_lock);
721 		if (ntg->ntg_forest != NULL)
722 			net->net_next = ntg->ntg_forest;
723 		ntg->ntg_forest = net;
724 		mutex_exit(&ntg->ntg_forest_lock);
725 
726 		/*
727 		 * No lock order confusion with mi_lock because no
728 		 * other node could have grabbed net_tree_lock.
729 		 */
730 		mutex_enter(&net->net_tree_lock);
731 		mi->mi_ephemeral_tree = net;
732 		net->net_mount = mi;
733 		mutex_exit(&mi->mi_lock);
734 	} else {
735 		net = mi->mi_ephemeral_tree;
736 		nfs4_ephemeral_tree_hold(net);
737 
738 		mutex_exit(&mi->mi_lock);
739 
740 		mutex_enter(&net->net_tree_lock);
741 
742 		/*
743 		 * We can only procede if the tree is neither locked
744 		 * nor being torn down.
745 		 */
746 		mutex_enter(&net->net_cnt_lock);
747 		if (net->net_status & NFS4_EPHEMERAL_TREE_PROCESSING) {
748 			nfs4_ephemeral_tree_decr(net);
749 			mutex_exit(&net->net_cnt_lock);
750 			mutex_exit(&net->net_tree_lock);
751 
752 			return (EIO);
753 		}
754 		mutex_exit(&net->net_cnt_lock);
755 	}
756 
757 	mutex_enter(&net->net_cnt_lock);
758 	net->net_status |= NFS4_EPHEMERAL_TREE_MOUNTING;
759 	mutex_exit(&net->net_cnt_lock);
760 
761 	must_unlock = TRUE;
762 
763 	dma = nfs4_trigger_domount_args_create(vp);
764 	if (dma == NULL) {
765 		error = EINVAL;
766 		goto done;
767 	}
768 
769 	/*
770 	 * Need to be root for this call to make mount work.
771 	 * Note that since we define mirror mounts to work
772 	 * for any user, we allow the mount to proceed. And
773 	 * we realize that the server will perform security
774 	 * checks to make sure that the client is allowed
775 	 * access. Finally, once the mount takes place,
776 	 * directory permissions will ensure that the
777 	 * content is secure.
778 	 */
779 	zcred = zone_get_kcred(getzoneid());
780 	ASSERT(zcred != NULL);
781 
782 	error = nfs4_trigger_domount(vp, dma, &vfsp, zcred);
783 	nfs4_trigger_domount_args_destroy(dma, vp);
784 
785 	crfree(zcred);
786 
787 	if (!error)
788 		error = VFS_ROOT(vfsp, newvpp);
789 done:
790 	if (must_unlock) {
791 		mutex_enter(&net->net_cnt_lock);
792 		net->net_status &= ~NFS4_EPHEMERAL_TREE_MOUNTING;
793 		if (is_building)
794 			net->net_status &= ~NFS4_EPHEMERAL_TREE_BUILDING;
795 		nfs4_ephemeral_tree_decr(net);
796 		mutex_exit(&net->net_cnt_lock);
797 
798 		mutex_exit(&net->net_tree_lock);
799 	}
800 
801 	if (!error && (newvpp == NULL || *newvpp == NULL))
802 		error = ENOSYS;
803 
804 	return (error);
805 }
806 
807 /*
808  * Collect together both the generic & mount-type specific args.
809  */
810 static domount_args_t *
811 nfs4_trigger_domount_args_create(vnode_t *vp)
812 {
813 	int nointr;
814 	char *hostlist;
815 	servinfo4_t *svp;
816 	struct nfs_args *nargs, *nargs_head;
817 	enum clnt_stat status;
818 	ephemeral_servinfo_t *esi, *esi_first;
819 	domount_args_t *dma;
820 	mntinfo4_t *mi = VTOMI4(vp);
821 
822 	nointr = !(mi->mi_flags & MI4_INT);
823 	hostlist = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
824 
825 	svp = mi->mi_curr_serv;
826 	/* check if the current server is responding */
827 	status = nfs4_trigger_ping_server(svp, nointr);
828 	if (status == RPC_SUCCESS) {
829 		esi_first = nfs4_trigger_esi_create(vp, svp);
830 		if (esi_first == NULL) {
831 			kmem_free(hostlist, MAXPATHLEN);
832 			return (NULL);
833 		}
834 
835 		(void) strlcpy(hostlist, esi_first->esi_hostname, MAXPATHLEN);
836 
837 		nargs_head = nfs4_trigger_nargs_create(mi, svp, esi_first);
838 	} else {
839 		/* current server did not respond */
840 		esi_first = NULL;
841 		nargs_head = NULL;
842 	}
843 	nargs = nargs_head;
844 
845 	/*
846 	 * NFS RO failover.
847 	 *
848 	 * If we have multiple servinfo4 structures, linked via sv_next,
849 	 * we must create one nfs_args for each, linking the nfs_args via
850 	 * nfs_ext_u.nfs_extB.next.
851 	 *
852 	 * We need to build a corresponding esi for each, too, but that is
853 	 * used solely for building nfs_args, and may be immediately
854 	 * discarded, as domount() requires the info from just one esi,
855 	 * but all the nfs_args.
856 	 *
857 	 * Currently, the NFS mount code will hang if not all servers
858 	 * requested are available. To avoid that, we need to ping each
859 	 * server, here, and remove it from the list if it is not
860 	 * responding. This has the side-effect of that server then
861 	 * being permanently unavailable for this failover mount, even if
862 	 * it recovers. That's unfortunate, but the best we can do until
863 	 * the mount code path is fixed.
864 	 */
865 
866 	/*
867 	 * If the current server was down, loop indefinitely until we find
868 	 * at least one responsive server.
869 	 */
870 	do {
871 		/* no locking needed for sv_next; it is only set at fs mount */
872 		for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) {
873 			struct nfs_args *next;
874 
875 			/*
876 			 * nargs_head: the head of the nfs_args list
877 			 * nargs: the current tail of the list
878 			 * next: the newly-created element to be added
879 			 */
880 
881 			/*
882 			 * We've already tried the current server, above;
883 			 * if it was responding, we have already included it
884 			 * and it may now be ignored.
885 			 *
886 			 * Otherwise, try it again, since it may now have
887 			 * recovered.
888 			 */
889 			if (svp == mi->mi_curr_serv && esi_first != NULL)
890 				continue;
891 
892 			(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
893 			if (svp->sv_flags & SV4_NOTINUSE) {
894 				nfs_rw_exit(&svp->sv_lock);
895 				continue;
896 			}
897 			nfs_rw_exit(&svp->sv_lock);
898 
899 			/* check if the server is responding */
900 			status = nfs4_trigger_ping_server(svp, nointr);
901 			/* if the server did not respond, ignore it */
902 			if (status != RPC_SUCCESS)
903 				continue;
904 
905 			esi = nfs4_trigger_esi_create(vp, svp);
906 			if (esi == NULL)
907 				continue;
908 
909 			/*
910 			 * If the original current server (mi_curr_serv)
911 			 * was down when when we first tried it,
912 			 * (i.e. esi_first == NULL),
913 			 * we select this new server (svp) to be the server
914 			 * that we will actually contact (esi_first).
915 			 *
916 			 * Note that it's possible that mi_curr_serv == svp,
917 			 * if that mi_curr_serv was down but has now recovered.
918 			 */
919 			next = nfs4_trigger_nargs_create(mi, svp, esi);
920 			if (esi_first == NULL) {
921 				ASSERT(nargs == NULL);
922 				ASSERT(nargs_head == NULL);
923 				nargs_head = next;
924 				esi_first = esi;
925 				(void) strlcpy(hostlist,
926 				    esi_first->esi_hostname, MAXPATHLEN);
927 			} else {
928 				ASSERT(nargs_head != NULL);
929 				nargs->nfs_ext_u.nfs_extB.next = next;
930 				(void) strlcat(hostlist, ",", MAXPATHLEN);
931 				(void) strlcat(hostlist, esi->esi_hostname,
932 				    MAXPATHLEN);
933 				/* esi was only needed for hostname & nargs */
934 				nfs4_trigger_esi_destroy(esi, vp);
935 			}
936 
937 			nargs = next;
938 		}
939 
940 		/* if we've had no response at all, wait a second */
941 		if (esi_first == NULL)
942 			delay(drv_usectohz(1000000));
943 
944 	} while (esi_first == NULL);
945 	ASSERT(nargs_head != NULL);
946 
947 	dma = kmem_zalloc(sizeof (domount_args_t), KM_SLEEP);
948 	dma->dma_esi = esi_first;
949 	dma->dma_hostlist = hostlist;
950 	dma->dma_nargs = nargs_head;
951 
952 	return (dma);
953 }
954 
955 static void
956 nfs4_trigger_domount_args_destroy(domount_args_t *dma, vnode_t *vp)
957 {
958 	if (dma != NULL) {
959 		if (dma->dma_esi != NULL && vp != NULL)
960 			nfs4_trigger_esi_destroy(dma->dma_esi, vp);
961 
962 		if (dma->dma_hostlist != NULL)
963 			kmem_free(dma->dma_hostlist, MAXPATHLEN);
964 
965 		if (dma->dma_nargs != NULL) {
966 			struct nfs_args *nargs = dma->dma_nargs;
967 
968 			do {
969 				struct nfs_args *next =
970 				    nargs->nfs_ext_u.nfs_extB.next;
971 
972 				nfs4_trigger_nargs_destroy(nargs);
973 				nargs = next;
974 			} while (nargs != NULL);
975 		}
976 
977 		kmem_free(dma, sizeof (domount_args_t));
978 	}
979 }
980 
981 /*
982  * The ephemeral_servinfo_t struct contains basic information we will need to
983  * perform the mount. Whilst the structure is generic across different
984  * types of ephemeral mount, the way we gather its contents differs.
985  */
986 static ephemeral_servinfo_t *
987 nfs4_trigger_esi_create(vnode_t *vp, servinfo4_t *svp)
988 {
989 	ephemeral_servinfo_t *esi;
990 	rnode4_t *rp = VTOR4(vp);
991 
992 	ASSERT(RP_ISSTUB(rp));
993 
994 	/* Call the ephemeral type-specific routine */
995 	if (RP_ISSTUB_MIRRORMOUNT(rp))
996 		esi = nfs4_trigger_esi_create_mirrormount(vp, svp);
997 	else
998 		esi = NULL;
999 
1000 	/* for now, we only support mirror-mounts */
1001 	ASSERT(esi != NULL);
1002 
1003 	return (esi);
1004 }
1005 
1006 static void
1007 nfs4_trigger_esi_destroy(ephemeral_servinfo_t *esi, vnode_t *vp)
1008 {
1009 	rnode4_t *rp = VTOR4(vp);
1010 
1011 	ASSERT(RP_ISSTUB(rp));
1012 
1013 	/* for now, we only support mirror-mounts */
1014 	ASSERT(RP_ISSTUB_MIRRORMOUNT(rp));
1015 
1016 	/* Currently, no need for an ephemeral type-specific routine */
1017 
1018 	/*
1019 	 * The contents of ephemeral_servinfo_t goes into nfs_args,
1020 	 * and will be handled by nfs4_trigger_nargs_destroy().
1021 	 * We need only free the structure itself.
1022 	 */
1023 	if (esi != NULL)
1024 		kmem_free(esi, sizeof (ephemeral_servinfo_t));
1025 }
1026 
1027 /*
1028  * Some of this may turn out to be common with other ephemeral types,
1029  * in which case it should be moved to nfs4_trigger_esi_create(), or a
1030  * common function called.
1031  */
1032 static ephemeral_servinfo_t *
1033 nfs4_trigger_esi_create_mirrormount(vnode_t *vp, servinfo4_t *svp)
1034 {
1035 	char			*stubpath;
1036 	struct knetconfig	*sikncp, *svkncp;
1037 	struct netbuf		*bufp;
1038 	ephemeral_servinfo_t	*esi;
1039 
1040 	esi = kmem_zalloc(sizeof (ephemeral_servinfo_t), KM_SLEEP);
1041 
1042 	/* initially set to be our type of ephemeral mount; may be added to */
1043 	esi->esi_mount_flags = NFSMNT_MIRRORMOUNT;
1044 
1045 	/*
1046 	 * We're copying info from the stub rnode's servinfo4, but
1047 	 * we must create new copies, not pointers, since this information
1048 	 * is to be associated with the new mount, which will be
1049 	 * unmounted (and its structures freed) separately
1050 	 */
1051 
1052 	/*
1053 	 * Sizes passed to kmem_[z]alloc here must match those freed
1054 	 * in nfs4_free_args()
1055 	 */
1056 
1057 	/*
1058 	 * We hold sv_lock across kmem_zalloc() calls that may sleep, but this
1059 	 * is difficult to avoid: as we need to read svp to calculate the
1060 	 * sizes to be allocated.
1061 	 */
1062 	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
1063 
1064 	esi->esi_hostname = kmem_zalloc(strlen(svp->sv_hostname) + 1, KM_SLEEP);
1065 	(void) strcat(esi->esi_hostname, svp->sv_hostname);
1066 
1067 	esi->esi_addr = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
1068 	bufp = esi->esi_addr;
1069 	bufp->len = svp->sv_addr.len;
1070 	bufp->maxlen = svp->sv_addr.maxlen;
1071 	bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
1072 	bcopy(svp->sv_addr.buf, bufp->buf, bufp->len);
1073 
1074 	esi->esi_knconf = kmem_zalloc(sizeof (*esi->esi_knconf), KM_SLEEP);
1075 	sikncp = esi->esi_knconf;
1076 	svkncp = svp->sv_knconf;
1077 	sikncp->knc_semantics = svkncp->knc_semantics;
1078 	sikncp->knc_protofmly = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
1079 	(void) strcat((char *)sikncp->knc_protofmly,
1080 	    (char *)svkncp->knc_protofmly);
1081 	sikncp->knc_proto = (caddr_t)kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
1082 	(void) strcat((char *)sikncp->knc_proto, (char *)svkncp->knc_proto);
1083 	sikncp->knc_rdev = svkncp->knc_rdev;
1084 
1085 	/*
1086 	 * Used when AUTH_DH is negotiated.
1087 	 *
1088 	 * This is ephemeral mount-type specific, since it contains the
1089 	 * server's time-sync syncaddr.
1090 	 */
1091 	if (svp->sv_dhsec) {
1092 		struct netbuf *bufp;
1093 		sec_data_t *sdata;
1094 		dh_k4_clntdata_t *data;
1095 
1096 		sdata = svp->sv_dhsec;
1097 		data = (dh_k4_clntdata_t *)sdata->data;
1098 		ASSERT(sdata->rpcflavor == AUTH_DH);
1099 
1100 		bufp = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
1101 		bufp->len = data->syncaddr.len;
1102 		bufp->maxlen = data->syncaddr.maxlen;
1103 		bufp->buf = kmem_zalloc(bufp->len, KM_SLEEP);
1104 		bcopy(data->syncaddr.buf, bufp->buf, bufp->len);
1105 		esi->esi_syncaddr = bufp;
1106 
1107 		if (data->netname != NULL) {
1108 			int nmlen = data->netnamelen;
1109 
1110 			/*
1111 			 * We need to copy from a dh_k4_clntdata_t
1112 			 * netname/netnamelen pair to a NUL-terminated
1113 			 * netname string suitable for putting in nfs_args,
1114 			 * where the latter has no netnamelen field.
1115 			 */
1116 			esi->esi_netname = kmem_zalloc(nmlen + 1, KM_SLEEP);
1117 			bcopy(data->netname, esi->esi_netname, nmlen);
1118 		}
1119 	} else {
1120 		esi->esi_syncaddr = NULL;
1121 		esi->esi_netname = NULL;
1122 	}
1123 
1124 	stubpath = fn_path(VTOSV(vp)->sv_name);
1125 	/* step over initial '.', to avoid e.g. sv_path: "/tank./ws" */
1126 	ASSERT(*stubpath == '.');
1127 	stubpath += 1;
1128 
1129 	/* for nfs_args->fh */
1130 	esi->esi_path_len = strlen(svp->sv_path) + strlen(stubpath) + 1;
1131 	esi->esi_path = kmem_zalloc(esi->esi_path_len, KM_SLEEP);
1132 	(void) strcat(esi->esi_path, svp->sv_path);
1133 	(void) strcat(esi->esi_path, stubpath);
1134 
1135 	stubpath -= 1;
1136 	/* stubpath allocated by fn_path() */
1137 	kmem_free(stubpath, strlen(stubpath) + 1);
1138 
1139 	nfs_rw_exit(&svp->sv_lock);
1140 
1141 	return (esi);
1142 }
1143 
1144 /*
1145  * Assemble the args, and call the generic VFS mount function to
1146  * finally perform the ephemeral mount.
1147  */
1148 static int
1149 nfs4_trigger_domount(vnode_t *stubvp, domount_args_t *dma, vfs_t **vfsp,
1150     cred_t *cr)
1151 {
1152 	struct mounta	*uap;
1153 	char		*mntpt, *orig_path, *path;
1154 	const char	*orig_mntpt;
1155 	int		retval;
1156 	int		mntpt_len;
1157 	int		spec_len;
1158 	zone_t		*zone = curproc->p_zone;
1159 	bool_t		has_leading_slash;
1160 
1161 	vfs_t			*stubvfsp = stubvp->v_vfsp;
1162 	ephemeral_servinfo_t	*esi = dma->dma_esi;
1163 	struct nfs_args		*nargs = dma->dma_nargs;
1164 
1165 	/* first, construct the mount point for the ephemeral mount */
1166 	orig_path = path = fn_path(VTOSV(stubvp)->sv_name);
1167 	orig_mntpt = (char *)refstr_value(stubvfsp->vfs_mntpt);
1168 
1169 	if (*orig_path == '.')
1170 		orig_path++;
1171 
1172 	/*
1173 	 * Get rid of zone's root path
1174 	 */
1175 	if (zone != global_zone) {
1176 		/*
1177 		 * -1 for trailing '/' and -1 for EOS.
1178 		 */
1179 		if (strncmp(zone->zone_rootpath, orig_mntpt,
1180 		    zone->zone_rootpathlen - 1) == 0) {
1181 			orig_mntpt += (zone->zone_rootpathlen - 2);
1182 		}
1183 	}
1184 
1185 	mntpt_len = strlen(orig_mntpt) + strlen(orig_path);
1186 	mntpt = kmem_zalloc(mntpt_len + 1, KM_SLEEP);
1187 	(void) strcat(mntpt, orig_mntpt);
1188 	(void) strcat(mntpt, orig_path);
1189 
1190 	kmem_free(path, strlen(path) + 1);
1191 	path = esi->esi_path;
1192 	if (*path == '.')
1193 		path++;
1194 	if (path[0] == '/' && path[1] == '/')
1195 		path++;
1196 	has_leading_slash = (*path == '/');
1197 
1198 	spec_len = strlen(dma->dma_hostlist);
1199 	spec_len += strlen(path);
1200 
1201 	/* We are going to have to add this in */
1202 	if (!has_leading_slash)
1203 		spec_len++;
1204 
1205 	/* We need to get the ':' for dma_hostlist:esi_path */
1206 	spec_len++;
1207 
1208 	uap = kmem_zalloc(sizeof (struct mounta), KM_SLEEP);
1209 	uap->spec = kmem_zalloc(spec_len + 1, KM_SLEEP);
1210 	(void) snprintf(uap->spec, spec_len + 1, "%s:%s%s", dma->dma_hostlist,
1211 	    has_leading_slash ? "" : "/", path);
1212 
1213 	uap->dir = mntpt;
1214 
1215 	uap->flags = MS_SYSSPACE | MS_DATA;
1216 	/* fstype-independent mount options not covered elsewhere */
1217 	/* copy parent's mount(1M) "-m" flag */
1218 	if (stubvfsp->vfs_flag & VFS_NOMNTTAB)
1219 		uap->flags |= MS_NOMNTTAB;
1220 
1221 	uap->fstype = MNTTYPE_NFS4;
1222 	uap->dataptr = (char *)nargs;
1223 	/* not needed for MS_SYSSPACE */
1224 	uap->datalen = 0;
1225 
1226 	/* use optptr to pass in extra mount options */
1227 	uap->flags |= MS_OPTIONSTR;
1228 	uap->optptr = nfs4_trigger_create_mntopts(stubvfsp);
1229 	if (uap->optptr == NULL) {
1230 		retval = EINVAL;
1231 		goto done;
1232 	}
1233 	/* domount() expects us to count the trailing NUL */
1234 	uap->optlen = strlen(uap->optptr) + 1;
1235 
1236 	retval = domount(NULL, uap, stubvp, cr, vfsp);
1237 	if (retval == 0)
1238 		VFS_RELE(*vfsp);
1239 done:
1240 	if (uap->optptr)
1241 		nfs4_trigger_destroy_mntopts(uap->optptr);
1242 
1243 	kmem_free(uap->spec, spec_len + 1);
1244 	kmem_free(uap, sizeof (struct mounta));
1245 	kmem_free(mntpt, mntpt_len + 1);
1246 
1247 	return (retval);
1248 }
1249 
1250 /*
1251  * Build an nfs_args structure for passing to domount().
1252  *
1253  * Ephemeral mount-type specific data comes from the ephemeral_servinfo_t;
1254  * generic data - common to all ephemeral mount types - is read directly
1255  * from the parent mount's servinfo4_t and mntinfo4_t, via the stub vnode.
1256  */
1257 static struct nfs_args *
1258 nfs4_trigger_nargs_create(mntinfo4_t *mi, servinfo4_t *svp,
1259     ephemeral_servinfo_t *esi)
1260 {
1261 	sec_data_t *secdata;
1262 	struct nfs_args *nargs;
1263 
1264 	/* setup the nfs args */
1265 	nargs = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP);
1266 
1267 	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
1268 
1269 	nargs->addr = esi->esi_addr;
1270 
1271 	/* for AUTH_DH by negotiation */
1272 	if (esi->esi_syncaddr || esi->esi_netname) {
1273 		nargs->flags |= NFSMNT_SECURE;
1274 		nargs->syncaddr = esi->esi_syncaddr;
1275 		nargs->netname = esi->esi_netname;
1276 	}
1277 
1278 	nargs->flags |= NFSMNT_KNCONF;
1279 	nargs->knconf = esi->esi_knconf;
1280 	nargs->flags |= NFSMNT_HOSTNAME;
1281 	nargs->hostname = esi->esi_hostname;
1282 	nargs->fh = esi->esi_path;
1283 
1284 	/* general mount settings, all copied from parent mount */
1285 	mutex_enter(&mi->mi_lock);
1286 
1287 	if (!(mi->mi_flags & MI4_HARD))
1288 		nargs->flags |= NFSMNT_SOFT;
1289 
1290 	nargs->flags |= NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_TIMEO |
1291 	    NFSMNT_RETRANS;
1292 	nargs->wsize = mi->mi_stsize;
1293 	nargs->rsize = mi->mi_tsize;
1294 	nargs->timeo = mi->mi_timeo;
1295 	nargs->retrans = mi->mi_retrans;
1296 
1297 	if (mi->mi_flags & MI4_INT)
1298 		nargs->flags |= NFSMNT_INT;
1299 	if (mi->mi_flags & MI4_NOAC)
1300 		nargs->flags |= NFSMNT_NOAC;
1301 
1302 	nargs->flags |= NFSMNT_ACREGMIN | NFSMNT_ACREGMAX | NFSMNT_ACDIRMIN |
1303 	    NFSMNT_ACDIRMAX;
1304 	nargs->acregmin = HR2SEC(mi->mi_acregmin);
1305 	nargs->acregmax = HR2SEC(mi->mi_acregmax);
1306 	nargs->acdirmin = HR2SEC(mi->mi_acdirmin);
1307 	nargs->acdirmax = HR2SEC(mi->mi_acdirmax);
1308 
1309 	if (mi->mi_flags & MI4_NOCTO)
1310 		nargs->flags |= NFSMNT_NOCTO;
1311 	if (mi->mi_flags & MI4_GRPID)
1312 		nargs->flags |= NFSMNT_GRPID;
1313 	if (mi->mi_flags & MI4_LLOCK)
1314 		nargs->flags |= NFSMNT_LLOCK;
1315 	if (mi->mi_flags & MI4_NOPRINT)
1316 		nargs->flags |= NFSMNT_NOPRINT;
1317 	if (mi->mi_flags & MI4_DIRECTIO)
1318 		nargs->flags |= NFSMNT_DIRECTIO;
1319 	if (mi->mi_flags & MI4_PUBLIC)
1320 		nargs->flags |= NFSMNT_PUBLIC;
1321 
1322 	mutex_exit(&mi->mi_lock);
1323 
1324 	/* add any specific flags for this type of ephemeral mount */
1325 	nargs->flags |= esi->esi_mount_flags;
1326 
1327 	/*
1328 	 * Security data & negotiation policy.
1329 	 *
1330 	 * We need to preserve the parent mount's preference for security
1331 	 * negotiation, translating SV4_TRYSECDEFAULT -> NFSMNT_SECDEFAULT.
1332 	 *
1333 	 * If SV4_TRYSECDEFAULT is not set, that indicates that a specific
1334 	 * security flavour was requested, with data in sv_secdata, and that
1335 	 * no negotiation should occur. If this specified flavour fails, that's
1336 	 * it. We will copy sv_secdata, and not set NFSMNT_SECDEFAULT.
1337 	 *
1338 	 * If SV4_TRYSECDEFAULT is set, then we start with a passed-in
1339 	 * default flavour, in sv_secdata, but then negotiate a new flavour.
1340 	 * Possible flavours are recorded in an array in sv_secinfo, with
1341 	 * currently in-use flavour pointed to by sv_currsec.
1342 	 *
1343 	 * If sv_currsec is set, i.e. if negotiation has already occurred,
1344 	 * we will copy sv_currsec. Otherwise, copy sv_secdata. Regardless,
1345 	 * we will set NFSMNT_SECDEFAULT, to enable negotiation.
1346 	 */
1347 	if (svp->sv_flags & SV4_TRYSECDEFAULT) {
1348 		/* enable negotiation for ephemeral mount */
1349 		nargs->flags |= NFSMNT_SECDEFAULT;
1350 
1351 		/*
1352 		 * As a starting point for negotiation, copy parent
1353 		 * mount's negotiated flavour (sv_currsec) if available,
1354 		 * or its passed-in flavour (sv_secdata) if not.
1355 		 */
1356 		if (svp->sv_currsec != NULL)
1357 			secdata = copy_sec_data(svp->sv_currsec);
1358 		else if (svp->sv_secdata != NULL)
1359 			secdata = copy_sec_data(svp->sv_secdata);
1360 		else
1361 			secdata = NULL;
1362 	} else {
1363 		/* do not enable negotiation; copy parent's passed-in flavour */
1364 		if (svp->sv_secdata != NULL)
1365 			secdata = copy_sec_data(svp->sv_secdata);
1366 		else
1367 			secdata = NULL;
1368 	}
1369 
1370 	nfs_rw_exit(&svp->sv_lock);
1371 
1372 	nargs->flags |= NFSMNT_NEWARGS;
1373 	nargs->nfs_args_ext = NFS_ARGS_EXTB;
1374 	nargs->nfs_ext_u.nfs_extB.secdata = secdata;
1375 
1376 	/* for NFS RO failover; caller will set if necessary */
1377 	nargs->nfs_ext_u.nfs_extB.next = NULL;
1378 
1379 	return (nargs);
1380 }
1381 
1382 static void
1383 nfs4_trigger_nargs_destroy(struct nfs_args *nargs)
1384 {
1385 	/*
1386 	 * Either the mount failed, in which case the data is not needed, or
1387 	 * nfs4_mount() has either taken copies of what it needs or,
1388 	 * where it has merely copied the ptr, it has set *our* ptr to NULL,
1389 	 * whereby nfs4_free_args() will ignore it.
1390 	 */
1391 	nfs4_free_args(nargs);
1392 	kmem_free(nargs, sizeof (struct nfs_args));
1393 }
1394 
1395 /*
1396  * When we finally get into the mounting, we need to add this
1397  * node to the ephemeral tree.
1398  *
1399  * This is called from nfs4_mount().
1400  */
1401 int
1402 nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp)
1403 {
1404 	mntinfo4_t		*mi_parent;
1405 	nfs4_ephemeral_t	*eph;
1406 	nfs4_ephemeral_tree_t	*net;
1407 
1408 	nfs4_ephemeral_t	*prior;
1409 	nfs4_ephemeral_t	*child;
1410 
1411 	nfs4_ephemeral_t	*peer;
1412 
1413 	nfs4_trigger_globals_t	*ntg;
1414 	zone_t			*zone = curproc->p_zone;
1415 
1416 	int			rc = 0;
1417 
1418 	mi_parent = VTOMI4(mvp);
1419 
1420 	/*
1421 	 * Get this before grabbing anything else!
1422 	 */
1423 	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
1424 	if (!ntg->ntg_thread_started) {
1425 		nfs4_ephemeral_start_harvester(ntg);
1426 	}
1427 
1428 	mutex_enter(&mi_parent->mi_lock);
1429 	mutex_enter(&mi->mi_lock);
1430 
1431 	net = mi->mi_ephemeral_tree =
1432 	    mi_parent->mi_ephemeral_tree;
1433 
1434 	/*
1435 	 * If the mi_ephemeral_tree is NULL, then it
1436 	 * means that either the harvester or a manual
1437 	 * umount has cleared the tree out right before
1438 	 * we got here.
1439 	 *
1440 	 * There is nothing we can do here, so return
1441 	 * to the caller and let them decide whether they
1442 	 * try again.
1443 	 */
1444 	if (net == NULL) {
1445 		mutex_exit(&mi->mi_lock);
1446 		mutex_exit(&mi_parent->mi_lock);
1447 
1448 		return (EBUSY);
1449 	}
1450 
1451 	nfs4_ephemeral_tree_hold(net);
1452 
1453 	/*
1454 	 * We need to tack together the ephemeral mount
1455 	 * with this new mntinfo.
1456 	 */
1457 	eph = kmem_zalloc(sizeof (*eph), KM_SLEEP);
1458 	eph->ne_mount = mi;
1459 	eph->ne_ref_time = gethrestime_sec();
1460 
1461 	/*
1462 	 * We need to tell the ephemeral mount when
1463 	 * to time out.
1464 	 */
1465 	eph->ne_mount_to = ntg->ntg_mount_to;
1466 
1467 	mi->mi_flags |= MI4_EPHEMERAL;
1468 	mi->mi_ephemeral = eph;
1469 
1470 	/*
1471 	 * If the enclosing mntinfo4 is also ephemeral,
1472 	 * then we need to point to its enclosing parent.
1473 	 * Else the enclosing mntinfo4 is the enclosing parent.
1474 	 *
1475 	 * We also need to weave this ephemeral node
1476 	 * into the tree.
1477 	 */
1478 	if (mi_parent->mi_flags & MI4_EPHEMERAL) {
1479 		/*
1480 		 * We need to decide if we are
1481 		 * the root node of this branch
1482 		 * or if we are a sibling of this
1483 		 * branch.
1484 		 */
1485 		prior = mi_parent->mi_ephemeral;
1486 		if (prior == NULL) {
1487 			/*
1488 			 * Race condition, clean up, and
1489 			 * let caller handle mntinfo.
1490 			 */
1491 			mi->mi_flags &= ~MI4_EPHEMERAL;
1492 			mi->mi_ephemeral = NULL;
1493 			kmem_free(eph, sizeof (*eph));
1494 			rc = EBUSY;
1495 		} else {
1496 			if (prior->ne_child == NULL) {
1497 				prior->ne_child = eph;
1498 			} else {
1499 				child = prior->ne_child;
1500 
1501 				prior->ne_child = eph;
1502 				eph->ne_peer = child;
1503 
1504 				child->ne_prior = eph;
1505 			}
1506 
1507 			eph->ne_prior = prior;
1508 		}
1509 	} else {
1510 		/*
1511 		 * The parent mntinfo4 is the non-ephemeral
1512 		 * root of the ephemeral tree. We
1513 		 * need to decide if we are the root
1514 		 * node of that tree or if we are a
1515 		 * sibling of the root node.
1516 		 *
1517 		 * We are the root if there is no
1518 		 * other node.
1519 		 */
1520 		if (net->net_root == NULL) {
1521 			net->net_root = eph;
1522 		} else {
1523 			eph->ne_peer = peer = net->net_root;
1524 			ASSERT(peer != NULL);
1525 			net->net_root = eph;
1526 
1527 			peer->ne_prior = eph;
1528 		}
1529 
1530 		eph->ne_prior = NULL;
1531 	}
1532 
1533 	nfs4_ephemeral_tree_rele(net);
1534 
1535 	mutex_exit(&mi->mi_lock);
1536 	mutex_exit(&mi_parent->mi_lock);
1537 
1538 	return (rc);
1539 }
1540 
1541 /*
1542  * Commit the changes to the ephemeral tree for removing this node.
1543  */
1544 static void
1545 nfs4_ephemeral_umount_cleanup(nfs4_ephemeral_t *eph)
1546 {
1547 	nfs4_ephemeral_t	*e = eph;
1548 	nfs4_ephemeral_t	*peer;
1549 	nfs4_ephemeral_t	*prior;
1550 
1551 	peer = eph->ne_peer;
1552 	prior = e->ne_prior;
1553 
1554 	/*
1555 	 * If this branch root was not the
1556 	 * tree root, then we need to fix back pointers.
1557 	 */
1558 	if (prior) {
1559 		if (prior->ne_child == e) {
1560 			prior->ne_child = peer;
1561 		} else {
1562 			prior->ne_peer = peer;
1563 		}
1564 
1565 		if (peer)
1566 			peer->ne_prior = prior;
1567 	} else if (peer) {
1568 		peer->ne_mount->mi_ephemeral_tree->net_root = peer;
1569 		peer->ne_prior = NULL;
1570 	} else {
1571 		e->ne_mount->mi_ephemeral_tree->net_root = NULL;
1572 	}
1573 }
1574 
1575 /*
1576  * We want to avoid recursion at all costs. So we need to
1577  * unroll the tree. We do this by a depth first traversal to
1578  * leaf nodes. We blast away the leaf and work our way back
1579  * up and down the tree.
1580  */
1581 static int
1582 nfs4_ephemeral_unmount_engine(nfs4_ephemeral_t *eph,
1583     int isTreeRoot, int flag, cred_t *cr)
1584 {
1585 	nfs4_ephemeral_t	*e = eph;
1586 	nfs4_ephemeral_t	*prior;
1587 	mntinfo4_t		*mi;
1588 	vfs_t			*vfsp;
1589 	int			error;
1590 
1591 	/*
1592 	 * We use the loop while unrolling the ephemeral tree.
1593 	 */
1594 	for (;;) {
1595 		/*
1596 		 * First we walk down the child.
1597 		 */
1598 		if (e->ne_child) {
1599 			prior = e;
1600 			e = e->ne_child;
1601 			continue;
1602 		}
1603 
1604 		/*
1605 		 * If we are the root of the branch we are removing,
1606 		 * we end it here. But if the branch is the root of
1607 		 * the tree, we have to forge on. We do not consider
1608 		 * the peer list for the root because while it may
1609 		 * be okay to remove, it is both extra work and a
1610 		 * potential for a false-positive error to stall the
1611 		 * unmount attempt.
1612 		 */
1613 		if (e == eph && isTreeRoot == FALSE)
1614 			return (0);
1615 
1616 		/*
1617 		 * Next we walk down the peer list.
1618 		 */
1619 		if (e->ne_peer) {
1620 			prior = e;
1621 			e = e->ne_peer;
1622 			continue;
1623 		}
1624 
1625 		/*
1626 		 * We can only remove the node passed in by the
1627 		 * caller if it is the root of the ephemeral tree.
1628 		 * Otherwise, the caller will remove it.
1629 		 */
1630 		if (e == eph && isTreeRoot == FALSE)
1631 			return (0);
1632 
1633 		/*
1634 		 * Okay, we have a leaf node, time
1635 		 * to prune it!
1636 		 *
1637 		 * Note that prior can only be NULL if
1638 		 * and only if it is the root of the
1639 		 * ephemeral tree.
1640 		 */
1641 		prior = e->ne_prior;
1642 
1643 		mi = e->ne_mount;
1644 		mutex_enter(&mi->mi_lock);
1645 		vfsp = mi->mi_vfsp;
1646 
1647 		/*
1648 		 * Cleared by umount2_engine.
1649 		 */
1650 		VFS_HOLD(vfsp);
1651 
1652 		/*
1653 		 * Inform nfs4_unmount to not recursively
1654 		 * descend into this node's children when it
1655 		 * gets processed.
1656 		 */
1657 		mi->mi_flags |= MI4_EPHEMERAL_RECURSED;
1658 		mutex_exit(&mi->mi_lock);
1659 
1660 		error = umount2_engine(vfsp, flag, cr, FALSE);
1661 		if (error) {
1662 			/*
1663 			 * We need to reenable nfs4_unmount's ability
1664 			 * to recursively descend on this node.
1665 			 */
1666 			mutex_enter(&mi->mi_lock);
1667 			mi->mi_flags &= ~MI4_EPHEMERAL_RECURSED;
1668 			mutex_exit(&mi->mi_lock);
1669 
1670 			return (error);
1671 		}
1672 
1673 		/*
1674 		 * If we are the current node, we do not want to
1675 		 * touch anything else. At this point, the only
1676 		 * way the current node can have survived to here
1677 		 * is if it is the root of the ephemeral tree and
1678 		 * we are unmounting the enclosing mntinfo4.
1679 		 */
1680 		if (e == eph) {
1681 			ASSERT(prior == NULL);
1682 			return (0);
1683 		}
1684 
1685 		/*
1686 		 * Stitch up the prior node. Note that since
1687 		 * we have handled the root of the tree, prior
1688 		 * must be non-NULL.
1689 		 */
1690 		ASSERT(prior != NULL);
1691 		if (prior->ne_child == e) {
1692 			prior->ne_child = NULL;
1693 		} else {
1694 			ASSERT(prior->ne_peer == e);
1695 
1696 			prior->ne_peer = NULL;
1697 		}
1698 
1699 		e = prior;
1700 	}
1701 
1702 	/* NOTREACHED */
1703 }
1704 
1705 /*
1706  * Common code to safely release net_cnt_lock and net_tree_lock
1707  */
1708 void
1709 nfs4_ephemeral_umount_unlock(bool_t *pmust_unlock,
1710     bool_t *pmust_rele, nfs4_ephemeral_tree_t **pnet)
1711 {
1712 	nfs4_ephemeral_tree_t	*net = *pnet;
1713 
1714 	if (*pmust_unlock) {
1715 		mutex_enter(&net->net_cnt_lock);
1716 		net->net_status &= ~NFS4_EPHEMERAL_TREE_UMOUNTING;
1717 		if (*pmust_rele)
1718 			nfs4_ephemeral_tree_decr(net);
1719 		mutex_exit(&net->net_cnt_lock);
1720 
1721 		mutex_exit(&net->net_tree_lock);
1722 
1723 		*pmust_unlock = FALSE;
1724 	}
1725 }
1726 
1727 /*
1728  * While we may have removed any child or sibling nodes of this
1729  * ephemeral node, we can not nuke it until we know that there
1730  * were no actived vnodes on it. This will do that final
1731  * work once we know it is not busy.
1732  */
1733 void
1734 nfs4_ephemeral_umount_activate(mntinfo4_t *mi, bool_t *pmust_unlock,
1735     bool_t *pmust_rele, nfs4_ephemeral_tree_t **pnet)
1736 {
1737 	/*
1738 	 * Now we need to get rid of the ephemeral data if it exists.
1739 	 */
1740 	mutex_enter(&mi->mi_lock);
1741 	if (mi->mi_ephemeral) {
1742 		/*
1743 		 * If we are the root node of an ephemeral branch
1744 		 * which is being removed, then we need to fixup
1745 		 * pointers into and out of the node.
1746 		 */
1747 		if (!(mi->mi_flags & MI4_EPHEMERAL_RECURSED))
1748 			nfs4_ephemeral_umount_cleanup(mi->mi_ephemeral);
1749 
1750 		ASSERT(mi->mi_ephemeral != NULL);
1751 
1752 		kmem_free(mi->mi_ephemeral, sizeof (*mi->mi_ephemeral));
1753 		mi->mi_ephemeral = NULL;
1754 	}
1755 	mutex_exit(&mi->mi_lock);
1756 
1757 	nfs4_ephemeral_umount_unlock(pmust_unlock, pmust_rele, pnet);
1758 }
1759 
1760 /*
1761  * Unmount an ephemeral node.
1762  */
1763 int
1764 nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr,
1765     bool_t *pmust_unlock, bool_t *pmust_rele, nfs4_ephemeral_tree_t **pnet)
1766 {
1767 	int			error = 0;
1768 	nfs4_ephemeral_t	*eph;
1769 	nfs4_ephemeral_tree_t	*net;
1770 	int			is_derooting = FALSE;
1771 	int			is_recursed = FALSE;
1772 	int			was_locked = FALSE;
1773 
1774 	/*
1775 	 * Make sure to set the default state for cleaning
1776 	 * up the tree in the caller (and on the way out).
1777 	 */
1778 	*pmust_unlock = *pmust_rele = FALSE;
1779 
1780 	/*
1781 	 * The active vnodes on this file system may be ephemeral
1782 	 * children. We need to check for and try to unmount them
1783 	 * here. If any can not be unmounted, we are going
1784 	 * to return EBUSY.
1785 	 */
1786 	mutex_enter(&mi->mi_lock);
1787 
1788 	/*
1789 	 * If an ephemeral tree, we need to check to see if
1790 	 * the lock is already held. If it is, then we need
1791 	 * to see if we are being called as a result of
1792 	 * the recursive removal of some node of the tree or
1793 	 * if we are another attempt to remove the tree.
1794 	 *
1795 	 * mi_flags & MI4_EPHEMERAL indicates an ephemeral
1796 	 * node. mi_ephemeral being non-NULL also does this.
1797 	 *
1798 	 * mi_ephemeral_tree being non-NULL is sufficient
1799 	 * to also indicate either it is an ephemeral node
1800 	 * or the enclosing mntinfo4.
1801 	 *
1802 	 * Do we need MI4_EPHEMERAL? Yes, it is useful for
1803 	 * when we delete the ephemeral node and need to
1804 	 * differentiate from an ephemeral node and the
1805 	 * enclosing root node.
1806 	 */
1807 	*pnet = net = mi->mi_ephemeral_tree;
1808 	if (net == NULL) {
1809 		mutex_exit(&mi->mi_lock);
1810 		return (0);
1811 	}
1812 
1813 	eph = mi->mi_ephemeral;
1814 	is_recursed = mi->mi_flags & MI4_EPHEMERAL_RECURSED;
1815 	is_derooting = (eph == NULL);
1816 
1817 	/*
1818 	 * If this is not recursion, then we need to
1819 	 * grab a ref count.
1820 	 *
1821 	 * But wait, we also do not want to do that
1822 	 * if a harvester thread has already grabbed
1823 	 * the lock.
1824 	 */
1825 	if (!is_recursed) {
1826 		mutex_enter(&net->net_cnt_lock);
1827 		if (net->net_status &
1828 		    NFS4_EPHEMERAL_TREE_LOCKED) {
1829 			/*
1830 			 * If the tree is locked, we need
1831 			 * to decide whether we are the
1832 			 * harvester or some explicit call
1833 			 * for a umount. The only way that
1834 			 * we are the harvester is if
1835 			 * MS_SYSSPACE is set.
1836 			 *
1837 			 * We only let the harvester through
1838 			 * at this point.
1839 			 *
1840 			 * We return EBUSY so that the
1841 			 * caller knows something is
1842 			 * going on. Note that by that
1843 			 * time, the umount in the other
1844 			 * thread may have already occured.
1845 			 */
1846 			if (!(flag & MS_SYSSPACE)) {
1847 				mutex_exit(&net->net_cnt_lock);
1848 				mutex_exit(&mi->mi_lock);
1849 
1850 				return (EBUSY);
1851 			}
1852 
1853 			was_locked = TRUE;
1854 		} else {
1855 			nfs4_ephemeral_tree_incr(net);
1856 			*pmust_rele = TRUE;
1857 		}
1858 
1859 		mutex_exit(&net->net_cnt_lock);
1860 	}
1861 	mutex_exit(&mi->mi_lock);
1862 
1863 	/*
1864 	 * If we are not the harvester, we need to check
1865 	 * to see if we need to grab the tree lock.
1866 	 */
1867 	if (was_locked == FALSE) {
1868 		/*
1869 		 * If we grab the lock, it means that no other
1870 		 * operation is working on the tree. If we don't
1871 		 * grab it, we need to decide if this is because
1872 		 * we are a recursive call or a new operation.
1873 		 */
1874 		if (mutex_tryenter(&net->net_tree_lock)) {
1875 			*pmust_unlock = TRUE;
1876 		} else {
1877 			/*
1878 			 * If we are a recursive call, we can
1879 			 * proceed without the lock.
1880 			 * Otherwise we have to wait until
1881 			 * the lock becomes free.
1882 			 */
1883 			if (!is_recursed) {
1884 				mutex_enter(&net->net_cnt_lock);
1885 				if (net->net_status &
1886 				    (NFS4_EPHEMERAL_TREE_DEROOTING
1887 				    | NFS4_EPHEMERAL_TREE_INVALID)) {
1888 					nfs4_ephemeral_tree_decr(net);
1889 					mutex_exit(&net->net_cnt_lock);
1890 					*pmust_rele = FALSE;
1891 					goto is_busy;
1892 				}
1893 				mutex_exit(&net->net_cnt_lock);
1894 
1895 				/*
1896 				 * We can't hold any other locks whilst
1897 				 * we wait on this to free up.
1898 				 */
1899 				mutex_enter(&net->net_tree_lock);
1900 
1901 				/*
1902 				 * Note that while mi->mi_ephemeral
1903 				 * may change and thus we have to
1904 				 * update eph, it is the case that
1905 				 * we have tied down net and
1906 				 * do not care if mi->mi_ephemeral_tree
1907 				 * has changed.
1908 				 */
1909 				mutex_enter(&mi->mi_lock);
1910 				eph = mi->mi_ephemeral;
1911 				mutex_exit(&mi->mi_lock);
1912 
1913 				/*
1914 				 * Okay, we need to see if either the
1915 				 * tree got nuked or the current node
1916 				 * got nuked. Both of which will cause
1917 				 * an error.
1918 				 *
1919 				 * Note that a subsequent retry of the
1920 				 * umount shall work.
1921 				 */
1922 				mutex_enter(&net->net_cnt_lock);
1923 				if (net->net_status &
1924 				    NFS4_EPHEMERAL_TREE_INVALID ||
1925 				    (!is_derooting && eph == NULL)) {
1926 					nfs4_ephemeral_tree_decr(net);
1927 					mutex_exit(&net->net_cnt_lock);
1928 					mutex_exit(&net->net_tree_lock);
1929 					*pmust_rele = FALSE;
1930 					goto is_busy;
1931 				}
1932 				mutex_exit(&net->net_cnt_lock);
1933 				*pmust_unlock = TRUE;
1934 			}
1935 		}
1936 	}
1937 
1938 	/*
1939 	 * Only once we have grabbed the lock can we mark what we
1940 	 * are planning on doing to the ephemeral tree.
1941 	 */
1942 	if (*pmust_unlock) {
1943 		mutex_enter(&net->net_cnt_lock);
1944 		net->net_status |= NFS4_EPHEMERAL_TREE_UMOUNTING;
1945 
1946 		/*
1947 		 * Check to see if we are nuking the root.
1948 		 */
1949 		if (is_derooting)
1950 			net->net_status |=
1951 			    NFS4_EPHEMERAL_TREE_DEROOTING;
1952 		mutex_exit(&net->net_cnt_lock);
1953 	}
1954 
1955 	if (!is_derooting) {
1956 		/*
1957 		 * Only work on children if the caller has not already
1958 		 * done so.
1959 		 */
1960 		if (!is_recursed) {
1961 			ASSERT(eph != NULL);
1962 
1963 			error = nfs4_ephemeral_unmount_engine(eph,
1964 			    FALSE, flag, cr);
1965 			if (error)
1966 				goto is_busy;
1967 		}
1968 	} else {
1969 		eph = net->net_root;
1970 
1971 		/*
1972 		 * Only work if there is something there.
1973 		 */
1974 		if (eph) {
1975 			error = nfs4_ephemeral_unmount_engine(eph, TRUE,
1976 			    flag, cr);
1977 			if (error) {
1978 				mutex_enter(&net->net_cnt_lock);
1979 				net->net_status &=
1980 				    ~NFS4_EPHEMERAL_TREE_DEROOTING;
1981 				mutex_exit(&net->net_cnt_lock);
1982 				goto is_busy;
1983 			}
1984 
1985 			/*
1986 			 * Nothing else which goes wrong will
1987 			 * invalidate the blowing away of the
1988 			 * ephmeral tree.
1989 			 */
1990 			net->net_root = NULL;
1991 		}
1992 
1993 		/*
1994 		 * We have derooted and we have caused the tree to be
1995 		 * invalidated.
1996 		 */
1997 		mutex_enter(&net->net_cnt_lock);
1998 		net->net_status &= ~NFS4_EPHEMERAL_TREE_DEROOTING;
1999 		net->net_status |= NFS4_EPHEMERAL_TREE_INVALID;
2000 		if (was_locked == FALSE)
2001 			nfs4_ephemeral_tree_decr(net);
2002 		mutex_exit(&net->net_cnt_lock);
2003 
2004 		if (was_locked == FALSE)
2005 			mutex_exit(&net->net_tree_lock);
2006 
2007 		/*
2008 		 * We have just blown away any notation of this
2009 		 * tree being locked. We can't let the caller
2010 		 * try to clean things up.
2011 		 */
2012 		*pmust_unlock = FALSE;
2013 
2014 		/*
2015 		 * At this point, the tree should no longer be
2016 		 * associated with the mntinfo4. We need to pull
2017 		 * it off there and let the harvester take
2018 		 * care of it once the refcnt drops.
2019 		 */
2020 		mutex_enter(&mi->mi_lock);
2021 		mi->mi_ephemeral_tree = NULL;
2022 		mutex_exit(&mi->mi_lock);
2023 	}
2024 
2025 	return (0);
2026 
2027 is_busy:
2028 
2029 	nfs4_ephemeral_umount_unlock(pmust_unlock, pmust_rele,
2030 	    pnet);
2031 
2032 	return (error);
2033 }
2034 
2035 /*
2036  * Do the umount and record any error in the parent.
2037  */
2038 static void
2039 nfs4_ephemeral_record_umount(vfs_t *vfsp, int flag,
2040     nfs4_ephemeral_t *e, nfs4_ephemeral_t *prior)
2041 {
2042 	int	error;
2043 
2044 	error = umount2_engine(vfsp, flag, kcred, FALSE);
2045 	if (error) {
2046 		if (prior) {
2047 			if (prior->ne_child == e)
2048 				prior->ne_state |=
2049 				    NFS4_EPHEMERAL_CHILD_ERROR;
2050 			else
2051 				prior->ne_state |=
2052 				    NFS4_EPHEMERAL_PEER_ERROR;
2053 		}
2054 	}
2055 }
2056 
/*
 * For each tree in the forest (where the forest is in
 * effect all of the ephemeral trees for this zone),
 * scan to see if a node can be unmounted. Note that
 * unlike nfs4_ephemeral_unmount_engine(), we do
 * not process the current node before children or
 * siblings. I.e., if a node can be unmounted, we
 * do not recursively check to see if the nodes
 * hanging off of it can also be unmounted.
 *
 * Instead, we delve down deep to try and remove the
 * children first. Then, because we share code with
 * nfs4_ephemeral_unmount_engine(), we will try
 * them again. This could be a performance issue in
 * the future.
 *
 * Also note that unlike nfs4_ephemeral_unmount_engine(),
 * we do not halt on an error. We will not remove the
 * current node, but we will keep on trying to remove
 * the others.
 *
 * ntg is the set of per-zone trigger globals whose forest
 * is walked. ntg_forest_lock is held for the whole walk and
 * each tree's net_tree_lock is held while that tree is
 * being processed.
 *
 * force indicates that we want the unmount to occur
 * even if there is something blocking it (MS_FORCE is
 * added to the unmount flags).
 *
 * time_check indicates that we want to see if the
 * mount has expired past mount_to or not. Typically
 * we want to do this and only on a shutdown of the
 * zone would we want to ignore the check.
 *
 * Trees which end up marked NFS4_EPHEMERAL_TREE_INVALID with
 * a reference count of 1 are unlinked from the forest during
 * the walk and freed once the forest lock has been dropped.
 */
static void
nfs4_ephemeral_harvest_forest(nfs4_trigger_globals_t *ntg,
    bool_t force, bool_t time_check)
{
	nfs4_ephemeral_tree_t	*net;
	nfs4_ephemeral_tree_t	*prev = NULL;
	nfs4_ephemeral_tree_t	*next;
	nfs4_ephemeral_t	*e;
	nfs4_ephemeral_t	*prior;
	time_t			now = gethrestime_sec();

	/* Local list of unlinked trees, freed at the very end. */
	nfs4_ephemeral_tree_t	*harvest = NULL;

	int			flag;

	mntinfo4_t		*mi;
	vfs_t			*vfsp;

	if (force)
		flag = MS_FORCE | MS_SYSSPACE;
	else
		flag = MS_SYSSPACE;

	mutex_enter(&ntg->ntg_forest_lock);
	for (net = ntg->ntg_forest; net != NULL; net = next) {
		/*
		 * Grab the successor now: if this tree is moved onto
		 * the harvest list below, its net_next is overwritten.
		 */
		next = net->net_next;

		nfs4_ephemeral_tree_hold(net);

		mutex_enter(&net->net_tree_lock);

		/*
		 * Let the unmount code know that the
		 * tree is already locked!
		 */
		mutex_enter(&net->net_cnt_lock);
		net->net_status |= NFS4_EPHEMERAL_TREE_LOCKED;
		mutex_exit(&net->net_cnt_lock);

		/*
		 * If the intent is to force all ephemeral nodes to
		 * be unmounted in this zone, we can short circuit a
		 * lot of tree traversal and simply zap the root node.
		 */
		if (force) {
			if (net->net_root) {
				mi = net->net_root->ne_mount;
				vfsp = mi->mi_vfsp;

				/*
				 * Cleared by umount2_engine.
				 */
				VFS_HOLD(vfsp);

				/*
				 * The result is deliberately ignored; there
				 * is no prior node to record an error on.
				 */
				(void) umount2_engine(vfsp, flag,
				    kcred, FALSE);

				goto check_done;
			}
		}

		/*
		 * Iterative walk of the tree driven by each node's
		 * ne_state: visit the child chain, then the peer chain,
		 * then process the node itself and step back to its
		 * prior node. The walk ends when we step back past the
		 * root (e becomes NULL).
		 *
		 * NOTE(review): the branches below test ne_state for
		 * exact equality, while error bits are OR'ed into
		 * whatever state the prior node currently holds.
		 * Confirm that a node carrying both a visit state and
		 * an error bit reaches the intended branch.
		 */
		e = net->net_root;
		if (e)
			e->ne_state = NFS4_EPHEMERAL_VISIT_CHILD;

		while (e) {
			if (e->ne_state == NFS4_EPHEMERAL_VISIT_CHILD) {
				/*
				 * Descend to the child if there is one;
				 * either way this node looks at its
				 * siblings next time around.
				 */
				e->ne_state = NFS4_EPHEMERAL_VISIT_SIBLING;
				if (e->ne_child) {
					e = e->ne_child;
					e->ne_state =
					    NFS4_EPHEMERAL_VISIT_CHILD;
				}

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_VISIT_SIBLING) {
				/*
				 * Move across to the peer if there is one;
				 * either way this node gets processed
				 * next time around.
				 */
				e->ne_state = NFS4_EPHEMERAL_PROCESS_ME;
				if (e->ne_peer) {
					e = e->ne_peer;
					e->ne_state =
					    NFS4_EPHEMERAL_VISIT_CHILD;
				}

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_CHILD_ERROR) {
				prior = e->ne_prior;

				/*
				 * If a child reported an error, do
				 * not bother trying to unmount.
				 *
				 * If your prior node is a parent,
				 * pass the error up such that they
				 * also do not try to unmount.
				 *
				 * However, if your prior is a sibling,
				 * let them try to unmount if they can.
				 */
				if (prior) {
					if (prior->ne_child == e)
						prior->ne_state |=
						    NFS4_EPHEMERAL_CHILD_ERROR;
					else
						prior->ne_state |=
						    NFS4_EPHEMERAL_PEER_ERROR;
				}

				/*
				 * Clear the error and if needed, process peers.
				 *
				 * Once we mask out the error, we know whether
				 * or not we have to process another node.
				 */
				e->ne_state &= ~NFS4_EPHEMERAL_CHILD_ERROR;
				if (e->ne_state == NFS4_EPHEMERAL_PROCESS_ME)
					e = prior;

				continue;
			} else if (e->ne_state ==
			    NFS4_EPHEMERAL_PEER_ERROR) {
				prior = e->ne_prior;

				/*
				 * NOTE(review): this branch overwrites the
				 * prior's state with '=', whereas the
				 * CHILD_ERROR branch above ORs the bit in.
				 * Confirm the difference is intended.
				 */
				if (prior) {
					if (prior->ne_child == e)
						prior->ne_state =
						    NFS4_EPHEMERAL_CHILD_ERROR;
					else
						prior->ne_state =
						    NFS4_EPHEMERAL_PEER_ERROR;
				}

				/*
				 * Clear the error from this node and do the
				 * correct processing.
				 */
				e->ne_state &= ~NFS4_EPHEMERAL_PEER_ERROR;
				continue;
			}

			/*
			 * Remember where to continue from before the
			 * unmount is attempted; the walk resumes at the
			 * prior node whatever the outcome.
			 */
			prior = e->ne_prior;
			e->ne_state = NFS4_EPHEMERAL_OK;

			/*
			 * It must be the case that we need to process
			 * this node. Unless the time check is disabled,
			 * only nodes whose time since ne_ref_time exceeds
			 * their ne_mount_to are unmounted.
			 */
			if (!time_check ||
			    now - e->ne_ref_time > e->ne_mount_to) {
				mi = e->ne_mount;
				vfsp = mi->mi_vfsp;

				/*
				 * Cleared by umount2_engine.
				 */
				VFS_HOLD(vfsp);

				/*
				 * Note that we effectively work down to the
				 * leaf nodes first, try to unmount them,
				 * then work our way back up toward the
				 * root.
				 *
				 * Also note that we deal with a lot of
				 * complexity by sharing the work with
				 * the manual unmount code.
				 */
				nfs4_ephemeral_record_umount(vfsp, flag,
				    e, prior);
			}

			e = prior;
		}

check_done:

		/*
		 * At this point we are done processing this tree.
		 *
		 * If the tree is invalid and we are the only reference
		 * to it, then we push it on the local linked list
		 * to remove it at the end. We avoid that action now
		 * to keep the tree processing going along at a fair clip.
		 *
		 * Else, even if we are the only reference, we drop
		 * our hold on the current tree and allow it to be
		 * reused as needed.
		 */
		mutex_enter(&net->net_cnt_lock);
		if (net->net_refcnt == 1 &&
		    net->net_status & NFS4_EPHEMERAL_TREE_INVALID) {
			nfs4_ephemeral_tree_decr(net);
			net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
			mutex_exit(&net->net_cnt_lock);
			mutex_exit(&net->net_tree_lock);

			/*
			 * Unlink from the forest (prev is left alone,
			 * since it is still the last linked tree) and
			 * push onto the harvest list.
			 */
			if (prev)
				prev->net_next = net->net_next;
			else
				ntg->ntg_forest = net->net_next;

			net->net_next = harvest;
			harvest = net;
			continue;
		}

		nfs4_ephemeral_tree_decr(net);
		net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
		mutex_exit(&net->net_cnt_lock);
		mutex_exit(&net->net_tree_lock);

		/* This tree stays in the forest; it is the new splice point. */
		prev = net;
	}
	mutex_exit(&ntg->ntg_forest_lock);

	/*
	 * With the forest lock dropped, tear down and free every tree
	 * that was unlinked above.
	 */
	for (net = harvest; net != NULL; net = next) {
		next = net->net_next;

		mutex_destroy(&net->net_tree_lock);
		mutex_destroy(&net->net_cnt_lock);
		kmem_free(net, sizeof (*net));
	}
}
2310 
2311 /*
2312  * This is the thread which decides when the harvesting
2313  * can proceed and when to kill it off for this zone.
2314  */
2315 static void
2316 nfs4_ephemeral_harvester(nfs4_trigger_globals_t *ntg)
2317 {
2318 	clock_t		timeleft;
2319 	zone_t		*zone = curproc->p_zone;
2320 
2321 	for (;;) {
2322 		timeleft = zone_status_timedwait(zone, lbolt +
2323 		    nfs4_trigger_thread_timer * hz, ZONE_IS_SHUTTING_DOWN);
2324 
2325 		/*
2326 		 * zone is exiting...
2327 		 */
2328 		if (timeleft != -1) {
2329 			ASSERT(zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN);
2330 			zthread_exit();
2331 			/* NOTREACHED */
2332 		}
2333 
2334 		/*
2335 		 * Only bother scanning if there is potential
2336 		 * work to be done.
2337 		 */
2338 		if (ntg->ntg_forest == NULL)
2339 			continue;
2340 
2341 		/*
2342 		 * Now scan the list and get rid of everything which
2343 		 * is old.
2344 		 */
2345 		nfs4_ephemeral_harvest_forest(ntg, FALSE, TRUE);
2346 	}
2347 
2348 	/* NOTREACHED */
2349 }
2350 
2351 /*
2352  * The zone specific glue needed to start the unmount harvester.
2353  *
2354  * Note that we want to avoid holding the mutex as long as possible,
2355  * hence the multiple checks.
2356  *
2357  * The caller should avoid us getting down here in the first
2358  * place.
2359  */
2360 static void
2361 nfs4_ephemeral_start_harvester(nfs4_trigger_globals_t *ntg)
2362 {
2363 	/*
2364 	 * It got started before we got here...
2365 	 */
2366 	if (ntg->ntg_thread_started)
2367 		return;
2368 
2369 	mutex_enter(&nfs4_ephemeral_thread_lock);
2370 
2371 	if (ntg->ntg_thread_started) {
2372 		mutex_exit(&nfs4_ephemeral_thread_lock);
2373 		return;
2374 	}
2375 
2376 	/*
2377 	 * Start the unmounter harvester thread for this zone.
2378 	 */
2379 	(void) zthread_create(NULL, 0, nfs4_ephemeral_harvester,
2380 	    ntg, 0, minclsyspri);
2381 
2382 	ntg->ntg_thread_started = TRUE;
2383 	mutex_exit(&nfs4_ephemeral_thread_lock);
2384 }
2385 
2386 /*ARGSUSED*/
2387 static void *
2388 nfs4_ephemeral_zsd_create(zoneid_t zoneid)
2389 {
2390 	nfs4_trigger_globals_t	*ntg;
2391 
2392 	ntg = kmem_zalloc(sizeof (*ntg), KM_SLEEP);
2393 	ntg->ntg_thread_started = FALSE;
2394 
2395 	/*
2396 	 * This is the default....
2397 	 */
2398 	ntg->ntg_mount_to = nfs4_trigger_thread_timer;
2399 
2400 	mutex_init(&ntg->ntg_forest_lock, NULL,
2401 	    MUTEX_DEFAULT, NULL);
2402 
2403 	return (ntg);
2404 }
2405 
2406 /*
2407  * Try a nice gentle walk down the forest and convince
2408  * all of the trees to gracefully give it up.
2409  */
2410 /*ARGSUSED*/
2411 static void
2412 nfs4_ephemeral_zsd_shutdown(zoneid_t zoneid, void *arg)
2413 {
2414 	nfs4_trigger_globals_t	*ntg = arg;
2415 
2416 	if (!ntg)
2417 		return;
2418 
2419 	nfs4_ephemeral_harvest_forest(ntg, FALSE, FALSE);
2420 }
2421 
2422 /*
2423  * Race along the forest and rip all of the trees out by
2424  * their rootballs!
2425  */
2426 /*ARGSUSED*/
2427 static void
2428 nfs4_ephemeral_zsd_destroy(zoneid_t zoneid, void *arg)
2429 {
2430 	nfs4_trigger_globals_t	*ntg = arg;
2431 
2432 	if (!ntg)
2433 		return;
2434 
2435 	nfs4_ephemeral_harvest_forest(ntg, TRUE, FALSE);
2436 
2437 	mutex_destroy(&ntg->ntg_forest_lock);
2438 	kmem_free(ntg, sizeof (*ntg));
2439 }
2440 
2441 /*
2442  * This is the zone independent cleanup needed for
2443  * emphemeral mount processing.
2444  */
2445 void
2446 nfs4_ephemeral_fini(void)
2447 {
2448 	(void) zone_key_delete(nfs4_ephemeral_key);
2449 	mutex_destroy(&nfs4_ephemeral_thread_lock);
2450 }
2451 
2452 /*
2453  * This is the zone independent initialization needed for
2454  * emphemeral mount processing.
2455  */
2456 void
2457 nfs4_ephemeral_init(void)
2458 {
2459 	mutex_init(&nfs4_ephemeral_thread_lock, NULL, MUTEX_DEFAULT,
2460 	    NULL);
2461 
2462 	zone_key_create(&nfs4_ephemeral_key, nfs4_ephemeral_zsd_create,
2463 	    nfs4_ephemeral_zsd_shutdown, nfs4_ephemeral_zsd_destroy);
2464 }
2465 
2466 /*
2467  * nfssys() calls this function to set the per-zone
2468  * value of mount_to to drive when an ephemeral mount is
2469  * timed out. Each mount will grab a copy of this value
2470  * when mounted.
2471  */
2472 void
2473 nfs4_ephemeral_set_mount_to(uint_t mount_to)
2474 {
2475 	nfs4_trigger_globals_t	*ntg;
2476 	zone_t			*zone = curproc->p_zone;
2477 
2478 	ntg = zone_getspecific(nfs4_ephemeral_key, zone);
2479 
2480 	ntg->ntg_mount_to = mount_to;
2481 }
2482 
2483 /*
2484  * Walk the list of v4 mount options; if they are currently set in vfsp,
2485  * append them to a new comma-separated mount option string, and return it.
2486  *
2487  * Caller should free by calling nfs4_trigger_destroy_mntopts().
2488  */
2489 static char *
2490 nfs4_trigger_create_mntopts(vfs_t *vfsp)
2491 {
2492 	uint_t i;
2493 	char *mntopts;
2494 	struct vfssw *vswp;
2495 	mntopts_t *optproto;
2496 
2497 	mntopts = kmem_zalloc(MAX_MNTOPT_STR, KM_SLEEP);
2498 
2499 	/* get the list of applicable mount options for v4; locks *vswp */
2500 	vswp = vfs_getvfssw(MNTTYPE_NFS4);
2501 	optproto = &vswp->vsw_optproto;
2502 
2503 	for (i = 0; i < optproto->mo_count; i++) {
2504 		struct mntopt *mop = &optproto->mo_list[i];
2505 
2506 		if (mop->mo_flags & MO_EMPTY)
2507 			continue;
2508 
2509 		if (nfs4_trigger_add_mntopt(mntopts, mop->mo_name, vfsp)) {
2510 			kmem_free(mntopts, MAX_MNTOPT_STR);
2511 			vfs_unrefvfssw(vswp);
2512 			return (NULL);
2513 		}
2514 	}
2515 
2516 	vfs_unrefvfssw(vswp);
2517 
2518 	/*
2519 	 * MNTOPT_XATTR is not in the v4 mount opt proto list,
2520 	 * and it may only be passed via MS_OPTIONSTR, so we
2521 	 * must handle it here.
2522 	 *
2523 	 * Ideally, it would be in the list, but NFS does not specify its
2524 	 * own opt proto list, it uses instead the default one. Since
2525 	 * not all filesystems support extended attrs, it would not be
2526 	 * appropriate to add it there.
2527 	 */
2528 	if (nfs4_trigger_add_mntopt(mntopts, MNTOPT_XATTR, vfsp) ||
2529 	    nfs4_trigger_add_mntopt(mntopts, MNTOPT_NOXATTR, vfsp)) {
2530 		kmem_free(mntopts, MAX_MNTOPT_STR);
2531 		return (NULL);
2532 	}
2533 
2534 	return (mntopts);
2535 }
2536 
2537 static void
2538 nfs4_trigger_destroy_mntopts(char *mntopts)
2539 {
2540 	if (mntopts)
2541 		kmem_free(mntopts, MAX_MNTOPT_STR);
2542 }
2543 
2544 /*
2545  * Check a single mount option (optname). Add to mntopts if it is set in VFS.
2546  */
2547 static int
2548 nfs4_trigger_add_mntopt(char *mntopts, char *optname, vfs_t *vfsp)
2549 {
2550 	if (mntopts == NULL || optname == NULL || vfsp == NULL)
2551 		return (EINVAL);
2552 
2553 	if (vfs_optionisset(vfsp, optname, NULL)) {
2554 		size_t mntoptslen = strlen(mntopts);
2555 		size_t optnamelen = strlen(optname);
2556 
2557 		/* +1 for ',', +1 for NUL */
2558 		if (mntoptslen + optnamelen + 2 > MAX_MNTOPT_STR)
2559 			return (EOVERFLOW);
2560 
2561 		/* first or subsequent mount option? */
2562 		if (*mntopts != '\0')
2563 			(void) strcat(mntopts, ",");
2564 
2565 		(void) strcat(mntopts, optname);
2566 	}
2567 
2568 	return (0);
2569 }
2570 
2571 static enum clnt_stat
2572 nfs4_trigger_ping_server(servinfo4_t *svp, int nointr)
2573 {
2574 	int retries, error;
2575 	uint_t max_msgsize;
2576 	enum clnt_stat status;
2577 	CLIENT *cl;
2578 	struct timeval timeout;
2579 
2580 	/* as per recov_newserver() */
2581 	max_msgsize = 0;
2582 	retries = 1;
2583 	timeout.tv_sec = 2;
2584 	timeout.tv_usec = 0;
2585 
2586 	error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, NFS_PROGRAM,
2587 	    NFS_V4, max_msgsize, retries, CRED(), &cl);
2588 	if (error)
2589 		return (RPC_FAILED);
2590 
2591 	if (nointr)
2592 		cl->cl_nosignal = TRUE;
2593 	status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL, xdr_void, NULL,
2594 	    timeout);
2595 	if (nointr)
2596 		cl->cl_nosignal = FALSE;
2597 
2598 	AUTH_DESTROY(cl->cl_auth);
2599 	CLNT_DESTROY(cl);
2600 
2601 	return (status);
2602 }
2603