xref: /titanic_41/usr/src/uts/common/fs/autofs/auto_subr.c (revision 8eea8e29cc4374d1ee24c25a07f45af132db3499)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/param.h>
30 #include <sys/kmem.h>
31 #include <sys/errno.h>
32 #include <sys/proc.h>
33 #include <sys/disp.h>
34 #include <sys/vfs.h>
35 #include <sys/vnode.h>
36 #include <sys/pathname.h>
37 #include <sys/cred.h>
38 #include <sys/mount.h>
39 #include <sys/cmn_err.h>
40 #include <sys/debug.h>
41 #include <sys/systm.h>
42 #include <rpc/types.h>
43 #include <rpc/xdr.h>
44 #include <rpc/auth.h>
45 #include <rpc/clnt.h>
46 #include <sys/ticotsord.h>
47 #include <sys/dirent.h>
48 #include <fs/fs_subr.h>
49 #include <rpcsvc/autofs_prot.h>
50 #include <sys/fs/autofs.h>
51 #include <sys/callb.h>
52 #include <sys/sysmacros.h>
53 #include <sys/zone.h>
54 #include <sys/fs/mntdata.h>
55 
56 /*
57  * Autofs and Zones:
58  *
59  * Zones are delegated the responsibility of managing their own autofs mounts
60  * and maps.  Each zone runs its own copy of automountd, with its own timeouts,
61  * and other logically "global" parameters.  kRPC and virtualization in the
62  * loopback transport (tl) will prevent a zone from communicating with another
63  * zone's automountd.
64  *
65  * Each zone has its own "rootfnnode" and associated tree of auto nodes.
66  *
67  * Each zone also has its own set of "unmounter" kernel threads; these are
68  * created and run within the zone's context (ie, they are created via
69  * zthread_create()).
70  *
71  * Cross-zone mount triggers are disallowed.  There is a check in
72  * auto_trigger_mount() to this effect; EPERM is returned to indicate that the
73  * mount is not owned by the caller.
74  *
75  * autofssys() enables a caller in the global zone to clean up in-kernel (as
76  * well as regular) autofs mounts via the unmount_tree() mechanism.  This is
77  * routinely done when all mounts are removed as part of zone shutdown.
78  */
79 #define	TYPICALMAXPATHLEN	64
80 
81 static kmutex_t autofs_nodeid_lock;
82 
83 static int auto_perform_link(fnnode_t *, struct linka *, cred_t *);
84 static int auto_perform_actions(fninfo_t *, fnnode_t *,
85     action_list *, cred_t *);
86 static int auto_getmntpnt(vnode_t *, char *, vnode_t **, cred_t *);
87 static int auto_lookup_request(fninfo_t *, char *, struct linka *,
88     cred_t *, bool_t, bool_t *);
89 static int auto_mount_request(fninfo_t *, char *, action_list **,
90     cred_t *, bool_t);
91 
92 /*
93  * Clears the MF_INPROG flag, and wakes up those threads sleeping on
94  * fn_cv_mount if MF_WAITING is set.
95  */
96 void
97 auto_unblock_others(
98 	fnnode_t *fnp,
99 	uint_t operation)		/* either MF_INPROG or MF_LOOKUP */
100 {
101 	ASSERT(operation & (MF_INPROG | MF_LOOKUP));
102 	fnp->fn_flags &= ~operation;
103 	if (fnp->fn_flags & MF_WAITING) {
104 		fnp->fn_flags &= ~MF_WAITING;
105 		cv_broadcast(&fnp->fn_cv_mount);
106 	}
107 }
108 
109 int
110 auto_wait4mount(fnnode_t *fnp)
111 {
112 	int error;
113 	k_sigset_t smask;
114 
115 	AUTOFS_DPRINT((4, "auto_wait4mount: fnp=%p\n", (void *)fnp));
116 
117 	mutex_enter(&fnp->fn_lock);
118 	while (fnp->fn_flags & (MF_INPROG | MF_LOOKUP)) {
119 		/*
120 		 * There is a mount or a lookup in progress.
121 		 */
122 		fnp->fn_flags |= MF_WAITING;
123 		sigintr(&smask, 1);
124 		if (!cv_wait_sig(&fnp->fn_cv_mount, &fnp->fn_lock)) {
125 			/*
126 			 * Decided not to wait for operation to
127 			 * finish after all.
128 			 */
129 			sigunintr(&smask);
130 			mutex_exit(&fnp->fn_lock);
131 			return (EINTR);
132 		}
133 		sigunintr(&smask);
134 	}
135 	error = fnp->fn_error;
136 
137 	if (error == EINTR) {
138 		/*
139 		 * The thread doing the mount got interrupted, we need to
140 		 * try again, by returning EAGAIN.
141 		 */
142 		error = EAGAIN;
143 	}
144 	mutex_exit(&fnp->fn_lock);
145 
146 	AUTOFS_DPRINT((5, "auto_wait4mount: fnp=%p error=%d\n", (void *)fnp,
147 	    error));
148 	return (error);
149 }
150 
151 int
152 auto_lookup_aux(fnnode_t *fnp, char *name, cred_t *cred)
153 {
154 	struct fninfo *fnip;
155 	struct linka link;
156 	bool_t mountreq = FALSE;
157 	int error = 0;
158 
159 	fnip = vfstofni(fntovn(fnp)->v_vfsp);
160 	bzero(&link, sizeof (link));
161 	error = auto_lookup_request(fnip, name, &link, cred, TRUE, &mountreq);
162 	if (!error) {
163 		if (link.link != NULL) {
164 			/*
165 			 * This node should be a symlink
166 			 */
167 			error = auto_perform_link(fnp, &link, cred);
168 			kmem_free(link.dir, strlen(link.dir) + 1);
169 			kmem_free(link.link, strlen(link.link) + 1);
170 		} else if (mountreq) {
171 			/*
172 			 * The automount daemon is requesting a mount,
173 			 * implying this entry must be a wildcard match and
174 			 * therefore in need of verification that the entry
175 			 * exists on the server.
176 			 */
177 			mutex_enter(&fnp->fn_lock);
178 			AUTOFS_BLOCK_OTHERS(fnp, MF_INPROG);
179 			fnp->fn_error = 0;
180 
181 			/*
182 			 * Unblock other lookup requests on this node,
183 			 * this is needed to let the lookup generated by
184 			 * the mount call to complete. The caveat is
185 			 * other lookups on this node can also get by,
186 			 * i.e., another lookup on this node that occurs
187 			 * while this lookup is attempting the mount
188 			 * would return a positive result no matter what.
189 			 * Therefore two lookups on the this node could
190 			 * potentially get disparate results.
191 			 */
192 			AUTOFS_UNBLOCK_OTHERS(fnp, MF_LOOKUP);
193 			mutex_exit(&fnp->fn_lock);
194 			/*
195 			 * auto_new_mount_thread fires up a new thread which
196 			 * calls automountd finishing up the work
197 			 */
198 			auto_new_mount_thread(fnp, name, cred);
199 
200 			/*
201 			 * At this point, we are simply another thread
202 			 * waiting for the mount to complete
203 			 */
204 			error = auto_wait4mount(fnp);
205 			if (error == AUTOFS_SHUTDOWN)
206 				error = ENOENT;
207 		}
208 	}
209 
210 	mutex_enter(&fnp->fn_lock);
211 	fnp->fn_error = error;
212 
213 	/*
214 	 * Notify threads waiting for lookup/mount that
215 	 * it's done.
216 	 */
217 	if (mountreq) {
218 		AUTOFS_UNBLOCK_OTHERS(fnp, MF_INPROG);
219 	} else {
220 		AUTOFS_UNBLOCK_OTHERS(fnp, MF_LOOKUP);
221 	}
222 	mutex_exit(&fnp->fn_lock);
223 	return (error);
224 }
225 
226 /*
227  * Starting point for thread to handle mount requests with automountd.
228  * XXX auto_mount_thread() is not suspend-safe within the scope of
229  * the present model defined for cpr to suspend the system. Calls
230  * made by the auto_mount_thread() that have been identified to be unsafe
231  * are (1) RPC client handle setup and client calls to automountd which
232  * can block deep down in the RPC library, (2) kmem_alloc() calls with the
233  * KM_SLEEP flag which can block if memory is low, and (3) VFS_*(), and
234  * lookuppnvp() calls which can result in over the wire calls to servers.
235  * The thread should be completely reevaluated to make it suspend-safe in
236  * case of future updates to the cpr model.
237  */
238 static void
239 auto_mount_thread(struct autofs_callargs *argsp)
240 {
241 	struct fninfo *fnip;
242 	fnnode_t *fnp;
243 	vnode_t *vp;
244 	char *name;
245 	size_t namelen;
246 	cred_t *cred;
247 	action_list *alp = NULL;
248 	int error;
249 	callb_cpr_t cprinfo;
250 	kmutex_t auto_mount_thread_cpr_lock;
251 
252 	mutex_init(&auto_mount_thread_cpr_lock, NULL, MUTEX_DEFAULT, NULL);
253 	CALLB_CPR_INIT(&cprinfo, &auto_mount_thread_cpr_lock, callb_generic_cpr,
254 		"auto_mount_thread");
255 
256 	fnp = argsp->fnc_fnp;
257 	vp = fntovn(fnp);
258 	fnip = vfstofni(vp->v_vfsp);
259 	name = argsp->fnc_name;
260 	cred = argsp->fnc_cred;
261 	ASSERT(crgetzoneid(argsp->fnc_cred) == fnip->fi_zoneid);
262 
263 	error = auto_mount_request(fnip, name, &alp, cred, TRUE);
264 	if (!error)
265 		error = auto_perform_actions(fnip, fnp, alp, cred);
266 	mutex_enter(&fnp->fn_lock);
267 	fnp->fn_error = error;
268 
269 	/*
270 	 * Notify threads waiting for mount that
271 	 * it's done.
272 	 */
273 	AUTOFS_UNBLOCK_OTHERS(fnp, MF_INPROG);
274 	mutex_exit(&fnp->fn_lock);
275 
276 	VN_RELE(vp);
277 	crfree(argsp->fnc_cred);
278 	namelen = strlen(argsp->fnc_name) + 1;
279 	kmem_free(argsp->fnc_name, namelen);
280 	kmem_free(argsp, sizeof (*argsp));
281 
282 	mutex_enter(&auto_mount_thread_cpr_lock);
283 	CALLB_CPR_EXIT(&cprinfo);
284 	mutex_destroy(&auto_mount_thread_cpr_lock);
285 	zthread_exit();
286 	/* NOTREACHED */
287 }
288 
289 static int autofs_thr_success = 0;
290 
291 /*
292  * Creates new thread which calls auto_mount_thread which does
293  * the bulk of the work calling automountd, via 'auto_perform_actions'.
294  */
295 void
296 auto_new_mount_thread(fnnode_t *fnp, char *name, cred_t *cred)
297 {
298 	struct autofs_callargs *argsp;
299 
300 	argsp = kmem_alloc(sizeof (*argsp), KM_SLEEP);
301 	VN_HOLD(fntovn(fnp));
302 	argsp->fnc_fnp = fnp;
303 	argsp->fnc_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
304 	(void) strcpy(argsp->fnc_name, name);
305 	argsp->fnc_origin = curthread;
306 	crhold(cred);
307 	argsp->fnc_cred = cred;
308 
309 	(void) zthread_create(NULL, 0, auto_mount_thread, argsp, 0,
310 	    minclsyspri);
311 	autofs_thr_success++;
312 }
313 
314 int
315 auto_calldaemon(
316 	fninfo_t *fnip,
317 	rpcproc_t which,
318 	xdrproc_t xdrargs,
319 	void *argsp,
320 	xdrproc_t xdrres,
321 	void *resp,
322 	cred_t *cred,
323 	bool_t hard)				/* retry forever? */
324 {
325 	CLIENT *client;
326 	enum clnt_stat status;
327 	struct rpc_err rpcerr;
328 	struct timeval wait;
329 	bool_t tryagain;
330 	int error = 0;
331 	k_sigset_t smask;
332 	struct autofs_globals *fngp = vntofn(fnip->fi_rootvp)->fn_globals;
333 
334 	AUTOFS_DPRINT((4, "auto_calldaemon\n"));
335 
336 	error = clnt_tli_kcreate(&fnip->fi_knconf, &fnip->fi_addr,
337 	    AUTOFS_PROG, AUTOFS_VERS, 0, INT_MAX, cred, &client);
338 
339 	if (error) {
340 		auto_log(fngp, CE_WARN, "autofs: clnt_tli_kcreate: error %d",
341 		    error);
342 		goto done;
343 	}
344 
345 	/*
346 	 * Release the old authentication handle.  It was probably
347 	 * AUTH_UNIX.
348 	 */
349 	auth_destroy(client->cl_auth);
350 
351 	/*
352 	 * Create a new authentication handle for AUTH_LOOPBACK.  This
353 	 * will allow us to correctly handle the entire groups list.
354 	 */
355 	client->cl_auth = authloopback_create();
356 	if (client->cl_auth == NULL) {
357 		clnt_destroy(client);
358 		error = EINTR;
359 		auto_log(fngp, CE_WARN,
360 		    "autofs: authloopback_create: error %d", error);
361 		goto done;
362 	}
363 
364 	wait.tv_sec = fnip->fi_rpc_to;
365 	wait.tv_usec = 0;
366 	do {
367 		tryagain = FALSE;
368 		error = 0;
369 
370 		/*
371 		 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
372 		 * and SIGTERM. (Preserving the existing masks)
373 		 */
374 		sigintr(&smask, 1);
375 
376 		status = CLNT_CALL(client, which, xdrargs, argsp,
377 		    xdrres, resp, wait);
378 
379 		/*
380 		 * Restore original signal mask
381 		 */
382 		sigunintr(&smask);
383 
384 		switch (status) {
385 		case RPC_SUCCESS:
386 			break;
387 
388 		case RPC_INTR:
389 			error = EINTR;
390 			break;
391 
392 		case RPC_TIMEDOUT:
393 			tryagain = TRUE;
394 			error = ETIMEDOUT;
395 			break;
396 
397 		case RPC_CANTCONNECT:
398 		case RPC_CANTCREATESTREAM:
399 			/*
400 			 * The connection could not be established
401 			 */
402 			/* fall thru */
403 		case RPC_XPRTFAILED:
404 			/*
405 			 * The connection could not be established or
406 			 * was dropped, we differentiate between the two
407 			 * conditions by calling CLNT_GETERR and look at
408 			 * rpcerror.re_errno.
409 			 * If rpcerr.re_errno == ECONNREFUSED, then the
410 			 * connection could not be established at all.
411 			 */
412 			error = ECONNREFUSED;
413 			if (status == RPC_XPRTFAILED) {
414 				CLNT_GETERR(client, &rpcerr);
415 				if (rpcerr.re_errno != ECONNREFUSED) {
416 					/*
417 					 * The connection was dropped, return
418 					 * to the caller if hard is not set.
419 					 * It is the responsability of the
420 					 * caller to retry the call if
421 					 * appropriate.
422 					 */
423 					error = ECONNRESET;
424 				}
425 			}
426 			/*
427 			 * We know that the current thread is doing work on
428 			 * behalf of its own zone, so it's ok to use
429 			 * curproc->p_zone.
430 			 */
431 			ASSERT(fngp->fng_zoneid == getzoneid());
432 			if (zone_status_get(curproc->p_zone) >=
433 			    ZONE_IS_SHUTTING_DOWN) {
434 				/*
435 				 * There's no point in trying to talk to
436 				 * automountd.  Plus, zone_shutdown() is
437 				 * waiting for us.
438 				 */
439 				tryagain = FALSE;
440 				break;
441 			}
442 			tryagain = hard;
443 			if (!fngp->fng_printed_not_running_msg) {
444 				if (tryagain) {
445 					fngp->fng_printed_not_running_msg = 1;
446 					zprintf(fngp->fng_zoneid,
447 					"automountd not running, retrying\n");
448 				}
449 			}
450 			break;
451 
452 		default:
453 			auto_log(fngp, CE_WARN, "autofs: %s",
454 			    clnt_sperrno(status));
455 			error = ENOENT;
456 			break;
457 		}
458 	} while (tryagain);
459 
460 	if (status == RPC_SUCCESS) {
461 		if (fngp->fng_printed_not_running_msg == 1) {
462 			fngp->fng_printed_not_running_msg = 0;
463 			zprintf(fngp->fng_zoneid, "automountd OK\n");
464 		}
465 	}
466 	auth_destroy(client->cl_auth);
467 	clnt_destroy(client);
468 
469 done:
470 	ASSERT(status == RPC_SUCCESS || error != 0);
471 
472 	AUTOFS_DPRINT((5, "auto_calldaemon error=%d\n", error));
473 	return (error);
474 }
475 
476 static int
477 auto_null_request(fninfo_t *fnip, cred_t *cred, bool_t hard)
478 {
479 	int error;
480 
481 	AUTOFS_DPRINT((4, "\tauto_null_request\n"));
482 
483 	error = auto_calldaemon(fnip, NULLPROC, xdr_void, NULL, xdr_void, NULL,
484 	    cred, hard);
485 
486 	AUTOFS_DPRINT((5, "\tauto_null_request: error=%d\n", error));
487 	return (error);
488 }
489 
490 static int
491 auto_lookup_request(
492 	fninfo_t *fnip,
493 	char *key,
494 	struct linka *lnp,
495 	cred_t *cred,
496 	bool_t hard,
497 	bool_t *mountreq)
498 {
499 	int error;
500 	struct autofs_globals *fngp;
501 	struct autofs_lookupargs request;
502 	struct autofs_lookupres result;
503 	struct linka *p;
504 
505 	AUTOFS_DPRINT((4, "auto_lookup_request: path=%s name=%s\n",
506 	    fnip->fi_path, key));
507 
508 	fngp = vntofn(fnip->fi_rootvp)->fn_globals;
509 	request.map = fnip->fi_map;
510 	request.path = fnip->fi_path;
511 
512 	if (fnip->fi_flags & MF_DIRECT)
513 		request.name = fnip->fi_key;
514 	else
515 		request.name = key;
516 	AUTOFS_DPRINT((4, "auto_lookup_request: using key=%s\n", request.name));
517 
518 	request.subdir = fnip->fi_subdir;
519 	request.opts = fnip->fi_opts;
520 	request.isdirect = fnip->fi_flags & MF_DIRECT ? TRUE : FALSE;
521 
522 	bzero(&result, sizeof (result));
523 	error = auto_calldaemon(fnip, AUTOFS_LOOKUP,
524 	    xdr_autofs_lookupargs, &request,
525 	    xdr_autofs_lookupres, &result,
526 	    cred, hard);
527 	if (!error) {
528 		fngp->fng_verbose = result.lu_verbose;
529 		switch (result.lu_res) {
530 		case AUTOFS_OK:
531 			switch (result.lu_type.action) {
532 			case AUTOFS_MOUNT_RQ:
533 				lnp->link = NULL;
534 				lnp->dir = NULL;
535 				*mountreq = TRUE;
536 				break;
537 			case AUTOFS_LINK_RQ:
538 				p =
539 				&result.lu_type.lookup_result_type_u.lt_linka;
540 				lnp->dir = kmem_alloc(strlen(p->dir) + 1,
541 				    KM_SLEEP);
542 				(void) strcpy(lnp->dir, p->dir);
543 				lnp->link = kmem_alloc(strlen(p->link) + 1,
544 				    KM_SLEEP);
545 				(void) strcpy(lnp->link, p->link);
546 				break;
547 			case AUTOFS_NONE:
548 				lnp->link = NULL;
549 				lnp->dir = NULL;
550 				break;
551 			default:
552 				auto_log(fngp, CE_WARN,
553 				    "auto_lookup_request: bad action type %d",
554 				    result.lu_res);
555 				error = ENOENT;
556 			}
557 			break;
558 		case AUTOFS_NOENT:
559 			error = ENOENT;
560 			break;
561 		default:
562 			error = ENOENT;
563 			auto_log(fngp, CE_WARN,
564 			    "auto_lookup_request: unknown result: %d",
565 			    result.lu_res);
566 			break;
567 		}
568 	}
569 
570 done:
571 	xdr_free(xdr_autofs_lookupres, (char *)&result);
572 
573 	AUTOFS_DPRINT((5, "auto_lookup_request: path=%s name=%s error=%d\n",
574 	    fnip->fi_path, key, error));
575 	return (error);
576 }
577 
578 static int
579 auto_mount_request(
580 	fninfo_t *fnip,
581 	char *key,
582 	action_list **alpp,
583 	cred_t *cred,
584 	bool_t hard)
585 {
586 	int error;
587 	struct autofs_globals *fngp;
588 	struct autofs_lookupargs request;
589 	struct autofs_mountres *result;
590 
591 	AUTOFS_DPRINT((4, "auto_mount_request: path=%s name=%s\n",
592 	    fnip->fi_path, key));
593 
594 	fngp = vntofn(fnip->fi_rootvp)->fn_globals;
595 	request.map = fnip->fi_map;
596 	request.path = fnip->fi_path;
597 
598 	if (fnip->fi_flags & MF_DIRECT)
599 		request.name = fnip->fi_key;
600 	else
601 		request.name = key;
602 	AUTOFS_DPRINT((4, "auto_mount_request: using key=%s\n", request.name));
603 
604 	request.subdir = fnip->fi_subdir;
605 	request.opts = fnip->fi_opts;
606 	request.isdirect = fnip->fi_flags & MF_DIRECT ? TRUE : FALSE;
607 
608 	*alpp = NULL;
609 	result = kmem_zalloc(sizeof (*result), KM_SLEEP);
610 	error = auto_calldaemon(fnip, AUTOFS_MOUNT,
611 	    xdr_autofs_lookupargs, &request,
612 	    xdr_autofs_mountres, result,
613 	    cred, hard);
614 	if (!error) {
615 		fngp->fng_verbose = result->mr_verbose;
616 		switch (result->mr_type.status) {
617 		case AUTOFS_ACTION:
618 			error = 0;
619 			/*
620 			 * Save the action list since it is used by
621 			 * the caller. We NULL the action list pointer
622 			 * in 'result' so that xdr_free() will not free
623 			 * the list.
624 			 */
625 			*alpp = result->mr_type.mount_result_type_u.list;
626 			result->mr_type.mount_result_type_u.list = NULL;
627 			break;
628 		case AUTOFS_DONE:
629 			error = result->mr_type.mount_result_type_u.error;
630 			break;
631 		default:
632 			error = ENOENT;
633 			auto_log(fngp, CE_WARN,
634 			    "auto_mount_request: unknown status %d",
635 			    result->mr_type.status);
636 			break;
637 		}
638 	}
639 
640 	xdr_free(xdr_autofs_mountres, (char *)result);
641 	kmem_free(result, sizeof (*result));
642 
643 	AUTOFS_DPRINT((5, "auto_mount_request: path=%s name=%s error=%d\n",
644 	    fnip->fi_path, key, error));
645 	return (error);
646 }
647 
648 
649 static int
650 auto_send_unmount_request(
651 	fninfo_t *fnip,
652 	umntrequest *ul,
653 	cred_t *cred,
654 	bool_t hard)
655 {
656 	int error;
657 	umntres result;
658 
659 	AUTOFS_DPRINT((4, "\tauto_send_unmount_request: fstype=%s "
660 			" mntpnt=%s\n", ul->fstype, ul->mntpnt));
661 
662 	error = auto_calldaemon(fnip, AUTOFS_UNMOUNT,
663 	    xdr_umntrequest, ul,
664 	    xdr_umntres, &result,
665 	    cred, hard);
666 	if (!error)
667 		error = result.status;
668 
669 	AUTOFS_DPRINT((5, "\tauto_send_unmount_request: error=%d\n", error));
670 
671 	return (error);
672 }
673 
674 static int
675 auto_perform_link(fnnode_t *fnp, struct linka *linkp, cred_t *cred)
676 {
677 	vnode_t *vp;
678 	size_t len;
679 	char *tmp;
680 
681 	AUTOFS_DPRINT((3, "auto_perform_link: fnp=%p dir=%s link=%s\n",
682 	    (void *)fnp, linkp->dir, linkp->link));
683 
684 	len = strlen(linkp->link) + 1;		/* include '\0' */
685 	tmp = kmem_zalloc(len, KM_SLEEP);
686 	(void) kcopy(linkp->link, tmp, len);
687 	mutex_enter(&fnp->fn_lock);
688 	fnp->fn_symlink = tmp;
689 	fnp->fn_symlinklen = (uint_t)len;
690 	fnp->fn_flags |= MF_THISUID_MATCH_RQD;
691 	crhold(cred);
692 	fnp->fn_cred = cred;
693 	mutex_exit(&fnp->fn_lock);
694 
695 	vp = fntovn(fnp);
696 	vp->v_type = VLNK;
697 
698 	return (0);
699 }
700 
701 static boolean_t
702 auto_invalid_action(fninfo_t *dfnip, fnnode_t *dfnp, action_list *p)
703 {
704 	struct mounta *m;
705 	struct autofs_args *argsp;
706 	vnode_t *dvp;
707 	char buff[AUTOFS_MAXPATHLEN];
708 	size_t len;
709 	struct autofs_globals *fngp;
710 
711 	fngp = dfnp->fn_globals;
712 	dvp = fntovn(dfnp);
713 	/*
714 	 * Before we go any further, this better be a mount request.
715 	 */
716 	if (p->action.action != AUTOFS_MOUNT_RQ)
717 		return (B_TRUE);
718 	m = &p->action.action_list_entry_u.mounta;
719 	/*
720 	 * Make sure we aren't geting passed NULL values or a "dir" that
721 	 * isn't "." and doesn't begin with "./".
722 	 *
723 	 * We also only want to perform autofs mounts, so make sure
724 	 * no-one is trying to trick us into doing anything else.
725 	 */
726 	if (m->spec == NULL || m->dir == NULL || m->dir[0] != '.' ||
727 	    (m->dir[1] != '/' && m->dir[1] != '\0') ||
728 	    m->fstype == NULL || strcmp(m->fstype, "autofs") != 0 ||
729 	    m->dataptr == NULL || m->datalen != sizeof (struct autofs_args) ||
730 	    m->optptr == NULL)
731 		return (B_TRUE);
732 	/*
733 	 * We also don't like ".."s in the pathname.  Symlinks are
734 	 * handled by the fact that we'll use NOFOLLOW when we do
735 	 * lookup()s.
736 	 */
737 	if (strstr(m->dir, "/../") != NULL ||
738 	    (len = strlen(m->dir)) > sizeof ("/..") - 1 &&
739 	    m->dir[len] == '.' && m->dir[len - 1] == '.' &&
740 	    m->dir[len - 2] == '/')
741 		return (B_TRUE);
742 	argsp = (struct autofs_args *)m->dataptr;
743 	/*
744 	 * We don't want NULL values here either.
745 	 */
746 	if (argsp->addr.buf == NULL || argsp->path == NULL ||
747 	    argsp->opts == NULL || argsp->map == NULL || argsp->subdir == NULL)
748 		return (B_TRUE);
749 	/*
750 	 * We know what the claimed pathname *should* look like:
751 	 *
752 	 * If the parent (dfnp) is a mount point (VROOT), then
753 	 * the path should be (dfnip->fi_path + m->dir).
754 	 *
755 	 * Else, we know we're only two levels deep, so we use
756 	 * (dfnip->fi_path + dfnp->fn_name + m->dir).
757 	 *
758 	 * Furthermore, "." only makes sense if dfnp is a
759 	 * trigger node.
760 	 *
761 	 * At this point it seems like the passed-in path is
762 	 * redundant.
763 	 */
764 	if (dvp->v_flag & VROOT) {
765 		if (m->dir[1] == '\0' && !(dfnp->fn_flags & MF_TRIGGER))
766 			return (B_TRUE);
767 		(void) snprintf(buff, sizeof (buff), "%s%s",
768 		    dfnip->fi_path, m->dir + 1);
769 	} else {
770 		(void) snprintf(buff, sizeof (buff), "%s/%s%s",
771 		    dfnip->fi_path, dfnp->fn_name, m->dir + 1);
772 	}
773 	if (strcmp(argsp->path, buff) != 0) {
774 		auto_log(fngp, CE_WARN, "autofs: expected path of '%s', "
775 		    "got '%s' instead.", buff, argsp->path);
776 		return (B_TRUE);
777 	}
778 	return (B_FALSE); /* looks OK */
779 }
780 
781 static int
782 auto_perform_actions(
783 	fninfo_t *dfnip,
784 	fnnode_t *dfnp,
785 	action_list *alp,
786 	cred_t *cred)	/* Credentials of the caller */
787 {
788 	action_list *p;
789 	struct mounta *m, margs;
790 	struct autofs_args *argsp;
791 	int error, success = 0;
792 	vnode_t *mvp, *dvp, *newvp;
793 	fnnode_t *newfnp, *mfnp;
794 	int auto_mount = 0;
795 	int save_triggers = 0;		/* set when we need to save at least */
796 					/* one trigger node */
797 	int update_times = 0;
798 	char *mntpnt;
799 	char buff[AUTOFS_MAXPATHLEN];
800 	timestruc_t now;
801 	struct autofs_globals *fngp;
802 	cred_t *zcred;	/* kcred-like credentials limited by our zone */
803 
804 	AUTOFS_DPRINT((4, "auto_perform_actions: alp=%p\n", (void *)alp));
805 
806 	fngp = dfnp->fn_globals;
807 	dvp = fntovn(dfnp);
808 
809 	/*
810 	 * As automountd running in a zone may be compromised, and this may be
811 	 * an attack, we can't trust everything passed in by automountd, and we
812 	 * need to do argument verification.  We'll issue a warning and drop
813 	 * the request if it doesn't seem right.
814 	 */
815 	for (p = alp; p != NULL; p = p->next) {
816 		if (auto_invalid_action(dfnip, dfnp, p)) {
817 			/*
818 			 * This warning should be sent to the global zone,
819 			 * since presumably the zone administrator is the same
820 			 * as the attacker.
821 			 */
822 			cmn_err(CE_WARN, "autofs: invalid action list received "
823 			    "by automountd in zone %s.",
824 			    curproc->p_zone->zone_name);
825 			/*
826 			 * This conversation is over.
827 			 */
828 			xdr_free(xdr_action_list, (char *)alp);
829 			return (EINVAL);
830 		}
831 	}
832 
833 	zcred = zone_get_kcred(getzoneid());
834 	ASSERT(zcred != NULL);
835 
836 	if (vn_mountedvfs(dvp) != NULL) {
837 		/*
838 		 * The daemon successfully mounted a filesystem
839 		 * on the AUTOFS root node.
840 		 */
841 		mutex_enter(&dfnp->fn_lock);
842 		dfnp->fn_flags |= MF_MOUNTPOINT;
843 		ASSERT(dfnp->fn_dirents == NULL);
844 		mutex_exit(&dfnp->fn_lock);
845 		success++;
846 	} else {
847 		/*
848 		 * Clear MF_MOUNTPOINT.
849 		 */
850 		mutex_enter(&dfnp->fn_lock);
851 		if (dfnp->fn_flags & MF_MOUNTPOINT) {
852 			AUTOFS_DPRINT((10, "autofs: clearing mountpoint "
853 			    "flag on %s.", dfnp->fn_name));
854 			ASSERT(dfnp->fn_dirents == NULL);
855 			ASSERT(dfnp->fn_trigger == NULL);
856 		}
857 		dfnp->fn_flags &= ~MF_MOUNTPOINT;
858 		mutex_exit(&dfnp->fn_lock);
859 	}
860 
861 	for (p = alp; p != NULL; p = p->next) {
862 		vfs_t *vfsp;	/* dummy argument */
863 		vfs_t *mvfsp;
864 
865 		auto_mount = 0;
866 
867 		m = &p->action.action_list_entry_u.mounta;
868 		argsp = (struct autofs_args *)m->dataptr;
869 		/*
870 		 * use the parent directory's timeout since it's the
871 		 * one specified/inherited by automount.
872 		 */
873 		argsp->mount_to = dfnip->fi_mount_to;
874 		/*
875 		 * The mountpoint is relative, and it is guaranteed to
876 		 * begin with "."
877 		 *
878 		 */
879 		ASSERT(m->dir[0] == '.');
880 		if (m->dir[0] == '.' && m->dir[1] == '\0') {
881 			/*
882 			 * mounting on the trigger node
883 			 */
884 			mvp = dvp;
885 			VN_HOLD(mvp);
886 			goto mount;
887 		}
888 		/*
889 		 * ignore "./" in front of mountpoint
890 		 */
891 		ASSERT(m->dir[1] == '/');
892 		mntpnt = m->dir + 2;
893 
894 		AUTOFS_DPRINT((10, "\tdfnip->fi_path=%s\n", dfnip->fi_path));
895 		AUTOFS_DPRINT((10, "\tdfnip->fi_flags=%x\n", dfnip->fi_flags));
896 		AUTOFS_DPRINT((10, "\tmntpnt=%s\n", mntpnt));
897 
898 		if (dfnip->fi_flags & MF_DIRECT) {
899 			AUTOFS_DPRINT((10, "\tDIRECT\n"));
900 			(void) sprintf(buff, "%s/%s", dfnip->fi_path, mntpnt);
901 		} else {
902 			AUTOFS_DPRINT((10, "\tINDIRECT\n"));
903 			(void) sprintf(buff, "%s/%s/%s", dfnip->fi_path,
904 			    dfnp->fn_name, mntpnt);
905 		}
906 
907 		if (vn_mountedvfs(dvp) == NULL) {
908 			/*
909 			 * Daemon didn't mount anything on the root
910 			 * We have to create the mountpoint if it doesn't
911 			 * exist already
912 			 *
913 			 * We use the caller's credentials in case a UID-match
914 			 * is required (MF_THISUID_MATCH_RQD).
915 			 */
916 			rw_enter(&dfnp->fn_rwlock, RW_WRITER);
917 			error = auto_search(dfnp, mntpnt, &mfnp, cred);
918 			if (error == 0) {
919 				/*
920 				 * AUTOFS mountpoint exists
921 				 */
922 				if (vn_mountedvfs(fntovn(mfnp)) != NULL) {
923 					cmn_err(CE_PANIC,
924 					    "auto_perform_actions: "
925 					    "mfnp=%p covered", (void *)mfnp);
926 				}
927 			} else {
928 				/*
929 				 * Create AUTOFS mountpoint
930 				 */
931 				ASSERT((dfnp->fn_flags & MF_MOUNTPOINT) == 0);
932 				error = auto_enter(dfnp, mntpnt, &mfnp, cred);
933 				ASSERT(mfnp->fn_linkcnt == 1);
934 				mfnp->fn_linkcnt++;
935 			}
936 			if (!error)
937 				update_times = 1;
938 			rw_exit(&dfnp->fn_rwlock);
939 			ASSERT(error != EEXIST);
940 			if (!error) {
941 				/*
942 				 * mfnp is already held.
943 				 */
944 				mvp = fntovn(mfnp);
945 			} else {
946 				auto_log(fngp, CE_WARN, "autofs: mount of %s "
947 				    "failed - can't create mountpoint.", buff);
948 				continue;
949 			}
950 		} else {
951 			/*
952 			 * Find mountpoint in VFS mounted here. If not found,
953 			 * fail the submount, though the overall mount has
954 			 * succeeded since the root is mounted.
955 			 */
956 			if (error = auto_getmntpnt(dvp, mntpnt, &mvp, kcred)) {
957 				auto_log(fngp, CE_WARN, "autofs: mount of %s "
958 				    "failed - mountpoint doesn't exist.", buff);
959 				continue;
960 			}
961 			if (mvp->v_type == VLNK) {
962 				auto_log(fngp, CE_WARN, "autofs: %s symbolic "
963 				    "link: not a valid mountpoint "
964 				    "- mount failed", buff);
965 				VN_RELE(mvp);
966 				error = ENOENT;
967 				continue;
968 			}
969 		}
970 mount:
971 		m->flags |= MS_SYSSPACE | MS_OPTIONSTR;
972 		/*
973 		 * Copy mounta struct here so we can substitute a buffer
974 		 * that is large enough to hold the returned option string,
975 		 * if that string is longer that the input option string.
976 		 * This can happen if there are default options enabled
977 		 * that were not in the input option string.
978 		 */
979 		bcopy(m, &margs, sizeof (*m));
980 		margs.optptr = kmem_alloc(MAX_MNTOPT_STR, KM_SLEEP);
981 		margs.optlen = MAX_MNTOPT_STR;
982 		(void) strcpy(margs.optptr, m->optptr);
983 		margs.dir = argsp->path;
984 		/*
985 		 * We use the zone's kcred because we don't want the zone to be
986 		 * able to thus do something it wouldn't normally be able to.
987 		 */
988 		error = domount(NULL, &margs, mvp, zcred, &vfsp);
989 		kmem_free(margs.optptr, MAX_MNTOPT_STR);
990 		if (error != 0) {
991 			auto_log(fngp, CE_WARN,
992 			    "autofs: domount of %s failed error=%d",
993 			    buff, error);
994 			VN_RELE(mvp);
995 			continue;
996 		}
997 		VFS_RELE(vfsp);
998 
999 		/*
1000 		 * If mountpoint is an AUTOFS node, then I'm going to
1001 		 * flag it that the Filesystem mounted on top was mounted
1002 		 * in the kernel so that the unmount can be done inside the
1003 		 * kernel as well.
1004 		 * I don't care to flag non-AUTOFS mountpoints when an AUTOFS
1005 		 * in-kernel mount was done on top, because the unmount
1006 		 * routine already knows that such case was done in the kernel.
1007 		 */
1008 		if (vfs_matchops(dvp->v_vfsp, vfs_getops(mvp->v_vfsp))) {
1009 			mfnp = vntofn(mvp);
1010 			mutex_enter(&mfnp->fn_lock);
1011 			mfnp->fn_flags |= MF_IK_MOUNT;
1012 			mutex_exit(&mfnp->fn_lock);
1013 		}
1014 
1015 		(void) vn_vfswlock_wait(mvp);
1016 		mvfsp = vn_mountedvfs(mvp);
1017 		if (mvfsp != NULL) {
1018 			vfs_lock_wait(mvfsp);
1019 			vn_vfsunlock(mvp);
1020 			error = VFS_ROOT(mvfsp, &newvp);
1021 			vfs_unlock(mvfsp);
1022 			if (error) {
1023 				/*
1024 				 * We've dropped the locks, so let's get
1025 				 * the mounted vfs again in case it changed.
1026 				 */
1027 				(void) vn_vfswlock_wait(mvp);
1028 				mvfsp = vn_mountedvfs(mvp);
1029 				if (mvfsp != NULL) {
1030 					error = dounmount(mvfsp, 0, CRED());
1031 					if (error) {
1032 						cmn_err(CE_WARN,
1033 						    "autofs: could not "
1034 						    "unmount vfs=%p",
1035 						(void *)mvfsp);
1036 					}
1037 				} else
1038 					vn_vfsunlock(mvp);
1039 				VN_RELE(mvp);
1040 				continue;
1041 			}
1042 		} else {
1043 			vn_vfsunlock(mvp);
1044 			VN_RELE(mvp);
1045 			continue;
1046 		}
1047 
1048 		auto_mount = vfs_matchops(dvp->v_vfsp,
1049 						vfs_getops(newvp->v_vfsp));
1050 		newfnp = vntofn(newvp);
1051 		newfnp->fn_parent = dfnp;
1052 
		/*
		 * At this time we want to save the AUTOFS filesystem as
		 * a trigger node.  (We only do this if the mount occurred
		 * on a node different from the root.)
		 * We look at the trigger nodes during
		 * the automatic unmounting to make sure we remove them
		 * as a unit and remount them as a unit if the filesystem
		 * mounted at the root could not be unmounted.
		 */
1062 		if (auto_mount && (error == 0) && (mvp != dvp)) {
1063 			save_triggers++;
1064 			/*
1065 			 * Add AUTOFS mount to hierarchy
1066 			 */
1067 			newfnp->fn_flags |= MF_TRIGGER;
1068 			rw_enter(&newfnp->fn_rwlock, RW_WRITER);
1069 			newfnp->fn_next = dfnp->fn_trigger;
1070 			rw_exit(&newfnp->fn_rwlock);
1071 			rw_enter(&dfnp->fn_rwlock, RW_WRITER);
1072 			dfnp->fn_trigger = newfnp;
1073 			rw_exit(&dfnp->fn_rwlock);
1074 			/*
1075 			 * Don't VN_RELE(newvp) here since dfnp now holds
1076 			 * reference to it as its trigger node.
1077 			 */
1078 			AUTOFS_DPRINT((10, "\tadding trigger %s to %s\n",
1079 			    newfnp->fn_name, dfnp->fn_name));
1080 			AUTOFS_DPRINT((10, "\tfirst trigger is %s\n",
1081 			    dfnp->fn_trigger->fn_name));
1082 			if (newfnp->fn_next != NULL)
1083 				AUTOFS_DPRINT((10, "\tnext trigger is %s\n",
1084 				    newfnp->fn_next->fn_name));
1085 			else
1086 				AUTOFS_DPRINT((10, "\tno next trigger\n"));
1087 		} else
1088 			VN_RELE(newvp);
1089 
1090 		if (!error)
1091 			success++;
1092 
1093 		if (update_times) {
1094 			gethrestime(&now);
1095 			dfnp->fn_atime = dfnp->fn_mtime = now;
1096 		}
1097 
1098 		VN_RELE(mvp);
1099 	}
1100 
1101 	if (save_triggers) {
1102 		/*
1103 		 * Make sure the parent can't be freed while it has triggers.
1104 		 */
1105 		VN_HOLD(dvp);
1106 	}
1107 
1108 	crfree(zcred);
1109 
1110 done:
1111 	/*
1112 	 * Return failure if daemon didn't mount anything, and all
1113 	 * kernel mounts attempted failed.
1114 	 */
1115 	error = success ? 0 : ENOENT;
1116 
1117 	if (alp != NULL) {
1118 		if ((error == 0) && save_triggers) {
1119 			/*
1120 			 * Save action_list information, so that we can use it
1121 			 * when it comes time to remount the trigger nodes
1122 			 * The action list is freed when the directory node
1123 			 * containing the reference to it is unmounted in
1124 			 * unmount_tree().
1125 			 */
1126 			mutex_enter(&dfnp->fn_lock);
1127 			ASSERT(dfnp->fn_alp == NULL);
1128 			dfnp->fn_alp = alp;
1129 			mutex_exit(&dfnp->fn_lock);
1130 		} else {
1131 			/*
1132 			 * free the action list now,
1133 			 */
1134 			xdr_free(xdr_action_list, (char *)alp);
1135 		}
1136 	}
1137 
1138 	AUTOFS_DPRINT((5, "auto_perform_actions: error=%d\n", error));
1139 	return (error);
1140 }
1141 
1142 fnnode_t *
1143 auto_makefnnode(
1144 	vtype_t type,
1145 	vfs_t *vfsp,
1146 	char *name,
1147 	cred_t *cred,
1148 	struct autofs_globals *fngp)
1149 {
1150 	fnnode_t *fnp;
1151 	vnode_t *vp;
1152 	char *tmpname;
1153 	timestruc_t now;
1154 	/*
1155 	 * autofs uses odd inode numbers
1156 	 * automountd uses even inode numbers
1157 	 *
1158 	 * To preserve the age-old semantics that inum+devid is unique across
1159 	 * the system, this variable must be global across zones.
1160 	 */
1161 	static ino_t nodeid = 3;
1162 
1163 	fnp = kmem_zalloc(sizeof (*fnp), KM_SLEEP);
1164 	fnp->fn_vnode = vn_alloc(KM_SLEEP);
1165 
1166 	vp = fntovn(fnp);
1167 	tmpname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1168 	(void) strcpy(tmpname, name);
1169 	fnp->fn_name = &tmpname[0];
1170 	fnp->fn_namelen = (int)strlen(tmpname) + 1;	/* include '\0' */
1171 	fnp->fn_uid = crgetuid(cred);
1172 	fnp->fn_gid = crgetgid(cred);
1173 	/*
1174 	 * ".." is added in auto_enter and auto_mount.
1175 	 * "." is added in auto_mkdir and auto_mount.
1176 	 */
1177 	/*
1178 	 * Note that fn_size and fn_linkcnt are already 0 since
1179 	 * we used kmem_zalloc to allocated fnp
1180 	 */
1181 	fnp->fn_mode = AUTOFS_MODE;
1182 	gethrestime(&now);
1183 	fnp->fn_atime = fnp->fn_mtime = fnp->fn_ctime = now;
1184 	fnp->fn_ref_time = now.tv_sec;
1185 	mutex_enter(&autofs_nodeid_lock);
1186 	fnp->fn_nodeid = nodeid;
1187 	nodeid += 2;
1188 	fnp->fn_globals = fngp;
1189 	fngp->fng_fnnode_count++;
1190 	mutex_exit(&autofs_nodeid_lock);
1191 	vn_setops(vp, auto_vnodeops);
1192 	vp->v_type = type;
1193 	vp->v_data = (void *)fnp;
1194 	vp->v_vfsp = vfsp;
1195 	mutex_init(&fnp->fn_lock, NULL, MUTEX_DEFAULT, NULL);
1196 	rw_init(&fnp->fn_rwlock, NULL, RW_DEFAULT, NULL);
1197 	cv_init(&fnp->fn_cv_mount, NULL, CV_DEFAULT, NULL);
1198 	vn_exists(vp);
1199 	return (fnp);
1200 }
1201 
1202 
1203 void
1204 auto_freefnnode(fnnode_t *fnp)
1205 {
1206 	vnode_t *vp = fntovn(fnp);
1207 
1208 	AUTOFS_DPRINT((4, "auto_freefnnode: fnp=%p\n", (void *)fnp));
1209 
1210 	ASSERT(fnp->fn_linkcnt == 0);
1211 	ASSERT(vp->v_count == 0);
1212 	ASSERT(fnp->fn_dirents == NULL);
1213 	ASSERT(fnp->fn_parent == NULL);
1214 
1215 	vn_invalid(vp);
1216 	kmem_free(fnp->fn_name, fnp->fn_namelen);
1217 	if (fnp->fn_symlink) {
1218 		ASSERT(fnp->fn_flags & MF_THISUID_MATCH_RQD);
1219 		kmem_free(fnp->fn_symlink, fnp->fn_symlinklen);
1220 	}
1221 	if (fnp->fn_cred)
1222 		crfree(fnp->fn_cred);
1223 	mutex_destroy(&fnp->fn_lock);
1224 	rw_destroy(&fnp->fn_rwlock);
1225 	cv_destroy(&fnp->fn_cv_mount);
1226 	vn_free(vp);
1227 
1228 	mutex_enter(&autofs_nodeid_lock);
1229 	fnp->fn_globals->fng_fnnode_count--;
1230 	mutex_exit(&autofs_nodeid_lock);
1231 	kmem_free(fnp, sizeof (*fnp));
1232 }
1233 
1234 void
1235 auto_disconnect(
1236 	fnnode_t *dfnp,
1237 	fnnode_t *fnp)
1238 {
1239 	fnnode_t *tmp, **fnpp;
1240 	vnode_t *vp = fntovn(fnp);
1241 	timestruc_t now;
1242 
1243 	AUTOFS_DPRINT((4,
1244 	    "auto_disconnect: dfnp=%p fnp=%p linkcnt=%d\n v_count=%d",
1245 	    (void *)dfnp, (void *)fnp, fnp->fn_linkcnt, vp->v_count));
1246 
1247 	ASSERT(RW_WRITE_HELD(&dfnp->fn_rwlock));
1248 	ASSERT(fnp->fn_linkcnt == 1);
1249 
1250 	if (vn_mountedvfs(vp) != NULL) {
1251 		cmn_err(CE_PANIC, "auto_disconnect: vp %p mounted on",
1252 		    (void *)vp);
1253 	}
1254 
1255 	/*
1256 	 * Decrement by 1 because we're removing the entry in dfnp.
1257 	 */
1258 	fnp->fn_linkcnt--;
1259 	fnp->fn_size--;
1260 
1261 	/*
1262 	 * only changed while holding parent's (dfnp) rw_lock
1263 	 */
1264 	fnp->fn_parent = NULL;
1265 
1266 	fnpp = &dfnp->fn_dirents;
1267 	for (;;) {
1268 		tmp = *fnpp;
1269 		if (tmp == NULL) {
1270 			cmn_err(CE_PANIC,
1271 			    "auto_disconnect: %p not in %p dirent list",
1272 			    (void *)fnp, (void *)dfnp);
1273 		}
1274 		if (tmp == fnp) {
1275 			*fnpp = tmp->fn_next; 	/* remove it from the list */
1276 			ASSERT(vp->v_count == 0);
1277 			/* child had a pointer to parent ".." */
1278 			dfnp->fn_linkcnt--;
1279 			dfnp->fn_size--;
1280 			break;
1281 		}
1282 		fnpp = &tmp->fn_next;
1283 	}
1284 
1285 	mutex_enter(&fnp->fn_lock);
1286 	gethrestime(&now);
1287 	fnp->fn_atime = fnp->fn_mtime = now;
1288 	mutex_exit(&fnp->fn_lock);
1289 
1290 	AUTOFS_DPRINT((5, "auto_disconnect: done\n"));
1291 }
1292 
1293 int
1294 auto_enter(fnnode_t *dfnp, char *name, fnnode_t **fnpp, cred_t *cred)
1295 {
1296 	struct fnnode *cfnp, **spp;
1297 	vnode_t *dvp = fntovn(dfnp);
1298 	ushort_t offset = 0;
1299 	ushort_t diff;
1300 
1301 	AUTOFS_DPRINT((4, "auto_enter: dfnp=%p, name=%s ", (void *)dfnp, name));
1302 
1303 	ASSERT(RW_WRITE_HELD(&dfnp->fn_rwlock));
1304 
1305 	cfnp = dfnp->fn_dirents;
1306 	if (cfnp == NULL) {
1307 		/*
1308 		 * offset = 0 for '.' and offset = 1 for '..'
1309 		 */
1310 		spp = &dfnp->fn_dirents;
1311 		offset = 2;
1312 	}
1313 
1314 	for (; cfnp; cfnp = cfnp->fn_next) {
1315 		if (strcmp(cfnp->fn_name, name) == 0) {
1316 			mutex_enter(&cfnp->fn_lock);
1317 			if (cfnp->fn_flags & MF_THISUID_MATCH_RQD) {
1318 				/*
1319 				 * "thisuser" kind of node, need to
1320 				 * match CREDs as well
1321 				 */
1322 				mutex_exit(&cfnp->fn_lock);
1323 				if (crcmp(cfnp->fn_cred, cred) == 0)
1324 					return (EEXIST);
1325 			} else {
1326 				mutex_exit(&cfnp->fn_lock);
1327 				return (EEXIST);
1328 			}
1329 		}
1330 
1331 		if (cfnp->fn_next != NULL) {
1332 			diff = (ushort_t)
1333 			    (cfnp->fn_next->fn_offset - cfnp->fn_offset);
1334 			ASSERT(diff != 0);
1335 			if (diff > 1 && offset == 0) {
1336 				offset = (ushort_t)cfnp->fn_offset + 1;
1337 				spp = &cfnp->fn_next;
1338 			}
1339 		} else if (offset == 0) {
1340 			offset = (ushort_t)cfnp->fn_offset + 1;
1341 			spp = &cfnp->fn_next;
1342 		}
1343 	}
1344 
1345 	*fnpp = auto_makefnnode(VDIR, dvp->v_vfsp, name, cred,
1346 	    dfnp->fn_globals);
1347 	if (*fnpp == NULL)
1348 		return (ENOMEM);
1349 
1350 	/*
1351 	 * I don't hold the mutex on fnpp because I created it, and
1352 	 * I'm already holding the writers lock for it's parent
1353 	 * directory, therefore nobody can reference it without me first
1354 	 * releasing the writers lock.
1355 	 */
1356 	(*fnpp)->fn_offset = offset;
1357 	(*fnpp)->fn_next = *spp;
1358 	*spp = *fnpp;
1359 	(*fnpp)->fn_parent = dfnp;
1360 	(*fnpp)->fn_linkcnt++;	/* parent now holds reference to entry */
1361 	(*fnpp)->fn_size++;
1362 
1363 	/*
1364 	 * dfnp->fn_linkcnt and dfnp->fn_size protected by dfnp->rw_lock
1365 	 */
1366 	dfnp->fn_linkcnt++;	/* child now holds reference to parent '..' */
1367 	dfnp->fn_size++;
1368 
1369 	dfnp->fn_ref_time = gethrestime_sec();
1370 
1371 	AUTOFS_DPRINT((5, "*fnpp=%p\n", (void *)*fnpp));
1372 	return (0);
1373 }
1374 
1375 int
1376 auto_search(fnnode_t *dfnp, char *name, fnnode_t **fnpp, cred_t *cred)
1377 {
1378 	vnode_t *dvp;
1379 	fnnode_t *p;
1380 	int error = ENOENT, match = 0;
1381 
1382 	AUTOFS_DPRINT((4, "auto_search: dfnp=%p, name=%s...\n",
1383 	    (void *)dfnp, name));
1384 
1385 	dvp = fntovn(dfnp);
1386 	if (dvp->v_type != VDIR) {
1387 		cmn_err(CE_PANIC, "auto_search: dvp=%p not a directory",
1388 		    (void *)dvp);
1389 	}
1390 
1391 	ASSERT(RW_LOCK_HELD(&dfnp->fn_rwlock));
1392 	for (p = dfnp->fn_dirents; p != NULL; p = p->fn_next) {
1393 		if (strcmp(p->fn_name, name) == 0) {
1394 			mutex_enter(&p->fn_lock);
1395 			if (p->fn_flags & MF_THISUID_MATCH_RQD) {
1396 				/*
1397 				 * "thisuser" kind of node
1398 				 * Need to match CREDs as well
1399 				 */
1400 				mutex_exit(&p->fn_lock);
1401 				match = crcmp(p->fn_cred, cred) == 0;
1402 			} else {
1403 				/*
1404 				 * No need to check CRED
1405 				 */
1406 				mutex_exit(&p->fn_lock);
1407 				match = 1;
1408 			}
1409 		}
1410 		if (match) {
1411 			error = 0;
1412 			if (fnpp) {
1413 				*fnpp = p;
1414 				VN_HOLD(fntovn(*fnpp));
1415 			}
1416 			break;
1417 		}
1418 	}
1419 
1420 	AUTOFS_DPRINT((5, "auto_search: error=%d\n", error));
1421 	return (error);
1422 }
1423 
/*
 * If dvp is mounted on, get path's vnode in the mounted on
 * filesystem.  Path is relative to dvp, ie "./path".
 * If successful, *mvpp points to the held mountpoint vnode.
 *
 * The lookup is first attempted with an on-stack pathname buffer of
 * TYPICALMAXPATHLEN bytes and is retried with a buffer obtained from
 * pn_get() if that yields ENAMETOOLONG.
 *
 * Returns 0 on success, EBUSY if nothing is mounted on dvp any more,
 * or the error reported by vn_vfswlock_wait(), VFS_ROOT(), the
 * pathname routines or lookuppnvp().
 */
/* ARGSUSED */
static int
auto_getmntpnt(
	vnode_t *dvp,
	char *path,
	vnode_t **mvpp,		/* vnode for mountpoint */
	cred_t *cred)
{
	int error = 0;
	vnode_t *newvp;
	char namebuf[TYPICALMAXPATHLEN];
	struct pathname lookpn;
	vfs_t *vfsp;

	AUTOFS_DPRINT((4, "auto_getmntpnt: path=%s\n", path));

	/* nothing has been acquired yet, so a plain return is enough */
	if (error = vn_vfswlock_wait(dvp))
		return (error);

	/*
	 * Now that we have the vfswlock, check to see if dvp
	 * is still mounted on.  If not, then just bail out as
	 * there is no need to remount the triggers since the
	 * higher level mount point has gotten unmounted.
	 */
	vfsp = vn_mountedvfs(dvp);
	if (vfsp == NULL) {
		vn_vfsunlock(dvp);
		error = EBUSY;
		goto done;
	}
	/*
	 * Since mounted on, lookup "path" in the new filesystem,
	 * it is important that we do the filesystem jump here to
	 * avoid lookuppn() calling auto_lookup on dvp and deadlock.
	 */
	vfs_lock_wait(vfsp);
	vn_vfsunlock(dvp);
	error = VFS_ROOT(vfsp, &newvp);
	vfs_unlock(vfsp);
	if (error)
		goto done;

	/*
	 * We do a VN_HOLD on newvp just in case the first call to
	 * lookuppnvp() fails with ENAMETOOLONG.  We should still have a
	 * reference to this vnode for the second call to lookuppnvp().
	 *
	 * From here on newvp carries two holds (one from VFS_ROOT, one
	 * from this VN_HOLD); every path below gives both of them back,
	 * either through lookuppnvp() or through an explicit VN_RELE.
	 */
	VN_HOLD(newvp);

	/*
	 * Now create the pathname struct so we can make use of lookuppnvp,
	 * and pn_getcomponent.
	 * This code is similar to lookupname() in fs/lookup.c.
	 */
	error = pn_get_buf(path, UIO_SYSSPACE, &lookpn,
		namebuf, sizeof (namebuf));
	if (error == 0) {
		error = lookuppnvp(&lookpn, NULL, NO_FOLLOW, NULLVPP,
		    mvpp, rootdir, newvp, cred);
	} else
		VN_RELE(newvp);	/* stands in for the release a lookup does */
	if (error == ENAMETOOLONG) {
		/*
		 * This thread used a pathname > TYPICALMAXPATHLEN bytes long.
		 * newvp is VN_RELE'd by this call to lookuppnvp.
		 *
		 * Using 'rootdir' in a zone's context is OK here: we already
		 * ascertained that there are no '..'s in the path, and we're
		 * not following symlinks.
		 */
		if ((error = pn_get(path, UIO_SYSSPACE, &lookpn)) == 0) {
			error = lookuppnvp(&lookpn, NULL, NO_FOLLOW, NULLVPP,
			    mvpp, rootdir, newvp, cred);
			pn_free(&lookpn);
		} else
			VN_RELE(newvp);
	} else {
		/*
		 * Need to release newvp here since we held it.
		 */
		VN_RELE(newvp);
	}

done:
	/*
	 * NOTE(review): on the early-exit paths above this function has
	 * not stored anything through mvpp, so the value printed here is
	 * whatever the caller left in *mvpp.
	 */
	AUTOFS_DPRINT((5, "auto_getmntpnt: path=%s *mvpp=%p error=%d\n",
	    path, (void *)*mvpp, error));
	return (error);
}
1518 
/*
 * True when fnnode 'x' still has something beneath it: directory
 * entries of its own, or a filesystem mounted on its vnode.
 * The argument is evaluated twice.
 */
#define	DEEPER(x)	\
	((x)->fn_dirents != NULL || vn_mountedvfs(fntovn((x))) != NULL)
1521 
1522 /*
1523  * The caller, should have already VN_RELE'd its reference to the
1524  * root vnode of this filesystem.
1525  */
1526 static int
1527 auto_inkernel_unmount(vfs_t *vfsp)
1528 {
1529 	vnode_t *cvp = vfsp->vfs_vnodecovered;
1530 	int error;
1531 
1532 	AUTOFS_DPRINT((4,
1533 	    "auto_inkernel_unmount: devid=%lx mntpnt(%p) count %u\n",
1534 	    vfsp->vfs_dev, (void *)cvp, cvp->v_count));
1535 
1536 	ASSERT(vn_vfswlock_held(cvp));
1537 
1538 	/*
1539 	 * Perform the unmount
1540 	 * The mountpoint has already been locked by the caller.
1541 	 */
1542 	error = dounmount(vfsp, 0, kcred);
1543 
1544 	AUTOFS_DPRINT((5, "auto_inkernel_unmount: exit count %u\n",
1545 	    cvp->v_count));
1546 	return (error);
1547 }
1548 
/*
 * Unmounts, in the kernel, every trigger node on fnp's trigger list.
 *
 * fnp's saved action list is handed back through *alp; fn_alp and
 * fn_trigger are cleared before returning, so the pointer stored in
 * *alp is the only one this routine leaves behind.
 *
 * Must be entered with fnp->fn_rwlock held as writer; the lock is
 * dropped around each unmount and held again on return.  Each unmount
 * goes through auto_inkernel_unmount(), which asserts that the covered
 * vnode's vfs lock is already held.  A failed unmount panics the system.
 */
static void
unmount_triggers(fnnode_t *fnp, action_list **alp)
{
	fnnode_t *tp, *next;
	int error = 0;
	vfs_t *vfsp;
	vnode_t *tvp;

	AUTOFS_DPRINT((4, "unmount_triggers: fnp=%p\n", (void *)fnp));
	ASSERT(RW_WRITE_HELD(&fnp->fn_rwlock));

	*alp = fnp->fn_alp;
	next = fnp->fn_trigger;
	while ((tp = next) != NULL) {
		tvp = fntovn(tp);
		ASSERT(tvp->v_count >= 2);
		/* pick up the successor before tp is unmounted */
		next = tp->fn_next;
		/*
		 * drop writer's lock since the unmount will end up
		 * disconnecting this node from fnp and needs to acquire
		 * the writer's lock again.
		 * next has at least a reference count >= 2 since it's
		 * a trigger node, therefore can not be accidentally freed
		 * by a VN_RELE
		 */
		rw_exit(&fnp->fn_rwlock);

		/* remember the vfs before giving up our hold on tvp */
		vfsp = tvp->v_vfsp;

		/*
		 * Its parent was holding a reference to it, since this
		 * is a trigger vnode.
		 */
		VN_RELE(tvp);
		if (error = auto_inkernel_unmount(vfsp)) {
			/* tvp is only printed here, never dereferenced */
			cmn_err(CE_PANIC, "unmount_triggers: "
			    "unmount of vp=%p failed error=%d",
			    (void *)tvp, error);
		}
		/*
		 * reacquire writer's lock
		 */
		rw_enter(&fnp->fn_rwlock, RW_WRITER);
	}

	/*
	 * We were holding a reference to our parent.  Drop that.
	 */
	VN_RELE(fntovn(fnp));
	fnp->fn_trigger = NULL;
	fnp->fn_alp = NULL;

	AUTOFS_DPRINT((5, "unmount_triggers: finished\n"));
}
1606 
/*
 * This routine locks the mountpoint of every trigger node if they're
 * not busy, or returns EBUSY if any node is busy. If a trigger node should
 * be unmounted first, then it sets nfnp to point to it, otherwise nfnp
 * points to NULL.
 *
 * The caller must hold fnp->fn_rwlock as writer.
 *
 * On a 0 return every trigger's covered vnode is left locked.  On an
 * EBUSY return all locks taken here have been released again.  *nfnp is
 * set (with a hold on its vnode) only when the offending trigger could
 * be locked and had no mount in progress, i.e. it was found busy because
 * of DEEPER() or its reference count.
 */
static int
triggers_busy(fnnode_t *fnp, fnnode_t **nfnp)
{
	int error = 0, done;
	int lck_error = 0;
	fnnode_t *tp, *t1p;
	vfs_t *vfsp;

	ASSERT(RW_WRITE_HELD(&fnp->fn_rwlock));

	*nfnp = NULL;
	for (tp = fnp->fn_trigger; tp != NULL; tp = tp->fn_next) {
		AUTOFS_DPRINT((10, "\ttrigger: %s\n", tp->fn_name));
		vfsp = fntovn(tp)->v_vfsp;
		error = 0;
		/*
		 * The vn_vfsunlock will be done in auto_inkernel_unmount.
		 */
		lck_error = vn_vfswlock(vfsp->vfs_vnodecovered);
		if (lck_error == 0) {
			mutex_enter(&tp->fn_lock);
			ASSERT((tp->fn_flags & MF_LOOKUP) == 0);
			if (tp->fn_flags & MF_INPROG) {
				/*
				 * a mount is in progress
				 */
				error = EBUSY;
			}
			mutex_exit(&tp->fn_lock);
		}
		if (lck_error || error || DEEPER(tp) ||
		    ((fntovn(tp))->v_count) > 2) {
			/*
			 * couldn't lock it because it's busy,
			 * It is mounted on or has dirents?
			 * If reference count is greater than two, then
			 * somebody else is holding a reference to this vnode.
			 * One reference is for the mountpoint, and the second
			 * is for the trigger node.
			 */
			AUTOFS_DPRINT((10, "\ttrigger busy\n"));
			if ((lck_error == 0) && (error == 0)) {
				*nfnp = tp;
				/*
				 * The matching VN_RELE is done in
				 * unmount_tree().
				 */
				VN_HOLD(fntovn(*nfnp));
			}
			/*
			 * Unlock previously locked mountpoints
			 */
			for (done = 0, t1p = fnp->fn_trigger; !done;
			    t1p = t1p->fn_next) {
				/*
				 * Unlock all nodes previously
				 * locked. All nodes up to 'tp'
				 * were successfully locked. If 'lck_error' is
				 * set, then 'tp' was not locked, and thus
				 * should not be unlocked. If
				 * 'lck_error' is not set, then 'tp' was
				 * successfully locked, and it should
				 * be unlocked.
				 */
				if (t1p != tp || !lck_error) {
					vfsp = fntovn(t1p)->v_vfsp;
					vn_vfsunlock(vfsp->vfs_vnodecovered);
				}
				/* 'tp' is the last node that needs a look */
				done = (t1p == tp);
			}
			error = EBUSY;
			break;
		}
	}

	AUTOFS_DPRINT((4, "triggers_busy: error=%d\n", error));
	return (error);
}
1691 
1692 /*
1693  * Unlock previously locked trigger nodes.
1694  */
1695 static int
1696 triggers_unlock(fnnode_t *fnp)
1697 {
1698 	fnnode_t *tp;
1699 	vfs_t *vfsp;
1700 
1701 	ASSERT(RW_WRITE_HELD(&fnp->fn_rwlock));
1702 
1703 	for (tp = fnp->fn_trigger; tp != NULL; tp = tp->fn_next) {
1704 		AUTOFS_DPRINT((10, "\tunlock trigger: %s\n", tp->fn_name));
1705 		vfsp = fntovn(tp)->v_vfsp;
1706 		vn_vfsunlock(vfsp->vfs_vnodecovered);
1707 	}
1708 
1709 	return (0);
1710 }
1711 
/*
 * Unmount the filesystem mounted on top of cvp.
 *
 * It is the caller's responsibility to grab the VVFSLOCK.
 * Releases the VVFSLOCK upon return.
 *
 * If 'force' is set, or the node is flagged MF_IK_MOUNT, the unmount is
 * done in the kernel; otherwise an unmount request describing the mount
 * is built and sent to the daemon.
 *
 * Returns 0 on success, ENOMEM if the mount options could not be
 * obtained, or the error from the in-kernel unmount or from
 * auto_send_unmount_request().
 */
static int
unmount_node(vnode_t *cvp, int force)
{
	int error = 0;
	fnnode_t *cfnp;
	vfs_t *vfsp;
	umntrequest ul;
	fninfo_t *fnip;

	AUTOFS_DPRINT((4, "\tunmount_node cvp=%p\n", (void *)cvp));

	ASSERT(vn_vfswlock_held(cvp));
	cfnp = vntofn(cvp);
	vfsp = vn_mountedvfs(cvp);

	if (force || cfnp->fn_flags & MF_IK_MOUNT) {
		/*
		 * Mount was performed in the kernel, so
		 * do an in-kernel unmount. auto_inkernel_unmount()
		 * will vn_vfsunlock(cvp).
		 */
		error = auto_inkernel_unmount(vfsp);
	} else {
		zone_t *zone = NULL;
		refstr_t *mntpt, *resource;
		size_t mntoptslen;

		/*
		 * Get the mnttab information of the node
		 * and ask the daemon to unmount it.
		 */
		bzero(&ul, sizeof (ul));
		mntfs_getmntopts(vfsp, &ul.mntopts, &mntoptslen);
		if (ul.mntopts == NULL) {
			auto_log(cfnp->fn_globals, CE_WARN, "unmount_node: "
			    "no memory");
			vn_vfsunlock(cvp);
			error = ENOMEM;
			goto done;
		}
		/* cut over-long option strings down to AUTOFS_MAXOPTSLEN */
		if (mntoptslen > AUTOFS_MAXOPTSLEN)
			ul.mntopts[AUTOFS_MAXOPTSLEN - 1] = '\0';

		/*
		 * ul.mntpnt and ul.mntresource point into these refstrs,
		 * which are only released once the request has been sent.
		 */
		mntpt = vfs_getmntpoint(vfsp);
		ul.mntpnt = (char *)refstr_value(mntpt);
		resource = vfs_getresource(vfsp);
		ul.mntresource = (char *)refstr_value(resource);

		fnip = vfstofni(cvp->v_vfsp);
		ul.isdirect = fnip->fi_flags & MF_DIRECT ? TRUE : FALSE;

		/*
		 * Since a zone'd automountd's view of the autofs mount points
		 * differs from those in the kernel, we need to make sure we
		 * give it consistent mount points.
		 */
		ASSERT(fnip->fi_zoneid == getzoneid());
		zone = curproc->p_zone;

		if (fnip->fi_zoneid != GLOBAL_ZONEID) {
			if (ZONE_PATH_VISIBLE(ul.mntpnt, zone)) {
				ul.mntpnt =
				    ZONE_PATH_TRANSLATE(ul.mntpnt, zone);
			}
			if (ZONE_PATH_VISIBLE(ul.mntresource, zone)) {
				ul.mntresource =
				    ZONE_PATH_TRANSLATE(ul.mntresource, zone);
			}
		}
		ul.fstype = vfssw[vfsp->vfs_fstype].vsw_name;
		/* the lock is dropped before the request goes to the daemon */
		vn_vfsunlock(cvp);

		error = auto_send_unmount_request(fnip, &ul, CRED(), FALSE);
		/* freed with the length mntfs_getmntopts() reported */
		kmem_free(ul.mntopts, mntoptslen);
		refstr_rele(mntpt);
		refstr_rele(resource);
	}

done:
	AUTOFS_DPRINT((5, "\tunmount_node cvp=%p error=%d\n", (void *)cvp,
	    error));
	return (error);
}
1799 
1800 /*
1801  * vp is the "root" of the AUTOFS filesystem.
1802  * return EBUSY if any thread is holding a reference to this vnode
1803  * other than us.
1804  */
1805 static int
1806 check_auto_node(vnode_t *vp)
1807 {
1808 	fnnode_t *fnp;
1809 	int error = 0;
1810 	/*
1811 	 * number of references to expect for
1812 	 * a non-busy vnode.
1813 	 */
1814 	uint_t count;
1815 
1816 	AUTOFS_DPRINT((4, "\tcheck_auto_node vp=%p ", (void *)vp));
1817 	fnp = vntofn(vp);
1818 	ASSERT(fnp->fn_flags & MF_INPROG);
1819 	ASSERT((fnp->fn_flags & MF_LOOKUP) == 0);
1820 
1821 	count = 1;		/* we are holding a reference to vp */
1822 	if (fnp->fn_flags & MF_TRIGGER) {
1823 		/*
1824 		 * parent holds a pointer to us (trigger)
1825 		 */
1826 		count++;
1827 	}
1828 	if (fnp->fn_trigger != NULL) {
1829 		/*
1830 		 * The trigger nodes have a hold on us.
1831 		 */
1832 		count++;
1833 	}
1834 	mutex_enter(&vp->v_lock);
1835 	if (vp->v_flag & VROOT)
1836 		count++;
1837 	ASSERT(vp->v_count > 0);
1838 	AUTOFS_DPRINT((10, "\tcount=%u ", vp->v_count));
1839 	if (vp->v_count > count)
1840 		error = EBUSY;
1841 	mutex_exit(&vp->v_lock);
1842 
1843 	AUTOFS_DPRINT((5, "\tcheck_auto_node error=%d ", error));
1844 	return (error);
1845 }
1846 
1847 /*
1848  * rootvp is the root of the AUTOFS filesystem.
1849  * If rootvp is busy (v_count > 1) returns EBUSY.
1850  * else removes every vnode under this tree.
1851  * ASSUMPTION: Assumes that the only node which can be busy is
1852  * the root vnode. This filesystem better be two levels deep only,
1853  * the root and its immediate subdirs.
1854  * The daemon will "AUTOFS direct-mount" only one level below the root.
1855  */
1856 static int
1857 unmount_autofs(vnode_t *rootvp)
1858 {
1859 	fnnode_t *fnp, *rootfnp, *nfnp;
1860 	int error;
1861 
1862 	AUTOFS_DPRINT((4, "\tunmount_autofs rootvp=%p ", (void *)rootvp));
1863 
1864 	error = check_auto_node(rootvp);
1865 	if (error == 0) {
1866 		/*
1867 		 * Remove all its immediate subdirectories.
1868 		 */
1869 		rootfnp = vntofn(rootvp);
1870 		rw_enter(&rootfnp->fn_rwlock, RW_WRITER);
1871 		nfnp = NULL;	/* lint clean */
1872 		for (fnp = rootfnp->fn_dirents; fnp != NULL; fnp = nfnp) {
1873 			ASSERT(fntovn(fnp)->v_count == 0);
1874 			ASSERT(fnp->fn_dirents == NULL);
1875 			ASSERT(fnp->fn_linkcnt == 2);
1876 			fnp->fn_linkcnt--;
1877 			auto_disconnect(rootfnp, fnp);
1878 			nfnp = fnp->fn_next;
1879 			auto_freefnnode(fnp);
1880 		}
1881 		rw_exit(&rootfnp->fn_rwlock);
1882 	}
1883 	AUTOFS_DPRINT((5, "\tunmount_autofs error=%d ", error));
1884 	return (error);
1885 }
1886 
/*
 * Maximum number of unmount threads allowed to run at the same time.
 * NOTE(review): the code that consults this limit is outside this
 * section; confirm how it is enforced before changing the value.
 */
static int autofs_unmount_threads = 5;
1891 
1892 /*
1893  * XXX unmount_tree() is not suspend-safe within the scope of
1894  * the present model defined for cpr to suspend the system. Calls made
1895  * by the unmount_tree() that have been identified to be unsafe are
1896  * (1) RPC client handle setup and client calls to automountd which can
1897  * block deep down in the RPC library, (2) kmem_alloc() calls with the
1898  * KM_SLEEP flag which can block if memory is low, and (3) VFS_*() and
1899  * VOP_*() calls which can result in over the wire calls to servers.
1900  * The thread should be completely reevaluated to make it suspend-safe in
1901  * case of future updates to the cpr model.
1902  */
1903 void
1904 unmount_tree(struct autofs_globals *fngp, int force)
1905 {
1906 	vnode_t *vp, *newvp;
1907 	vfs_t *vfsp;
1908 	fnnode_t *fnp, *nfnp, *pfnp;
1909 	action_list *alp;
1910 	int error, ilocked_it = 0;
1911 	fninfo_t *fnip;
1912 	time_t ref_time;
1913 	int autofs_busy_root, unmount_as_unit, unmount_done = 0;
1914 	timestruc_t now;
1915 
1916 	callb_cpr_t cprinfo;
1917 	kmutex_t unmount_tree_cpr_lock;
1918 
1919 	mutex_init(&unmount_tree_cpr_lock, NULL, MUTEX_DEFAULT, NULL);
1920 	CALLB_CPR_INIT(&cprinfo, &unmount_tree_cpr_lock, callb_generic_cpr,
1921 		"unmount_tree");
1922 
1923 	/*
1924 	 * Got to release lock before attempting unmount in case
1925 	 * it hangs.
1926 	 */
1927 	rw_enter(&fngp->fng_rootfnnodep->fn_rwlock, RW_READER);
1928 	if ((fnp = fngp->fng_rootfnnodep->fn_dirents) == NULL) {
1929 		ASSERT(fngp->fng_fnnode_count == 1);
1930 		/*
1931 		 * no autofs mounted, done.
1932 		 */
1933 		rw_exit(&fngp->fng_rootfnnodep->fn_rwlock);
1934 		goto done;
1935 	}
1936 	VN_HOLD(fntovn(fnp));
1937 	rw_exit(&fngp->fng_rootfnnodep->fn_rwlock);
1938 
1939 	vp = fntovn(fnp);
1940 	fnip = vfstofni(vp->v_vfsp);
1941 	/*
1942 	 * autofssys() will be calling in from the global zone and doing
1943 	 * work on the behalf of the given zone, hence we can't always assert
1944 	 * that we have the right credentials, nor that the caller is always in
1945 	 * the correct zone.
1946 	 *
1947 	 * We do, however, know that if this is a "forced unmount" operation
1948 	 * (which autofssys() does), then we won't go down to the krpc layers,
1949 	 * so we don't need to fudge with the credentials.
1950 	 */
1951 	ASSERT(force || fnip->fi_zoneid == getzoneid());
1952 	if (!force && auto_null_request(fnip, kcred, FALSE) != 0) {
1953 		/*
1954 		 * automountd not running in this zone,
1955 		 * don't attempt unmounting this round.
1956 		 */
1957 		VN_RELE(vp);
1958 		goto done;
1959 	}
1960 	/* reference time for this unmount round */
1961 	ref_time = gethrestime_sec();
1962 	/*
1963 	 * If this an autofssys() call, we need to make sure we don't skip
1964 	 * nodes because we think we saw them recently.
1965 	 */
1966 	mutex_enter(&fnp->fn_lock);
1967 	if (force && fnp->fn_unmount_ref_time >= ref_time)
1968 		ref_time = fnp->fn_unmount_ref_time + 1;
1969 	mutex_exit(&fnp->fn_lock);
1970 
1971 	AUTOFS_DPRINT((4, "unmount_tree (ID=%ld)\n", ref_time));
1972 top:
1973 	AUTOFS_DPRINT((10, "unmount_tree: %s\n", fnp->fn_name));
1974 	ASSERT(fnp);
1975 	vp = fntovn(fnp);
1976 	if (vp->v_type == VLNK) {
1977 		/*
1978 		 * can't unmount symbolic links
1979 		 */
1980 		goto next;
1981 	}
1982 	fnip = vfstofni(vp->v_vfsp);
1983 	ASSERT(vp->v_count > 0);
1984 	error = 0;
1985 	autofs_busy_root = unmount_as_unit = 0;
1986 	alp = NULL;
1987 
1988 	ilocked_it = 0;
1989 	mutex_enter(&fnp->fn_lock);
1990 	if (fnp->fn_flags & (MF_INPROG | MF_LOOKUP)) {
1991 		/*
1992 		 * Either a mount, lookup or another unmount of this
1993 		 * subtree is in progress, don't attempt to unmount at
1994 		 * this time.
1995 		 */
1996 		mutex_exit(&fnp->fn_lock);
1997 		error = EBUSY;
1998 		goto next;
1999 	}
2000 	if (fnp->fn_unmount_ref_time >= ref_time) {
2001 		/*
2002 		 * Already been here, try next node.
2003 		 */
2004 		mutex_exit(&fnp->fn_lock);
2005 		error = EBUSY;
2006 		goto next;
2007 	}
2008 	fnp->fn_unmount_ref_time = ref_time;
2009 
2010 	/*
2011 	 * If forced operation ignore timeout values
2012 	 */
2013 	if (!force && fnp->fn_ref_time + fnip->fi_mount_to >
2014 	    gethrestime_sec()) {
2015 		/*
2016 		 * Node has been referenced recently, try the
2017 		 * unmount of its children if any.
2018 		 */
2019 		mutex_exit(&fnp->fn_lock);
2020 		AUTOFS_DPRINT((10, "fn_ref_time within range\n"));
2021 		rw_enter(&fnp->fn_rwlock, RW_READER);
2022 		if (fnp->fn_dirents) {
2023 			/*
2024 			 * Has subdirectory, attempt their
2025 			 * unmount first
2026 			 */
2027 			nfnp = fnp->fn_dirents;
2028 			VN_HOLD(fntovn(nfnp));
2029 			rw_exit(&fnp->fn_rwlock);
2030 
2031 			VN_RELE(vp);
2032 			fnp = nfnp;
2033 			goto top;
2034 		}
2035 		rw_exit(&fnp->fn_rwlock);
2036 		/*
2037 		 * No children, try next node.
2038 		 */
2039 		error = EBUSY;
2040 		goto next;
2041 	}
2042 
2043 	AUTOFS_BLOCK_OTHERS(fnp, MF_INPROG);
2044 	fnp->fn_error = 0;
2045 	mutex_exit(&fnp->fn_lock);
2046 	ilocked_it = 1;
2047 
2048 	rw_enter(&fnp->fn_rwlock, RW_WRITER);
2049 	if (fnp->fn_trigger != NULL) {
2050 		unmount_as_unit = 1;
2051 		if ((vn_mountedvfs(vp) == NULL) && (check_auto_node(vp))) {
2052 			/*
2053 			 * AUTOFS mountpoint is busy, there's
2054 			 * no point trying to unmount. Fall through
2055 			 * to attempt to unmount subtrees rooted
2056 			 * at a possible trigger node, but remember
2057 			 * not to unmount this tree.
2058 			 */
2059 			autofs_busy_root = 1;
2060 		}
2061 
2062 		if (triggers_busy(fnp, &nfnp)) {
2063 			rw_exit(&fnp->fn_rwlock);
2064 			if (nfnp == NULL) {
2065 				error = EBUSY;
2066 				goto next;
2067 			}
2068 			/*
2069 			 * nfnp is busy, try to unmount it first
2070 			 */
2071 			mutex_enter(&fnp->fn_lock);
2072 			AUTOFS_UNBLOCK_OTHERS(fnp, MF_INPROG);
2073 			mutex_exit(&fnp->fn_lock);
2074 			VN_RELE(vp);
2075 			ASSERT(fntovn(nfnp)->v_count > 1);
2076 			fnp = nfnp;
2077 			goto top;
2078 		}
2079 
2080 		/*
2081 		 * At this point, we know all trigger nodes are locked,
2082 		 * and they're not busy or mounted on.
2083 		 */
2084 
2085 		if (autofs_busy_root) {
			/*
			 * Got to unlock the trigger nodes since
			 * I'm not really going to unmount the filesystem.
			 */
2090 			(void) triggers_unlock(fnp);
2091 		} else {
2092 			/*
2093 			 * Attempt to unmount all the trigger nodes,
2094 			 * save the action_list in case we need to
2095 			 * remount them later. The action_list will be XDR
2096 			 * freed later if there was no need to remount the
2097 			 * trigger nodes.
2098 			 */
2099 			unmount_triggers(fnp, &alp);
2100 		}
2101 	}
2102 	rw_exit(&fnp->fn_rwlock);
2103 
2104 	if (autofs_busy_root)
2105 		goto next;
2106 
2107 	(void) vn_vfswlock_wait(vp);
2108 
2109 	vfsp = vn_mountedvfs(vp);
2110 	if (vfsp != NULL) {
2111 		/*
2112 		 * Node is mounted on.
2113 		 */
2114 		AUTOFS_DPRINT((10, "\tNode is mounted on\n"));
2115 
2116 		/*
2117 		 * Deal with /xfn/host/jurassic alikes here...
2118 		 */
2119 		if (vfs_matchops(vfsp, vfs_getops(vp->v_vfsp))) {
2120 			/*
2121 			 * If the filesystem mounted here is AUTOFS, and it
2122 			 * is busy, try to unmount the tree rooted on it
2123 			 * first. We know this call to VFS_ROOT is safe to
2124 			 * call while holding VVFSLOCK, since it resolves
2125 			 * to a call to auto_root().
2126 			 */
2127 			AUTOFS_DPRINT((10, "\t\tAUTOFS mounted here\n"));
2128 			vfs_lock_wait(vfsp);
2129 			if (VFS_ROOT(vfsp, &newvp)) {
2130 				cmn_err(CE_PANIC,
2131 				    "unmount_tree: VFS_ROOT(vfs=%p) failed",
2132 				    (void *)vfsp);
2133 			}
2134 			vfs_unlock(vfsp);
2135 			nfnp = vntofn(newvp);
2136 			if (DEEPER(nfnp)) {
2137 				vn_vfsunlock(vp);
2138 				mutex_enter(&fnp->fn_lock);
2139 				AUTOFS_UNBLOCK_OTHERS(fnp, MF_INPROG);
2140 				mutex_exit(&fnp->fn_lock);
2141 				VN_RELE(vp);
2142 				fnp = nfnp;
2143 				goto top;
2144 			}
2145 			/*
2146 			 * Fall through to unmount this filesystem
2147 			 */
2148 			VN_RELE(newvp);
2149 		}
2150 
2151 		/*
2152 		 * vn_vfsunlock(vp) is done inside unmount_node()
2153 		 */
2154 		error = unmount_node(vp, force);
2155 		if (error == ECONNRESET) {
2156 			AUTOFS_DPRINT((10, "\tConnection dropped\n"));
2157 			if (vn_mountedvfs(vp) == NULL) {
2158 				/*
2159 				 * The filesystem was unmounted before the
2160 				 * daemon died. Unfortunately we can not
2161 				 * determine whether all the cleanup work was
2162 				 * successfully finished (i.e. update mnttab,
2163 				 * or notify NFS server of the unmount).
2164 				 * We should not retry the operation since the
2165 				 * filesystem has already been unmounted, and
2166 				 * may have already been removed from mnttab,
2167 				 * in such case the devid/rdevid we send to
2168 				 * the daemon will not be matched. So we have
2169 				 * to be content with the partial unmount.
2170 				 * Since the mountpoint is no longer covered, we
2171 				 * clear the error condition.
2172 				 */
2173 				error = 0;
2174 				auto_log(fngp, CE_WARN,
2175 				    "unmount_tree: automountd connection "
2176 				    "dropped");
2177 				if (fnip->fi_flags & MF_DIRECT) {
2178 					auto_log(fngp, CE_WARN, "unmount_tree: "
2179 					    "%s successfully unmounted - "
2180 					    "do not remount triggers",
2181 					    fnip->fi_path);
2182 				} else {
2183 					auto_log(fngp, CE_WARN, "unmount_tree: "
2184 					    "%s/%s successfully unmounted - "
2185 					    "do not remount triggers",
2186 					    fnip->fi_path, fnp->fn_name);
2187 				}
2188 			}
2189 		}
2190 	} else {
2191 		vn_vfsunlock(vp);
2192 		AUTOFS_DPRINT((10, "\tNode is AUTOFS\n"));
2193 		if (unmount_as_unit) {
2194 			AUTOFS_DPRINT((10, "\tunmount as unit\n"));
2195 			error = unmount_autofs(vp);
2196 		} else {
2197 			AUTOFS_DPRINT((10, "\tunmount one at a time\n"));
2198 			rw_enter(&fnp->fn_rwlock, RW_READER);
2199 			if (fnp->fn_dirents != NULL) {
2200 				/*
2201 				 * Has subdirectory, attempt their
2202 				 * unmount first
2203 				 */
2204 				nfnp = fnp->fn_dirents;
2205 				VN_HOLD(fntovn(nfnp));
2206 				rw_exit(&fnp->fn_rwlock);
2207 
2208 				mutex_enter(&fnp->fn_lock);
2209 				AUTOFS_UNBLOCK_OTHERS(fnp, MF_INPROG);
2210 				mutex_exit(&fnp->fn_lock);
2211 				VN_RELE(vp);
2212 				fnp = nfnp;
2213 				goto top;
2214 			}
2215 			rw_exit(&fnp->fn_rwlock);
2216 			goto next;
2217 		}
2218 	}
2219 
2220 	if (error) {
2221 		AUTOFS_DPRINT((10, "\tUnmount failed\n"));
2222 		if (alp != NULL) {
2223 			/*
2224 			 * Unmount failed, got to remount triggers.
2225 			 */
2226 			ASSERT((fnp->fn_flags & MF_THISUID_MATCH_RQD) == 0);
2227 			error = auto_perform_actions(fnip, fnp, alp, CRED());
2228 			if (error) {
2229 				auto_log(fngp, CE_WARN, "autofs: can't remount "
2230 				    "triggers fnp=%p error=%d", (void *)fnp,
2231 				    error);
2232 				error = 0;
2233 				/*
2234 				 * The action list should have been
2235 				 * xdr_free'd by auto_perform_actions
2236 				 * since an error occurred
2237 				 */
2238 				alp = NULL;
2239 			}
2240 		}
2241 	} else {
2242 		/*
2243 		 * The unmount succeeded, which will cause this node to
2244 		 * be removed from its parent if it's an indirect mount,
2245 		 * therefore update the parent's atime and mtime now.
2246 		 * I don't update them in auto_disconnect() because I
2247 		 * don't want atime and mtime changing every time a
2248 		 * lookup goes to the daemon and creates a new node.
2249 		 */
2250 		unmount_done = 1;
2251 		if ((fnip->fi_flags & MF_DIRECT) == 0) {
2252 			gethrestime(&now);
2253 			if (fnp->fn_parent == fngp->fng_rootfnnodep)
2254 				fnp->fn_atime = fnp->fn_mtime = now;
2255 			else
2256 				fnp->fn_parent->fn_atime =
2257 					fnp->fn_parent->fn_mtime = now;
2258 		}
2259 
2260 		/*
2261 		 * Free the action list here
2262 		 */
2263 		if (alp != NULL) {
2264 			xdr_free(xdr_action_list, (char *)alp);
2265 			alp = NULL;
2266 		}
2267 	}
2268 
2269 	fnp->fn_ref_time = gethrestime_sec();
2270 
2271 next:
2272 	/*
2273 	 * Obtain parent's readers lock before grabbing
2274 	 * reference to next sibling.
2275 	 * XXX Note that nodes in the top level list (mounted
2276 	 * in user space not by the daemon in the kernel) parent is itself,
2277 	 * therefore grabbing the lock makes no sense, but doesn't
2278 	 * hurt either.
2279 	 */
2280 	pfnp = fnp->fn_parent;
2281 	ASSERT(pfnp != NULL);
2282 	rw_enter(&pfnp->fn_rwlock, RW_READER);
2283 	if ((nfnp = fnp->fn_next) != NULL)
2284 		VN_HOLD(fntovn(nfnp));
2285 	rw_exit(&pfnp->fn_rwlock);
2286 
2287 	if (ilocked_it) {
2288 		mutex_enter(&fnp->fn_lock);
2289 		if (unmount_done) {
2290 			/*
2291 			 * Other threads may be waiting for this unmount to
2292 			 * finish. We must let it know that in order to
2293 			 * proceed, it must trigger the mount itself.
2294 			 */
2295 			fnp->fn_flags &= ~MF_IK_MOUNT;
2296 			if (fnp->fn_flags & MF_WAITING)
2297 				fnp->fn_error = EAGAIN;
2298 			unmount_done = 0;
2299 		}
2300 		AUTOFS_UNBLOCK_OTHERS(fnp, MF_INPROG);
2301 		mutex_exit(&fnp->fn_lock);
2302 		ilocked_it = 0;
2303 	}
2304 
2305 	if (nfnp != NULL) {
2306 		VN_RELE(vp);
2307 		fnp = nfnp;
2308 		/*
2309 		 * Unmount next element
2310 		 */
2311 		goto top;
2312 	}
2313 
2314 	/*
2315 	 * We don't want to unmount rootfnnodep, so the check is made here
2316 	 */
2317 	ASSERT(pfnp != fnp);
2318 	if (pfnp != fngp->fng_rootfnnodep) {
2319 		/*
2320 		 * Now attempt to unmount my parent
2321 		 */
2322 		VN_HOLD(fntovn(pfnp));
2323 		VN_RELE(vp);
2324 		fnp = pfnp;
2325 
2326 		goto top;
2327 	}
2328 
2329 	VN_RELE(vp);
2330 
2331 	/*
2332 	 * At this point we've walked the entire tree and attempted to unmount
2333 	 * as much as we can one level at a time.
2334 	 */
2335 done:
2336 	mutex_enter(&unmount_tree_cpr_lock);
2337 	CALLB_CPR_EXIT(&cprinfo);
2338 	mutex_destroy(&unmount_tree_cpr_lock);
2339 }
2340 
2341 static void
2342 unmount_zone_tree(struct autofs_globals *fngp)
2343 {
2344 	unmount_tree(fngp, 0);
2345 	mutex_enter(&fngp->fng_unmount_threads_lock);
2346 	fngp->fng_unmount_threads--;
2347 	mutex_exit(&fngp->fng_unmount_threads_lock);
2348 
2349 	AUTOFS_DPRINT((5, "unmount_tree done. Thread exiting.\n"));
2350 
2351 	zthread_exit();
2352 	/* NOTREACHED */
2353 }
2354 
2355 static int autofs_unmount_thread_timer = 120;	/* in seconds */
2356 
2357 void
2358 auto_do_unmount(struct autofs_globals *fngp)
2359 {
2360 	callb_cpr_t cprinfo;
2361 	clock_t timeleft;
2362 	zone_t *zone = curproc->p_zone;
2363 
2364 	CALLB_CPR_INIT(&cprinfo, &fngp->fng_unmount_threads_lock,
2365 		callb_generic_cpr, "auto_do_unmount");
2366 
2367 	for (;;) {	/* forever */
2368 		mutex_enter(&fngp->fng_unmount_threads_lock);
2369 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
2370 newthread:
2371 		mutex_exit(&fngp->fng_unmount_threads_lock);
2372 		timeleft = zone_status_timedwait(zone, lbolt +
2373 		    autofs_unmount_thread_timer * hz, ZONE_IS_SHUTTING_DOWN);
2374 		mutex_enter(&fngp->fng_unmount_threads_lock);
2375 
2376 		if (timeleft != -1) {	/* didn't time out */
2377 			ASSERT(zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN);
2378 			/*
2379 			 * zone is exiting... don't create any new threads.
2380 			 * fng_unmount_threads_lock is released implicitly by
2381 			 * the below.
2382 			 */
2383 			CALLB_CPR_SAFE_END(&cprinfo,
2384 				&fngp->fng_unmount_threads_lock);
2385 			CALLB_CPR_EXIT(&cprinfo);
2386 			zthread_exit();
2387 			/* NOTREACHED */
2388 		}
2389 		if (fngp->fng_unmount_threads < autofs_unmount_threads) {
2390 			fngp->fng_unmount_threads++;
2391 			CALLB_CPR_SAFE_END(&cprinfo,
2392 				&fngp->fng_unmount_threads_lock);
2393 			mutex_exit(&fngp->fng_unmount_threads_lock);
2394 
2395 			(void) zthread_create(NULL, 0, unmount_zone_tree, fngp,
2396 			    0, minclsyspri);
2397 		} else
2398 			goto newthread;
2399 	}
2400 	/* NOTREACHED */
2401 }
2402 
2403 /*
2404  * Is nobrowse specified in option string?
2405  * opts should be a null ('\0') terminated string.
2406  * Returns non-zero if nobrowse has been specified.
2407  */
2408 int
2409 auto_nobrowse_option(char *opts)
2410 {
2411 	char *buf;
2412 	char *p;
2413 	char *t;
2414 	int nobrowse = 0;
2415 	int last_opt = 0;
2416 	size_t len;
2417 
2418 	len = strlen(opts) + 1;
2419 	p = buf = kmem_alloc(len, KM_SLEEP);
2420 	(void) strcpy(buf, opts);
2421 	do {
2422 		if (t = strchr(p, ','))
2423 			*t++ = '\0';
2424 		else
2425 			last_opt++;
2426 		if (strcmp(p, MNTOPT_NOBROWSE) == 0)
2427 			nobrowse = 1;
2428 		else if (strcmp(p, MNTOPT_BROWSE) == 0)
2429 			nobrowse = 0;
2430 		p = t;
2431 	} while (!last_opt);
2432 	kmem_free(buf, len);
2433 
2434 	return (nobrowse);
2435 }
2436 
2437 /*
2438  * used to log warnings only if automountd is running
2439  * with verbose mode set
2440  */
2441 void
2442 auto_log(struct autofs_globals *fngp, int level, const char *fmt, ...)
2443 {
2444 	va_list args;
2445 
2446 	if (fngp->fng_verbose > 0) {
2447 		va_start(args, fmt);
2448 		vzcmn_err(fngp->fng_zoneid, level, fmt, args);
2449 		va_end(args);
2450 	}
2451 }
2452 
#ifdef DEBUG
/*
 * Debug output selector consulted by auto_dprint(); 0 means no
 * debugging.
 */
static int autofs_debug = 0;

/*
 * Debug print routine (DEBUG kernels only).
 *
 * A message tagged with `level' is printed when autofs_debug is
 * exactly that level, or when autofs_debug is above 10 and
 * (autofs_debug - 10) is at least `level'.
 *
 * Utilities used by both client and server
 * Standard levels:
 * 0) no debugging
 * 1) hard failures
 * 2) soft failures
 * 3) current test software
 * 4) main procedure entry points
 * 5) main procedure exit points
 * 6) utility procedure entry points
 * 7) utility procedure exit points
 * 8) obscure procedure entry points
 * 9) obscure procedure exit points
 * 10) random stuff
 * 11) all <= 1
 * 12) all <= 2
 * 13) all <= 3
 * ...
 */
/* PRINTFLIKE2 */
void
auto_dprint(int level, const char *fmt, ...)
{
	va_list ap;
	int exact;
	int cumulative;

	exact = (autofs_debug == level);
	cumulative = (autofs_debug > 10 && (autofs_debug - 10) >= level);
	if (!exact && !cumulative)
		return;

	va_start(ap, fmt);
	(void) vprintf(fmt, ap);
	va_end(ap);
}
#endif /* DEBUG */
2489