xref: /titanic_50/usr/src/uts/common/fs/autofs/auto_subr.c (revision c13de8f6a88563211bd4432ca11ca38ed3bf0fc0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/param.h>
30 #include <sys/kmem.h>
31 #include <sys/errno.h>
32 #include <sys/proc.h>
33 #include <sys/disp.h>
34 #include <sys/vfs.h>
35 #include <sys/vnode.h>
36 #include <sys/pathname.h>
37 #include <sys/cred.h>
38 #include <sys/mount.h>
39 #include <sys/cmn_err.h>
40 #include <sys/debug.h>
41 #include <sys/systm.h>
42 #include <rpc/types.h>
43 #include <rpc/xdr.h>
44 #include <rpc/auth.h>
45 #include <rpc/clnt.h>
46 #include <sys/ticotsord.h>
47 #include <sys/dirent.h>
48 #include <fs/fs_subr.h>
49 #include <rpcsvc/autofs_prot.h>
50 #include <sys/fs/autofs.h>
51 #include <sys/callb.h>
52 #include <sys/sysmacros.h>
53 #include <sys/zone.h>
54 #include <sys/fs/mntdata.h>
55 
56 /*
57  * Autofs and Zones:
58  *
59  * Zones are delegated the responsibility of managing their own autofs mounts
60  * and maps.  Each zone runs its own copy of automountd, with its own timeouts,
61  * and other logically "global" parameters.  kRPC and virtualization in the
62  * loopback transport (tl) will prevent a zone from communicating with another
63  * zone's automountd.
64  *
65  * Each zone has its own "rootfnnode" and associated tree of auto nodes.
66  *
 * Each zone also has its own set of "unmounter" kernel threads; these are
 * created and run within the zone's context (i.e., they are created via
 * zthread_create()).
70  *
71  * Cross-zone mount triggers are disallowed.  There is a check in
72  * auto_trigger_mount() to this effect; EPERM is returned to indicate that the
73  * mount is not owned by the caller.
74  *
75  * autofssys() enables a caller in the global zone to clean up in-kernel (as
76  * well as regular) autofs mounts via the unmount_tree() mechanism.  This is
77  * routinely done when all mounts are removed as part of zone shutdown.
78  */
79 #define	TYPICALMAXPATHLEN	64
80 
81 static kmutex_t autofs_nodeid_lock;
82 
83 static int auto_perform_link(fnnode_t *, struct linka *, cred_t *);
84 static int auto_perform_actions(fninfo_t *, fnnode_t *,
85     action_list *, cred_t *);
86 static int auto_getmntpnt(vnode_t *, char *, vnode_t **, cred_t *);
87 static int auto_lookup_request(fninfo_t *, char *, struct linka *,
88     cred_t *, bool_t, bool_t *);
89 static int auto_mount_request(fninfo_t *, char *, action_list **,
90     cred_t *, bool_t);
91 
92 /*
93  * Clears the MF_INPROG flag, and wakes up those threads sleeping on
94  * fn_cv_mount if MF_WAITING is set.
95  */
96 void
97 auto_unblock_others(
98 	fnnode_t *fnp,
99 	uint_t operation)		/* either MF_INPROG or MF_LOOKUP */
100 {
101 	ASSERT(operation & (MF_INPROG | MF_LOOKUP));
102 	fnp->fn_flags &= ~operation;
103 	if (fnp->fn_flags & MF_WAITING) {
104 		fnp->fn_flags &= ~MF_WAITING;
105 		cv_broadcast(&fnp->fn_cv_mount);
106 	}
107 }
108 
109 int
110 auto_wait4mount(fnnode_t *fnp)
111 {
112 	int error;
113 	k_sigset_t smask;
114 
115 	AUTOFS_DPRINT((4, "auto_wait4mount: fnp=%p\n", (void *)fnp));
116 
117 	mutex_enter(&fnp->fn_lock);
118 	while (fnp->fn_flags & (MF_INPROG | MF_LOOKUP)) {
119 		/*
120 		 * There is a mount or a lookup in progress.
121 		 */
122 		fnp->fn_flags |= MF_WAITING;
123 		sigintr(&smask, 1);
124 		if (!cv_wait_sig(&fnp->fn_cv_mount, &fnp->fn_lock)) {
125 			/*
126 			 * Decided not to wait for operation to
127 			 * finish after all.
128 			 */
129 			sigunintr(&smask);
130 			mutex_exit(&fnp->fn_lock);
131 			return (EINTR);
132 		}
133 		sigunintr(&smask);
134 	}
135 	error = fnp->fn_error;
136 
137 	if (error == EINTR) {
138 		/*
139 		 * The thread doing the mount got interrupted, we need to
140 		 * try again, by returning EAGAIN.
141 		 */
142 		error = EAGAIN;
143 	}
144 	mutex_exit(&fnp->fn_lock);
145 
146 	AUTOFS_DPRINT((5, "auto_wait4mount: fnp=%p error=%d\n", (void *)fnp,
147 	    error));
148 	return (error);
149 }
150 
151 int
152 auto_lookup_aux(fnnode_t *fnp, char *name, cred_t *cred)
153 {
154 	struct fninfo *fnip;
155 	struct linka link;
156 	bool_t mountreq = FALSE;
157 	int error = 0;
158 
159 	fnip = vfstofni(fntovn(fnp)->v_vfsp);
160 	bzero(&link, sizeof (link));
161 	error = auto_lookup_request(fnip, name, &link, cred, TRUE, &mountreq);
162 	if (!error) {
163 		if (link.link != NULL) {
164 			/*
165 			 * This node should be a symlink
166 			 */
167 			error = auto_perform_link(fnp, &link, cred);
168 			kmem_free(link.dir, strlen(link.dir) + 1);
169 			kmem_free(link.link, strlen(link.link) + 1);
170 		} else if (mountreq) {
171 			/*
172 			 * The automount daemon is requesting a mount,
173 			 * implying this entry must be a wildcard match and
174 			 * therefore in need of verification that the entry
175 			 * exists on the server.
176 			 */
177 			mutex_enter(&fnp->fn_lock);
178 			AUTOFS_BLOCK_OTHERS(fnp, MF_INPROG);
179 			fnp->fn_error = 0;
180 
181 			/*
182 			 * Unblock other lookup requests on this node,
183 			 * this is needed to let the lookup generated by
184 			 * the mount call to complete. The caveat is
185 			 * other lookups on this node can also get by,
186 			 * i.e., another lookup on this node that occurs
187 			 * while this lookup is attempting the mount
188 			 * would return a positive result no matter what.
189 			 * Therefore two lookups on the this node could
190 			 * potentially get disparate results.
191 			 */
192 			AUTOFS_UNBLOCK_OTHERS(fnp, MF_LOOKUP);
193 			mutex_exit(&fnp->fn_lock);
194 			/*
195 			 * auto_new_mount_thread fires up a new thread which
196 			 * calls automountd finishing up the work
197 			 */
198 			auto_new_mount_thread(fnp, name, cred);
199 
200 			/*
201 			 * At this point, we are simply another thread
202 			 * waiting for the mount to complete
203 			 */
204 			error = auto_wait4mount(fnp);
205 			if (error == AUTOFS_SHUTDOWN)
206 				error = ENOENT;
207 		}
208 	}
209 
210 	mutex_enter(&fnp->fn_lock);
211 	fnp->fn_error = error;
212 
213 	/*
214 	 * Notify threads waiting for lookup/mount that
215 	 * it's done.
216 	 */
217 	if (mountreq) {
218 		AUTOFS_UNBLOCK_OTHERS(fnp, MF_INPROG);
219 	} else {
220 		AUTOFS_UNBLOCK_OTHERS(fnp, MF_LOOKUP);
221 	}
222 	mutex_exit(&fnp->fn_lock);
223 	return (error);
224 }
225 
226 /*
227  * Starting point for thread to handle mount requests with automountd.
228  * XXX auto_mount_thread() is not suspend-safe within the scope of
229  * the present model defined for cpr to suspend the system. Calls
230  * made by the auto_mount_thread() that have been identified to be unsafe
231  * are (1) RPC client handle setup and client calls to automountd which
232  * can block deep down in the RPC library, (2) kmem_alloc() calls with the
233  * KM_SLEEP flag which can block if memory is low, and (3) VFS_*(), and
234  * lookuppnvp() calls which can result in over the wire calls to servers.
235  * The thread should be completely reevaluated to make it suspend-safe in
236  * case of future updates to the cpr model.
237  */
238 static void
239 auto_mount_thread(struct autofs_callargs *argsp)
240 {
241 	struct fninfo *fnip;
242 	fnnode_t *fnp;
243 	vnode_t *vp;
244 	char *name;
245 	size_t namelen;
246 	cred_t *cred;
247 	action_list *alp = NULL;
248 	int error;
249 	callb_cpr_t cprinfo;
250 	kmutex_t auto_mount_thread_cpr_lock;
251 
252 	mutex_init(&auto_mount_thread_cpr_lock, NULL, MUTEX_DEFAULT, NULL);
253 	CALLB_CPR_INIT(&cprinfo, &auto_mount_thread_cpr_lock, callb_generic_cpr,
254 		"auto_mount_thread");
255 
256 	fnp = argsp->fnc_fnp;
257 	vp = fntovn(fnp);
258 	fnip = vfstofni(vp->v_vfsp);
259 	name = argsp->fnc_name;
260 	cred = argsp->fnc_cred;
261 	ASSERT(crgetzoneid(argsp->fnc_cred) == fnip->fi_zoneid);
262 
263 	error = auto_mount_request(fnip, name, &alp, cred, TRUE);
264 	if (!error)
265 		error = auto_perform_actions(fnip, fnp, alp, cred);
266 	mutex_enter(&fnp->fn_lock);
267 	fnp->fn_error = error;
268 
269 	/*
270 	 * Notify threads waiting for mount that
271 	 * it's done.
272 	 */
273 	AUTOFS_UNBLOCK_OTHERS(fnp, MF_INPROG);
274 	mutex_exit(&fnp->fn_lock);
275 
276 	VN_RELE(vp);
277 	crfree(argsp->fnc_cred);
278 	namelen = strlen(argsp->fnc_name) + 1;
279 	kmem_free(argsp->fnc_name, namelen);
280 	kmem_free(argsp, sizeof (*argsp));
281 
282 	mutex_enter(&auto_mount_thread_cpr_lock);
283 	CALLB_CPR_EXIT(&cprinfo);
284 	mutex_destroy(&auto_mount_thread_cpr_lock);
285 	zthread_exit();
286 	/* NOTREACHED */
287 }
288 
289 static int autofs_thr_success = 0;
290 
291 /*
292  * Creates new thread which calls auto_mount_thread which does
293  * the bulk of the work calling automountd, via 'auto_perform_actions'.
294  */
295 void
296 auto_new_mount_thread(fnnode_t *fnp, char *name, cred_t *cred)
297 {
298 	struct autofs_callargs *argsp;
299 
300 	argsp = kmem_alloc(sizeof (*argsp), KM_SLEEP);
301 	VN_HOLD(fntovn(fnp));
302 	argsp->fnc_fnp = fnp;
303 	argsp->fnc_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
304 	(void) strcpy(argsp->fnc_name, name);
305 	argsp->fnc_origin = curthread;
306 	crhold(cred);
307 	argsp->fnc_cred = cred;
308 
309 	(void) zthread_create(NULL, 0, auto_mount_thread, argsp, 0,
310 	    minclsyspri);
311 	autofs_thr_success++;
312 }
313 
314 int
315 auto_calldaemon(
316 	fninfo_t *fnip,
317 	rpcproc_t which,
318 	xdrproc_t xdrargs,
319 	void *argsp,
320 	xdrproc_t xdrres,
321 	void *resp,
322 	cred_t *cred,
323 	bool_t hard)				/* retry forever? */
324 {
325 	CLIENT *client;
326 	enum clnt_stat status;
327 	struct rpc_err rpcerr;
328 	struct timeval wait;
329 	bool_t tryagain;
330 	int error = 0;
331 	k_sigset_t smask;
332 	struct autofs_globals *fngp = vntofn(fnip->fi_rootvp)->fn_globals;
333 
334 	AUTOFS_DPRINT((4, "auto_calldaemon\n"));
335 
336 	error = clnt_tli_kcreate(&fnip->fi_knconf, &fnip->fi_addr,
337 	    AUTOFS_PROG, AUTOFS_VERS, 0, INT_MAX, cred, &client);
338 
339 	if (error) {
340 		auto_log(fngp, CE_WARN, "autofs: clnt_tli_kcreate: error %d",
341 		    error);
342 		goto done;
343 	}
344 
345 	/*
346 	 * Release the old authentication handle.  It was probably
347 	 * AUTH_UNIX.
348 	 */
349 	auth_destroy(client->cl_auth);
350 
351 	/*
352 	 * Create a new authentication handle for AUTH_LOOPBACK.  This
353 	 * will allow us to correctly handle the entire groups list.
354 	 */
355 	client->cl_auth = authloopback_create();
356 	if (client->cl_auth == NULL) {
357 		clnt_destroy(client);
358 		error = EINTR;
359 		auto_log(fngp, CE_WARN,
360 		    "autofs: authloopback_create: error %d", error);
361 		goto done;
362 	}
363 
364 	wait.tv_sec = fnip->fi_rpc_to;
365 	wait.tv_usec = 0;
366 	do {
367 		tryagain = FALSE;
368 		error = 0;
369 
370 		/*
371 		 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
372 		 * and SIGTERM. (Preserving the existing masks)
373 		 */
374 		sigintr(&smask, 1);
375 
376 		status = CLNT_CALL(client, which, xdrargs, argsp,
377 		    xdrres, resp, wait);
378 
379 		/*
380 		 * Restore original signal mask
381 		 */
382 		sigunintr(&smask);
383 
384 		switch (status) {
385 		case RPC_SUCCESS:
386 			break;
387 
388 		case RPC_INTR:
389 			error = EINTR;
390 			break;
391 
392 		case RPC_TIMEDOUT:
393 			tryagain = TRUE;
394 			error = ETIMEDOUT;
395 			break;
396 
397 		case RPC_CANTCONNECT:
398 		case RPC_CANTCREATESTREAM:
399 			/*
400 			 * The connection could not be established
401 			 */
402 			/* fall thru */
403 		case RPC_XPRTFAILED:
404 			/*
405 			 * The connection could not be established or
406 			 * was dropped, we differentiate between the two
407 			 * conditions by calling CLNT_GETERR and look at
408 			 * rpcerror.re_errno.
409 			 * If rpcerr.re_errno == ECONNREFUSED, then the
410 			 * connection could not be established at all.
411 			 */
412 			error = ECONNREFUSED;
413 			if (status == RPC_XPRTFAILED) {
414 				CLNT_GETERR(client, &rpcerr);
415 				if (rpcerr.re_errno != ECONNREFUSED) {
416 					/*
417 					 * The connection was dropped, return
418 					 * to the caller if hard is not set.
419 					 * It is the responsability of the
420 					 * caller to retry the call if
421 					 * appropriate.
422 					 */
423 					error = ECONNRESET;
424 				}
425 			}
426 			/*
427 			 * We know that the current thread is doing work on
428 			 * behalf of its own zone, so it's ok to use
429 			 * curproc->p_zone.
430 			 */
431 			ASSERT(fngp->fng_zoneid == getzoneid());
432 			if (zone_status_get(curproc->p_zone) >=
433 			    ZONE_IS_SHUTTING_DOWN) {
434 				/*
435 				 * There's no point in trying to talk to
436 				 * automountd.  Plus, zone_shutdown() is
437 				 * waiting for us.
438 				 */
439 				tryagain = FALSE;
440 				break;
441 			}
442 			tryagain = hard;
443 			if (!fngp->fng_printed_not_running_msg) {
444 				if (tryagain) {
445 					fngp->fng_printed_not_running_msg = 1;
446 					zprintf(fngp->fng_zoneid,
447 					"automountd not running, retrying\n");
448 				}
449 			}
450 			break;
451 
452 		default:
453 			auto_log(fngp, CE_WARN, "autofs: %s",
454 			    clnt_sperrno(status));
455 			error = ENOENT;
456 			break;
457 		}
458 	} while (tryagain);
459 
460 	if (status == RPC_SUCCESS) {
461 		if (fngp->fng_printed_not_running_msg == 1) {
462 			fngp->fng_printed_not_running_msg = 0;
463 			zprintf(fngp->fng_zoneid, "automountd OK\n");
464 		}
465 	}
466 	auth_destroy(client->cl_auth);
467 	clnt_destroy(client);
468 
469 done:
470 	ASSERT(status == RPC_SUCCESS || error != 0);
471 
472 	AUTOFS_DPRINT((5, "auto_calldaemon error=%d\n", error));
473 	return (error);
474 }
475 
476 static int
477 auto_null_request(fninfo_t *fnip, cred_t *cred, bool_t hard)
478 {
479 	int error;
480 
481 	AUTOFS_DPRINT((4, "\tauto_null_request\n"));
482 
483 	error = auto_calldaemon(fnip, NULLPROC, xdr_void, NULL, xdr_void, NULL,
484 	    cred, hard);
485 
486 	AUTOFS_DPRINT((5, "\tauto_null_request: error=%d\n", error));
487 	return (error);
488 }
489 
490 static int
491 auto_lookup_request(
492 	fninfo_t *fnip,
493 	char *key,
494 	struct linka *lnp,
495 	cred_t *cred,
496 	bool_t hard,
497 	bool_t *mountreq)
498 {
499 	int error;
500 	struct autofs_globals *fngp;
501 	struct autofs_lookupargs request;
502 	struct autofs_lookupres result;
503 	struct linka *p;
504 
505 	AUTOFS_DPRINT((4, "auto_lookup_request: path=%s name=%s\n",
506 	    fnip->fi_path, key));
507 
508 	fngp = vntofn(fnip->fi_rootvp)->fn_globals;
509 	request.map = fnip->fi_map;
510 	request.path = fnip->fi_path;
511 
512 	if (fnip->fi_flags & MF_DIRECT)
513 		request.name = fnip->fi_key;
514 	else
515 		request.name = key;
516 	AUTOFS_DPRINT((4, "auto_lookup_request: using key=%s\n", request.name));
517 
518 	request.subdir = fnip->fi_subdir;
519 	request.opts = fnip->fi_opts;
520 	request.isdirect = fnip->fi_flags & MF_DIRECT ? TRUE : FALSE;
521 
522 	bzero(&result, sizeof (result));
523 	error = auto_calldaemon(fnip, AUTOFS_LOOKUP,
524 	    xdr_autofs_lookupargs, &request,
525 	    xdr_autofs_lookupres, &result,
526 	    cred, hard);
527 	if (!error) {
528 		fngp->fng_verbose = result.lu_verbose;
529 		switch (result.lu_res) {
530 		case AUTOFS_OK:
531 			switch (result.lu_type.action) {
532 			case AUTOFS_MOUNT_RQ:
533 				lnp->link = NULL;
534 				lnp->dir = NULL;
535 				*mountreq = TRUE;
536 				break;
537 			case AUTOFS_LINK_RQ:
538 				p =
539 				&result.lu_type.lookup_result_type_u.lt_linka;
540 				lnp->dir = kmem_alloc(strlen(p->dir) + 1,
541 				    KM_SLEEP);
542 				(void) strcpy(lnp->dir, p->dir);
543 				lnp->link = kmem_alloc(strlen(p->link) + 1,
544 				    KM_SLEEP);
545 				(void) strcpy(lnp->link, p->link);
546 				break;
547 			case AUTOFS_NONE:
548 				lnp->link = NULL;
549 				lnp->dir = NULL;
550 				break;
551 			default:
552 				auto_log(fngp, CE_WARN,
553 				    "auto_lookup_request: bad action type %d",
554 				    result.lu_res);
555 				error = ENOENT;
556 			}
557 			break;
558 		case AUTOFS_NOENT:
559 			error = ENOENT;
560 			break;
561 		default:
562 			error = ENOENT;
563 			auto_log(fngp, CE_WARN,
564 			    "auto_lookup_request: unknown result: %d",
565 			    result.lu_res);
566 			break;
567 		}
568 	}
569 
570 done:
571 	xdr_free(xdr_autofs_lookupres, (char *)&result);
572 
573 	AUTOFS_DPRINT((5, "auto_lookup_request: path=%s name=%s error=%d\n",
574 	    fnip->fi_path, key, error));
575 	return (error);
576 }
577 
578 static int
579 auto_mount_request(
580 	fninfo_t *fnip,
581 	char *key,
582 	action_list **alpp,
583 	cred_t *cred,
584 	bool_t hard)
585 {
586 	int error;
587 	struct autofs_globals *fngp;
588 	struct autofs_lookupargs request;
589 	struct autofs_mountres *result;
590 
591 	AUTOFS_DPRINT((4, "auto_mount_request: path=%s name=%s\n",
592 	    fnip->fi_path, key));
593 
594 	fngp = vntofn(fnip->fi_rootvp)->fn_globals;
595 	request.map = fnip->fi_map;
596 	request.path = fnip->fi_path;
597 
598 	if (fnip->fi_flags & MF_DIRECT)
599 		request.name = fnip->fi_key;
600 	else
601 		request.name = key;
602 	AUTOFS_DPRINT((4, "auto_mount_request: using key=%s\n", request.name));
603 
604 	request.subdir = fnip->fi_subdir;
605 	request.opts = fnip->fi_opts;
606 	request.isdirect = fnip->fi_flags & MF_DIRECT ? TRUE : FALSE;
607 
608 	*alpp = NULL;
609 	result = kmem_zalloc(sizeof (*result), KM_SLEEP);
610 	error = auto_calldaemon(fnip, AUTOFS_MOUNT,
611 	    xdr_autofs_lookupargs, &request,
612 	    xdr_autofs_mountres, result,
613 	    cred, hard);
614 	if (!error) {
615 		fngp->fng_verbose = result->mr_verbose;
616 		switch (result->mr_type.status) {
617 		case AUTOFS_ACTION:
618 			error = 0;
619 			/*
620 			 * Save the action list since it is used by
621 			 * the caller. We NULL the action list pointer
622 			 * in 'result' so that xdr_free() will not free
623 			 * the list.
624 			 */
625 			*alpp = result->mr_type.mount_result_type_u.list;
626 			result->mr_type.mount_result_type_u.list = NULL;
627 			break;
628 		case AUTOFS_DONE:
629 			error = result->mr_type.mount_result_type_u.error;
630 			break;
631 		default:
632 			error = ENOENT;
633 			auto_log(fngp, CE_WARN,
634 			    "auto_mount_request: unknown status %d",
635 			    result->mr_type.status);
636 			break;
637 		}
638 	}
639 
640 	xdr_free(xdr_autofs_mountres, (char *)result);
641 	kmem_free(result, sizeof (*result));
642 
643 	AUTOFS_DPRINT((5, "auto_mount_request: path=%s name=%s error=%d\n",
644 	    fnip->fi_path, key, error));
645 	return (error);
646 }
647 
648 
649 static int
650 auto_send_unmount_request(
651 	fninfo_t *fnip,
652 	umntrequest *ul,
653 	cred_t *cred,
654 	bool_t hard)
655 {
656 	int error;
657 	umntres result;
658 
659 	AUTOFS_DPRINT((4, "\tauto_send_unmount_request: fstype=%s "
660 			" mntpnt=%s\n", ul->fstype, ul->mntpnt));
661 
662 	error = auto_calldaemon(fnip, AUTOFS_UNMOUNT,
663 	    xdr_umntrequest, ul,
664 	    xdr_umntres, &result,
665 	    cred, hard);
666 	if (!error)
667 		error = result.status;
668 
669 	AUTOFS_DPRINT((5, "\tauto_send_unmount_request: error=%d\n", error));
670 
671 	return (error);
672 }
673 
674 static int
675 auto_perform_link(fnnode_t *fnp, struct linka *linkp, cred_t *cred)
676 {
677 	vnode_t *vp;
678 	size_t len;
679 	char *tmp;
680 
681 	AUTOFS_DPRINT((3, "auto_perform_link: fnp=%p dir=%s link=%s\n",
682 	    (void *)fnp, linkp->dir, linkp->link));
683 
684 	len = strlen(linkp->link) + 1;		/* include '\0' */
685 	tmp = kmem_zalloc(len, KM_SLEEP);
686 	(void) kcopy(linkp->link, tmp, len);
687 	mutex_enter(&fnp->fn_lock);
688 	fnp->fn_symlink = tmp;
689 	fnp->fn_symlinklen = (uint_t)len;
690 	fnp->fn_flags |= MF_THISUID_MATCH_RQD;
691 	crhold(cred);
692 	fnp->fn_cred = cred;
693 	mutex_exit(&fnp->fn_lock);
694 
695 	vp = fntovn(fnp);
696 	vp->v_type = VLNK;
697 
698 	return (0);
699 }
700 
701 static boolean_t
702 auto_invalid_action(fninfo_t *dfnip, fnnode_t *dfnp, action_list *p)
703 {
704 	struct mounta *m;
705 	struct autofs_args *argsp;
706 	vnode_t *dvp;
707 	char buff[AUTOFS_MAXPATHLEN];
708 	size_t len;
709 	struct autofs_globals *fngp;
710 
711 	fngp = dfnp->fn_globals;
712 	dvp = fntovn(dfnp);
713 	/*
714 	 * Before we go any further, this better be a mount request.
715 	 */
716 	if (p->action.action != AUTOFS_MOUNT_RQ)
717 		return (B_TRUE);
718 	m = &p->action.action_list_entry_u.mounta;
719 	/*
720 	 * Make sure we aren't geting passed NULL values or a "dir" that
721 	 * isn't "." and doesn't begin with "./".
722 	 *
723 	 * We also only want to perform autofs mounts, so make sure
724 	 * no-one is trying to trick us into doing anything else.
725 	 */
726 	if (m->spec == NULL || m->dir == NULL || m->dir[0] != '.' ||
727 	    (m->dir[1] != '/' && m->dir[1] != '\0') ||
728 	    m->fstype == NULL || strcmp(m->fstype, "autofs") != 0 ||
729 	    m->dataptr == NULL || m->datalen != sizeof (struct autofs_args) ||
730 	    m->optptr == NULL)
731 		return (B_TRUE);
732 	/*
733 	 * We also don't like ".."s in the pathname.  Symlinks are
734 	 * handled by the fact that we'll use NOFOLLOW when we do
735 	 * lookup()s.
736 	 */
737 	if (strstr(m->dir, "/../") != NULL ||
738 	    (len = strlen(m->dir)) > sizeof ("/..") - 1 &&
739 	    m->dir[len] == '.' && m->dir[len - 1] == '.' &&
740 	    m->dir[len - 2] == '/')
741 		return (B_TRUE);
742 	argsp = (struct autofs_args *)m->dataptr;
743 	/*
744 	 * We don't want NULL values here either.
745 	 */
746 	if (argsp->addr.buf == NULL || argsp->path == NULL ||
747 	    argsp->opts == NULL || argsp->map == NULL || argsp->subdir == NULL)
748 		return (B_TRUE);
749 	/*
750 	 * We know what the claimed pathname *should* look like:
751 	 *
752 	 * If the parent (dfnp) is a mount point (VROOT), then
753 	 * the path should be (dfnip->fi_path + m->dir).
754 	 *
755 	 * Else, we know we're only two levels deep, so we use
756 	 * (dfnip->fi_path + dfnp->fn_name + m->dir).
757 	 *
758 	 * Furthermore, "." only makes sense if dfnp is a
759 	 * trigger node.
760 	 *
761 	 * At this point it seems like the passed-in path is
762 	 * redundant.
763 	 */
764 	if (dvp->v_flag & VROOT) {
765 		if (m->dir[1] == '\0' && !(dfnp->fn_flags & MF_TRIGGER))
766 			return (B_TRUE);
767 		(void) snprintf(buff, sizeof (buff), "%s%s",
768 		    dfnip->fi_path, m->dir + 1);
769 	} else {
770 		(void) snprintf(buff, sizeof (buff), "%s/%s%s",
771 		    dfnip->fi_path, dfnp->fn_name, m->dir + 1);
772 	}
773 	if (strcmp(argsp->path, buff) != 0) {
774 		auto_log(fngp, CE_WARN, "autofs: expected path of '%s', "
775 		    "got '%s' instead.", buff, argsp->path);
776 		return (B_TRUE);
777 	}
778 	return (B_FALSE); /* looks OK */
779 }
780 
781 static int
782 auto_perform_actions(
783 	fninfo_t *dfnip,
784 	fnnode_t *dfnp,
785 	action_list *alp,
786 	cred_t *cred)	/* Credentials of the caller */
787 {
788 	action_list *p;
789 	struct mounta *m, margs;
790 	struct autofs_args *argsp;
791 	int error, success = 0;
792 	vnode_t *mvp, *dvp, *newvp;
793 	fnnode_t *newfnp, *mfnp;
794 	int auto_mount = 0;
795 	int save_triggers = 0;		/* set when we need to save at least */
796 					/* one trigger node */
797 	int update_times = 0;
798 	char *mntpnt;
799 	char buff[AUTOFS_MAXPATHLEN];
800 	timestruc_t now;
801 	struct autofs_globals *fngp;
802 	cred_t *zcred;	/* kcred-like credentials limited by our zone */
803 
804 	AUTOFS_DPRINT((4, "auto_perform_actions: alp=%p\n", (void *)alp));
805 
806 	fngp = dfnp->fn_globals;
807 	dvp = fntovn(dfnp);
808 
809 	/*
810 	 * As automountd running in a zone may be compromised, and this may be
811 	 * an attack, we can't trust everything passed in by automountd, and we
812 	 * need to do argument verification.  We'll issue a warning and drop
813 	 * the request if it doesn't seem right.
814 	 */
815 	for (p = alp; p != NULL; p = p->next) {
816 		if (auto_invalid_action(dfnip, dfnp, p)) {
817 			/*
818 			 * This warning should be sent to the global zone,
819 			 * since presumably the zone administrator is the same
820 			 * as the attacker.
821 			 */
822 			cmn_err(CE_WARN, "autofs: invalid action list received "
823 			    "by automountd in zone %s.",
824 			    curproc->p_zone->zone_name);
825 			/*
826 			 * This conversation is over.
827 			 */
828 			xdr_free(xdr_action_list, (char *)alp);
829 			return (EINVAL);
830 		}
831 	}
832 
833 	zcred = zone_get_kcred(getzoneid());
834 	ASSERT(zcred != NULL);
835 
836 	if (vn_mountedvfs(dvp) != NULL) {
837 		/*
838 		 * The daemon successfully mounted a filesystem
839 		 * on the AUTOFS root node.
840 		 */
841 		mutex_enter(&dfnp->fn_lock);
842 		dfnp->fn_flags |= MF_MOUNTPOINT;
843 		ASSERT(dfnp->fn_dirents == NULL);
844 		mutex_exit(&dfnp->fn_lock);
845 		success++;
846 	} else {
847 		/*
848 		 * Clear MF_MOUNTPOINT.
849 		 */
850 		mutex_enter(&dfnp->fn_lock);
851 		if (dfnp->fn_flags & MF_MOUNTPOINT) {
852 			AUTOFS_DPRINT((10, "autofs: clearing mountpoint "
853 			    "flag on %s.", dfnp->fn_name));
854 			ASSERT(dfnp->fn_dirents == NULL);
855 			ASSERT(dfnp->fn_trigger == NULL);
856 		}
857 		dfnp->fn_flags &= ~MF_MOUNTPOINT;
858 		mutex_exit(&dfnp->fn_lock);
859 	}
860 
861 	for (p = alp; p != NULL; p = p->next) {
862 		vfs_t *vfsp;	/* dummy argument */
863 		vfs_t *mvfsp;
864 
865 		auto_mount = 0;
866 
867 		m = &p->action.action_list_entry_u.mounta;
868 		argsp = (struct autofs_args *)m->dataptr;
869 		/*
870 		 * use the parent directory's timeout since it's the
871 		 * one specified/inherited by automount.
872 		 */
873 		argsp->mount_to = dfnip->fi_mount_to;
874 		/*
875 		 * The mountpoint is relative, and it is guaranteed to
876 		 * begin with "."
877 		 *
878 		 */
879 		ASSERT(m->dir[0] == '.');
880 		if (m->dir[0] == '.' && m->dir[1] == '\0') {
881 			/*
882 			 * mounting on the trigger node
883 			 */
884 			mvp = dvp;
885 			VN_HOLD(mvp);
886 			goto mount;
887 		}
888 		/*
889 		 * ignore "./" in front of mountpoint
890 		 */
891 		ASSERT(m->dir[1] == '/');
892 		mntpnt = m->dir + 2;
893 
894 		AUTOFS_DPRINT((10, "\tdfnip->fi_path=%s\n", dfnip->fi_path));
895 		AUTOFS_DPRINT((10, "\tdfnip->fi_flags=%x\n", dfnip->fi_flags));
896 		AUTOFS_DPRINT((10, "\tmntpnt=%s\n", mntpnt));
897 
898 		if (dfnip->fi_flags & MF_DIRECT) {
899 			AUTOFS_DPRINT((10, "\tDIRECT\n"));
900 			(void) sprintf(buff, "%s/%s", dfnip->fi_path, mntpnt);
901 		} else {
902 			AUTOFS_DPRINT((10, "\tINDIRECT\n"));
903 			(void) sprintf(buff, "%s/%s/%s", dfnip->fi_path,
904 			    dfnp->fn_name, mntpnt);
905 		}
906 
907 		if (vn_mountedvfs(dvp) == NULL) {
908 			/*
909 			 * Daemon didn't mount anything on the root
910 			 * We have to create the mountpoint if it doesn't
911 			 * exist already
912 			 *
913 			 * We use the caller's credentials in case a UID-match
914 			 * is required (MF_THISUID_MATCH_RQD).
915 			 */
916 			rw_enter(&dfnp->fn_rwlock, RW_WRITER);
917 			error = auto_search(dfnp, mntpnt, &mfnp, cred);
918 			if (error == 0) {
919 				/*
920 				 * AUTOFS mountpoint exists
921 				 */
922 				if (vn_mountedvfs(fntovn(mfnp)) != NULL) {
923 					cmn_err(CE_PANIC,
924 					    "auto_perform_actions: "
925 					    "mfnp=%p covered", (void *)mfnp);
926 				}
927 			} else {
928 				/*
929 				 * Create AUTOFS mountpoint
930 				 */
931 				ASSERT((dfnp->fn_flags & MF_MOUNTPOINT) == 0);
932 				error = auto_enter(dfnp, mntpnt, &mfnp, cred);
933 				ASSERT(mfnp->fn_linkcnt == 1);
934 				mfnp->fn_linkcnt++;
935 			}
936 			if (!error)
937 				update_times = 1;
938 			rw_exit(&dfnp->fn_rwlock);
939 			ASSERT(error != EEXIST);
940 			if (!error) {
941 				/*
942 				 * mfnp is already held.
943 				 */
944 				mvp = fntovn(mfnp);
945 			} else {
946 				auto_log(fngp, CE_WARN, "autofs: mount of %s "
947 				    "failed - can't create mountpoint.", buff);
948 				continue;
949 			}
950 		} else {
951 			/*
952 			 * Find mountpoint in VFS mounted here. If not found,
953 			 * fail the submount, though the overall mount has
954 			 * succeeded since the root is mounted.
955 			 */
956 			if (error = auto_getmntpnt(dvp, mntpnt, &mvp, kcred)) {
957 				auto_log(fngp, CE_WARN, "autofs: mount of %s "
958 				    "failed - mountpoint doesn't exist.", buff);
959 				continue;
960 			}
961 			if (mvp->v_type == VLNK) {
962 				auto_log(fngp, CE_WARN, "autofs: %s symbolic "
963 				    "link: not a valid mountpoint "
964 				    "- mount failed", buff);
965 				VN_RELE(mvp);
966 				error = ENOENT;
967 				continue;
968 			}
969 		}
970 mount:
971 		m->flags |= MS_SYSSPACE | MS_OPTIONSTR;
972 		/*
973 		 * Copy mounta struct here so we can substitute a buffer
974 		 * that is large enough to hold the returned option string,
975 		 * if that string is longer that the input option string.
976 		 * This can happen if there are default options enabled
977 		 * that were not in the input option string.
978 		 */
979 		bcopy(m, &margs, sizeof (*m));
980 		margs.optptr = kmem_alloc(MAX_MNTOPT_STR, KM_SLEEP);
981 		margs.optlen = MAX_MNTOPT_STR;
982 		(void) strcpy(margs.optptr, m->optptr);
983 		margs.dir = argsp->path;
984 		/*
985 		 * We use the zone's kcred because we don't want the zone to be
986 		 * able to thus do something it wouldn't normally be able to.
987 		 */
988 		error = domount(NULL, &margs, mvp, zcred, &vfsp);
989 		kmem_free(margs.optptr, MAX_MNTOPT_STR);
990 		if (error != 0) {
991 			auto_log(fngp, CE_WARN,
992 			    "autofs: domount of %s failed error=%d",
993 			    buff, error);
994 			VN_RELE(mvp);
995 			continue;
996 		}
997 		VFS_RELE(vfsp);
998 
999 		/*
1000 		 * If mountpoint is an AUTOFS node, then I'm going to
1001 		 * flag it that the Filesystem mounted on top was mounted
1002 		 * in the kernel so that the unmount can be done inside the
1003 		 * kernel as well.
1004 		 * I don't care to flag non-AUTOFS mountpoints when an AUTOFS
1005 		 * in-kernel mount was done on top, because the unmount
1006 		 * routine already knows that such case was done in the kernel.
1007 		 */
1008 		if (vfs_matchops(dvp->v_vfsp, vfs_getops(mvp->v_vfsp))) {
1009 			mfnp = vntofn(mvp);
1010 			mutex_enter(&mfnp->fn_lock);
1011 			mfnp->fn_flags |= MF_IK_MOUNT;
1012 			mutex_exit(&mfnp->fn_lock);
1013 		}
1014 
1015 		(void) vn_vfsrlock_wait(mvp);
1016 		mvfsp = vn_mountedvfs(mvp);
1017 		if (mvfsp != NULL) {
1018 			error = VFS_ROOT(mvfsp, &newvp);
1019 			vn_vfsunlock(mvp);
1020 			if (error) {
1021 				/*
1022 				 * We've dropped the locks, so let's get
1023 				 * the mounted vfs again in case it changed.
1024 				 */
1025 				(void) vn_vfswlock_wait(mvp);
1026 				mvfsp = vn_mountedvfs(mvp);
1027 				if (mvfsp != NULL) {
1028 					error = dounmount(mvfsp, 0, CRED());
1029 					if (error) {
1030 						cmn_err(CE_WARN,
1031 						    "autofs: could not "
1032 						    "unmount vfs=%p",
1033 						(void *)mvfsp);
1034 					}
1035 				} else
1036 					vn_vfsunlock(mvp);
1037 				VN_RELE(mvp);
1038 				continue;
1039 			}
1040 		} else {
1041 			vn_vfsunlock(mvp);
1042 			VN_RELE(mvp);
1043 			continue;
1044 		}
1045 
1046 		auto_mount = vfs_matchops(dvp->v_vfsp,
1047 						vfs_getops(newvp->v_vfsp));
1048 		newfnp = vntofn(newvp);
1049 		newfnp->fn_parent = dfnp;
1050 
1051 		/*
1052 		 * At this time we want to save the AUTOFS filesystem as
1053 		 * a trigger node. (We only do this if the mount occured
1054 		 * on a node different from the root.
1055 		 * We look at the trigger nodes during
1056 		 * the automatic unmounting to make sure we remove them
1057 		 * as a unit and remount them as a unit if the filesystem
1058 		 * mounted at the root could not be unmounted.
1059 		 */
1060 		if (auto_mount && (error == 0) && (mvp != dvp)) {
1061 			save_triggers++;
1062 			/*
1063 			 * Add AUTOFS mount to hierarchy
1064 			 */
1065 			newfnp->fn_flags |= MF_TRIGGER;
1066 			rw_enter(&newfnp->fn_rwlock, RW_WRITER);
1067 			newfnp->fn_next = dfnp->fn_trigger;
1068 			rw_exit(&newfnp->fn_rwlock);
1069 			rw_enter(&dfnp->fn_rwlock, RW_WRITER);
1070 			dfnp->fn_trigger = newfnp;
1071 			rw_exit(&dfnp->fn_rwlock);
1072 			/*
1073 			 * Don't VN_RELE(newvp) here since dfnp now holds
1074 			 * reference to it as its trigger node.
1075 			 */
1076 			AUTOFS_DPRINT((10, "\tadding trigger %s to %s\n",
1077 			    newfnp->fn_name, dfnp->fn_name));
1078 			AUTOFS_DPRINT((10, "\tfirst trigger is %s\n",
1079 			    dfnp->fn_trigger->fn_name));
1080 			if (newfnp->fn_next != NULL)
1081 				AUTOFS_DPRINT((10, "\tnext trigger is %s\n",
1082 				    newfnp->fn_next->fn_name));
1083 			else
1084 				AUTOFS_DPRINT((10, "\tno next trigger\n"));
1085 		} else
1086 			VN_RELE(newvp);
1087 
1088 		if (!error)
1089 			success++;
1090 
1091 		if (update_times) {
1092 			gethrestime(&now);
1093 			dfnp->fn_atime = dfnp->fn_mtime = now;
1094 		}
1095 
1096 		VN_RELE(mvp);
1097 	}
1098 
1099 	if (save_triggers) {
1100 		/*
1101 		 * Make sure the parent can't be freed while it has triggers.
1102 		 */
1103 		VN_HOLD(dvp);
1104 	}
1105 
1106 	crfree(zcred);
1107 
1108 done:
1109 	/*
1110 	 * Return failure if daemon didn't mount anything, and all
1111 	 * kernel mounts attempted failed.
1112 	 */
1113 	error = success ? 0 : ENOENT;
1114 
1115 	if (alp != NULL) {
1116 		if ((error == 0) && save_triggers) {
1117 			/*
1118 			 * Save action_list information, so that we can use it
1119 			 * when it comes time to remount the trigger nodes
1120 			 * The action list is freed when the directory node
1121 			 * containing the reference to it is unmounted in
1122 			 * unmount_tree().
1123 			 */
1124 			mutex_enter(&dfnp->fn_lock);
1125 			ASSERT(dfnp->fn_alp == NULL);
1126 			dfnp->fn_alp = alp;
1127 			mutex_exit(&dfnp->fn_lock);
1128 		} else {
1129 			/*
1130 			 * free the action list now,
1131 			 */
1132 			xdr_free(xdr_action_list, (char *)alp);
1133 		}
1134 	}
1135 
1136 	AUTOFS_DPRINT((5, "auto_perform_actions: error=%d\n", error));
1137 	return (error);
1138 }
1139 
1140 fnnode_t *
1141 auto_makefnnode(
1142 	vtype_t type,
1143 	vfs_t *vfsp,
1144 	char *name,
1145 	cred_t *cred,
1146 	struct autofs_globals *fngp)
1147 {
1148 	fnnode_t *fnp;
1149 	vnode_t *vp;
1150 	char *tmpname;
1151 	timestruc_t now;
1152 	/*
1153 	 * autofs uses odd inode numbers
1154 	 * automountd uses even inode numbers
1155 	 *
1156 	 * To preserve the age-old semantics that inum+devid is unique across
1157 	 * the system, this variable must be global across zones.
1158 	 */
1159 	static ino_t nodeid = 3;
1160 
1161 	fnp = kmem_zalloc(sizeof (*fnp), KM_SLEEP);
1162 	fnp->fn_vnode = vn_alloc(KM_SLEEP);
1163 
1164 	vp = fntovn(fnp);
1165 	tmpname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1166 	(void) strcpy(tmpname, name);
1167 	fnp->fn_name = &tmpname[0];
1168 	fnp->fn_namelen = (int)strlen(tmpname) + 1;	/* include '\0' */
1169 	fnp->fn_uid = crgetuid(cred);
1170 	fnp->fn_gid = crgetgid(cred);
1171 	/*
1172 	 * ".." is added in auto_enter and auto_mount.
1173 	 * "." is added in auto_mkdir and auto_mount.
1174 	 */
1175 	/*
1176 	 * Note that fn_size and fn_linkcnt are already 0 since
1177 	 * we used kmem_zalloc to allocated fnp
1178 	 */
1179 	fnp->fn_mode = AUTOFS_MODE;
1180 	gethrestime(&now);
1181 	fnp->fn_atime = fnp->fn_mtime = fnp->fn_ctime = now;
1182 	fnp->fn_ref_time = now.tv_sec;
1183 	mutex_enter(&autofs_nodeid_lock);
1184 	fnp->fn_nodeid = nodeid;
1185 	nodeid += 2;
1186 	fnp->fn_globals = fngp;
1187 	fngp->fng_fnnode_count++;
1188 	mutex_exit(&autofs_nodeid_lock);
1189 	vn_setops(vp, auto_vnodeops);
1190 	vp->v_type = type;
1191 	vp->v_data = (void *)fnp;
1192 	vp->v_vfsp = vfsp;
1193 	mutex_init(&fnp->fn_lock, NULL, MUTEX_DEFAULT, NULL);
1194 	rw_init(&fnp->fn_rwlock, NULL, RW_DEFAULT, NULL);
1195 	cv_init(&fnp->fn_cv_mount, NULL, CV_DEFAULT, NULL);
1196 	vn_exists(vp);
1197 	return (fnp);
1198 }
1199 
1200 
1201 void
1202 auto_freefnnode(fnnode_t *fnp)
1203 {
1204 	vnode_t *vp = fntovn(fnp);
1205 
1206 	AUTOFS_DPRINT((4, "auto_freefnnode: fnp=%p\n", (void *)fnp));
1207 
1208 	ASSERT(fnp->fn_linkcnt == 0);
1209 	ASSERT(vp->v_count == 0);
1210 	ASSERT(fnp->fn_dirents == NULL);
1211 	ASSERT(fnp->fn_parent == NULL);
1212 
1213 	vn_invalid(vp);
1214 	kmem_free(fnp->fn_name, fnp->fn_namelen);
1215 	if (fnp->fn_symlink) {
1216 		ASSERT(fnp->fn_flags & MF_THISUID_MATCH_RQD);
1217 		kmem_free(fnp->fn_symlink, fnp->fn_symlinklen);
1218 	}
1219 	if (fnp->fn_cred)
1220 		crfree(fnp->fn_cred);
1221 	mutex_destroy(&fnp->fn_lock);
1222 	rw_destroy(&fnp->fn_rwlock);
1223 	cv_destroy(&fnp->fn_cv_mount);
1224 	vn_free(vp);
1225 
1226 	mutex_enter(&autofs_nodeid_lock);
1227 	fnp->fn_globals->fng_fnnode_count--;
1228 	mutex_exit(&autofs_nodeid_lock);
1229 	kmem_free(fnp, sizeof (*fnp));
1230 }
1231 
1232 void
1233 auto_disconnect(
1234 	fnnode_t *dfnp,
1235 	fnnode_t *fnp)
1236 {
1237 	fnnode_t *tmp, **fnpp;
1238 	vnode_t *vp = fntovn(fnp);
1239 	timestruc_t now;
1240 
1241 	AUTOFS_DPRINT((4,
1242 	    "auto_disconnect: dfnp=%p fnp=%p linkcnt=%d\n v_count=%d",
1243 	    (void *)dfnp, (void *)fnp, fnp->fn_linkcnt, vp->v_count));
1244 
1245 	ASSERT(RW_WRITE_HELD(&dfnp->fn_rwlock));
1246 	ASSERT(fnp->fn_linkcnt == 1);
1247 
1248 	if (vn_mountedvfs(vp) != NULL) {
1249 		cmn_err(CE_PANIC, "auto_disconnect: vp %p mounted on",
1250 		    (void *)vp);
1251 	}
1252 
1253 	/*
1254 	 * Decrement by 1 because we're removing the entry in dfnp.
1255 	 */
1256 	fnp->fn_linkcnt--;
1257 	fnp->fn_size--;
1258 
1259 	/*
1260 	 * only changed while holding parent's (dfnp) rw_lock
1261 	 */
1262 	fnp->fn_parent = NULL;
1263 
1264 	fnpp = &dfnp->fn_dirents;
1265 	for (;;) {
1266 		tmp = *fnpp;
1267 		if (tmp == NULL) {
1268 			cmn_err(CE_PANIC,
1269 			    "auto_disconnect: %p not in %p dirent list",
1270 			    (void *)fnp, (void *)dfnp);
1271 		}
1272 		if (tmp == fnp) {
1273 			*fnpp = tmp->fn_next; 	/* remove it from the list */
1274 			ASSERT(vp->v_count == 0);
1275 			/* child had a pointer to parent ".." */
1276 			dfnp->fn_linkcnt--;
1277 			dfnp->fn_size--;
1278 			break;
1279 		}
1280 		fnpp = &tmp->fn_next;
1281 	}
1282 
1283 	mutex_enter(&fnp->fn_lock);
1284 	gethrestime(&now);
1285 	fnp->fn_atime = fnp->fn_mtime = now;
1286 	mutex_exit(&fnp->fn_lock);
1287 
1288 	AUTOFS_DPRINT((5, "auto_disconnect: done\n"));
1289 }
1290 
1291 int
1292 auto_enter(fnnode_t *dfnp, char *name, fnnode_t **fnpp, cred_t *cred)
1293 {
1294 	struct fnnode *cfnp, **spp;
1295 	vnode_t *dvp = fntovn(dfnp);
1296 	ushort_t offset = 0;
1297 	ushort_t diff;
1298 
1299 	AUTOFS_DPRINT((4, "auto_enter: dfnp=%p, name=%s ", (void *)dfnp, name));
1300 
1301 	ASSERT(RW_WRITE_HELD(&dfnp->fn_rwlock));
1302 
1303 	cfnp = dfnp->fn_dirents;
1304 	if (cfnp == NULL) {
1305 		/*
1306 		 * offset = 0 for '.' and offset = 1 for '..'
1307 		 */
1308 		spp = &dfnp->fn_dirents;
1309 		offset = 2;
1310 	}
1311 
1312 	for (; cfnp; cfnp = cfnp->fn_next) {
1313 		if (strcmp(cfnp->fn_name, name) == 0) {
1314 			mutex_enter(&cfnp->fn_lock);
1315 			if (cfnp->fn_flags & MF_THISUID_MATCH_RQD) {
1316 				/*
1317 				 * "thisuser" kind of node, need to
1318 				 * match CREDs as well
1319 				 */
1320 				mutex_exit(&cfnp->fn_lock);
1321 				if (crcmp(cfnp->fn_cred, cred) == 0)
1322 					return (EEXIST);
1323 			} else {
1324 				mutex_exit(&cfnp->fn_lock);
1325 				return (EEXIST);
1326 			}
1327 		}
1328 
1329 		if (cfnp->fn_next != NULL) {
1330 			diff = (ushort_t)
1331 			    (cfnp->fn_next->fn_offset - cfnp->fn_offset);
1332 			ASSERT(diff != 0);
1333 			if (diff > 1 && offset == 0) {
1334 				offset = (ushort_t)cfnp->fn_offset + 1;
1335 				spp = &cfnp->fn_next;
1336 			}
1337 		} else if (offset == 0) {
1338 			offset = (ushort_t)cfnp->fn_offset + 1;
1339 			spp = &cfnp->fn_next;
1340 		}
1341 	}
1342 
1343 	*fnpp = auto_makefnnode(VDIR, dvp->v_vfsp, name, cred,
1344 	    dfnp->fn_globals);
1345 	if (*fnpp == NULL)
1346 		return (ENOMEM);
1347 
1348 	/*
1349 	 * I don't hold the mutex on fnpp because I created it, and
1350 	 * I'm already holding the writers lock for it's parent
1351 	 * directory, therefore nobody can reference it without me first
1352 	 * releasing the writers lock.
1353 	 */
1354 	(*fnpp)->fn_offset = offset;
1355 	(*fnpp)->fn_next = *spp;
1356 	*spp = *fnpp;
1357 	(*fnpp)->fn_parent = dfnp;
1358 	(*fnpp)->fn_linkcnt++;	/* parent now holds reference to entry */
1359 	(*fnpp)->fn_size++;
1360 
1361 	/*
1362 	 * dfnp->fn_linkcnt and dfnp->fn_size protected by dfnp->rw_lock
1363 	 */
1364 	dfnp->fn_linkcnt++;	/* child now holds reference to parent '..' */
1365 	dfnp->fn_size++;
1366 
1367 	dfnp->fn_ref_time = gethrestime_sec();
1368 
1369 	AUTOFS_DPRINT((5, "*fnpp=%p\n", (void *)*fnpp));
1370 	return (0);
1371 }
1372 
1373 int
1374 auto_search(fnnode_t *dfnp, char *name, fnnode_t **fnpp, cred_t *cred)
1375 {
1376 	vnode_t *dvp;
1377 	fnnode_t *p;
1378 	int error = ENOENT, match = 0;
1379 
1380 	AUTOFS_DPRINT((4, "auto_search: dfnp=%p, name=%s...\n",
1381 	    (void *)dfnp, name));
1382 
1383 	dvp = fntovn(dfnp);
1384 	if (dvp->v_type != VDIR) {
1385 		cmn_err(CE_PANIC, "auto_search: dvp=%p not a directory",
1386 		    (void *)dvp);
1387 	}
1388 
1389 	ASSERT(RW_LOCK_HELD(&dfnp->fn_rwlock));
1390 	for (p = dfnp->fn_dirents; p != NULL; p = p->fn_next) {
1391 		if (strcmp(p->fn_name, name) == 0) {
1392 			mutex_enter(&p->fn_lock);
1393 			if (p->fn_flags & MF_THISUID_MATCH_RQD) {
1394 				/*
1395 				 * "thisuser" kind of node
1396 				 * Need to match CREDs as well
1397 				 */
1398 				mutex_exit(&p->fn_lock);
1399 				match = crcmp(p->fn_cred, cred) == 0;
1400 			} else {
1401 				/*
1402 				 * No need to check CRED
1403 				 */
1404 				mutex_exit(&p->fn_lock);
1405 				match = 1;
1406 			}
1407 		}
1408 		if (match) {
1409 			error = 0;
1410 			if (fnpp) {
1411 				*fnpp = p;
1412 				VN_HOLD(fntovn(*fnpp));
1413 			}
1414 			break;
1415 		}
1416 	}
1417 
1418 	AUTOFS_DPRINT((5, "auto_search: error=%d\n", error));
1419 	return (error);
1420 }
1421 
1422 /*
1423  * If dvp is mounted on, get path's vnode in the mounted on
1424  * filesystem.  Path is relative to dvp, ie "./path".
1425  * If successful, *mvp points to a the held mountpoint vnode.
1426  */
1427 /* ARGSUSED */
1428 static int
1429 auto_getmntpnt(
1430 	vnode_t *dvp,
1431 	char *path,
1432 	vnode_t **mvpp,		/* vnode for mountpoint */
1433 	cred_t *cred)
1434 {
1435 	int error = 0;
1436 	vnode_t *newvp;
1437 	char namebuf[TYPICALMAXPATHLEN];
1438 	struct pathname lookpn;
1439 	vfs_t *vfsp;
1440 
1441 	AUTOFS_DPRINT((4, "auto_getmntpnt: path=%s\n", path));
1442 
1443 	if (error = vn_vfsrlock_wait(dvp))
1444 		return (error);
1445 
1446 	/*
1447 	 * Now that we have the vfswlock, check to see if dvp
1448 	 * is still mounted on.  If not, then just bail out as
1449 	 * there is no need to remount the triggers since the
1450 	 * higher level mount point has gotten unmounted.
1451 	 */
1452 	vfsp = vn_mountedvfs(dvp);
1453 	if (vfsp == NULL) {
1454 		vn_vfsunlock(dvp);
1455 		error = EBUSY;
1456 		goto done;
1457 	}
1458 	/*
1459 	 * Since mounted on, lookup "path" in the new filesystem,
1460 	 * it is important that we do the filesystem jump here to
1461 	 * avoid lookuppn() calling auto_lookup on dvp and deadlock.
1462 	 */
1463 	error = VFS_ROOT(vfsp, &newvp);
1464 	vn_vfsunlock(dvp);
1465 	if (error)
1466 		goto done;
1467 
1468 	/*
1469 	 * We do a VN_HOLD on newvp just in case the first call to
1470 	 * lookuppnvp() fails with ENAMETOOLONG.  We should still have a
1471 	 * reference to this vnode for the second call to lookuppnvp().
1472 	 */
1473 	VN_HOLD(newvp);
1474 
1475 	/*
1476 	 * Now create the pathname struct so we can make use of lookuppnvp,
1477 	 * and pn_getcomponent.
1478 	 * This code is similar to lookupname() in fs/lookup.c.
1479 	 */
1480 	error = pn_get_buf(path, UIO_SYSSPACE, &lookpn,
1481 		namebuf, sizeof (namebuf));
1482 	if (error == 0) {
1483 		error = lookuppnvp(&lookpn, NULL, NO_FOLLOW, NULLVPP,
1484 		    mvpp, rootdir, newvp, cred);
1485 	} else
1486 		VN_RELE(newvp);
1487 	if (error == ENAMETOOLONG) {
1488 		/*
1489 		 * This thread used a pathname > TYPICALMAXPATHLEN bytes long.
1490 		 * newvp is VN_RELE'd by this call to lookuppnvp.
1491 		 *
1492 		 * Using 'rootdir' in a zone's context is OK here: we already
1493 		 * ascertained that there are no '..'s in the path, and we're
1494 		 * not following symlinks.
1495 		 */
1496 		if ((error = pn_get(path, UIO_SYSSPACE, &lookpn)) == 0) {
1497 			error = lookuppnvp(&lookpn, NULL, NO_FOLLOW, NULLVPP,
1498 			    mvpp, rootdir, newvp, cred);
1499 			pn_free(&lookpn);
1500 		} else
1501 			VN_RELE(newvp);
1502 	} else {
1503 		/*
1504 		 * Need to release newvp here since we held it.
1505 		 */
1506 		VN_RELE(newvp);
1507 	}
1508 
1509 done:
1510 	AUTOFS_DPRINT((5, "auto_getmntpnt: path=%s *mvpp=%p error=%d\n",
1511 	    path, (void *)*mvpp, error));
1512 	return (error);
1513 }
1514 
/*
 * True when node x has directory entries or has something mounted on it.
 * Note that x is evaluated twice.
 */
#define	DEEPER(x) \
	((x)->fn_dirents != NULL || vn_mountedvfs(fntovn(x)) != NULL)
1517 
1518 /*
1519  * The caller, should have already VN_RELE'd its reference to the
1520  * root vnode of this filesystem.
1521  */
1522 static int
1523 auto_inkernel_unmount(vfs_t *vfsp)
1524 {
1525 	vnode_t *cvp = vfsp->vfs_vnodecovered;
1526 	int error;
1527 
1528 	AUTOFS_DPRINT((4,
1529 	    "auto_inkernel_unmount: devid=%lx mntpnt(%p) count %u\n",
1530 	    vfsp->vfs_dev, (void *)cvp, cvp->v_count));
1531 
1532 	ASSERT(vn_vfswlock_held(cvp));
1533 
1534 	/*
1535 	 * Perform the unmount
1536 	 * The mountpoint has already been locked by the caller.
1537 	 */
1538 	error = dounmount(vfsp, 0, kcred);
1539 
1540 	AUTOFS_DPRINT((5, "auto_inkernel_unmount: exit count %u\n",
1541 	    cvp->v_count));
1542 	return (error);
1543 }
1544 
1545 /*
1546  * unmounts trigger nodes in the kernel.
1547  */
1548 static void
1549 unmount_triggers(fnnode_t *fnp, action_list **alp)
1550 {
1551 	fnnode_t *tp, *next;
1552 	int error = 0;
1553 	vfs_t *vfsp;
1554 	vnode_t *tvp;
1555 
1556 	AUTOFS_DPRINT((4, "unmount_triggers: fnp=%p\n", (void *)fnp));
1557 	ASSERT(RW_WRITE_HELD(&fnp->fn_rwlock));
1558 
1559 	*alp = fnp->fn_alp;
1560 	next = fnp->fn_trigger;
1561 	while ((tp = next) != NULL) {
1562 		tvp = fntovn(tp);
1563 		ASSERT(tvp->v_count >= 2);
1564 		next = tp->fn_next;
1565 		/*
1566 		 * drop writer's lock since the unmount will end up
1567 		 * disconnecting this node from fnp and needs to acquire
1568 		 * the writer's lock again.
1569 		 * next has at least a reference count >= 2 since it's
1570 		 * a trigger node, therefore can not be accidentally freed
1571 		 * by a VN_RELE
1572 		 */
1573 		rw_exit(&fnp->fn_rwlock);
1574 
1575 		vfsp = tvp->v_vfsp;
1576 
1577 		/*
1578 		 * Its parent was holding a reference to it, since this
1579 		 * is a trigger vnode.
1580 		 */
1581 		VN_RELE(tvp);
1582 		if (error = auto_inkernel_unmount(vfsp)) {
1583 			cmn_err(CE_PANIC, "unmount_triggers: "
1584 			    "unmount of vp=%p failed error=%d",
1585 			    (void *)tvp, error);
1586 		}
1587 		/*
1588 		 * reacquire writer's lock
1589 		 */
1590 		rw_enter(&fnp->fn_rwlock, RW_WRITER);
1591 	}
1592 
1593 	/*
1594 	 * We were holding a reference to our parent.  Drop that.
1595 	 */
1596 	VN_RELE(fntovn(fnp));
1597 	fnp->fn_trigger = NULL;
1598 	fnp->fn_alp = NULL;
1599 
1600 	AUTOFS_DPRINT((5, "unmount_triggers: finished\n"));
1601 }
1602 
1603 /*
1604  * This routine locks the mountpoint of every trigger node if they're
1605  * not busy, or returns EBUSY if any node is busy. If a trigger node should
1606  * be unmounted first, then it sets nfnp to point to it, otherwise nfnp
1607  * points to NULL.
1608  */
1609 static int
1610 triggers_busy(fnnode_t *fnp, fnnode_t **nfnp)
1611 {
1612 	int error = 0, done;
1613 	int lck_error = 0;
1614 	fnnode_t *tp, *t1p;
1615 	vfs_t *vfsp;
1616 
1617 	ASSERT(RW_WRITE_HELD(&fnp->fn_rwlock));
1618 
1619 	*nfnp = NULL;
1620 	for (tp = fnp->fn_trigger; tp != NULL; tp = tp->fn_next) {
1621 		AUTOFS_DPRINT((10, "\ttrigger: %s\n", tp->fn_name));
1622 		vfsp = fntovn(tp)->v_vfsp;
1623 		error = 0;
1624 		/*
1625 		 * The vn_vfsunlock will be done in auto_inkernel_unmount.
1626 		 */
1627 		lck_error = vn_vfswlock(vfsp->vfs_vnodecovered);
1628 		if (lck_error == 0) {
1629 			mutex_enter(&tp->fn_lock);
1630 			ASSERT((tp->fn_flags & MF_LOOKUP) == 0);
1631 			if (tp->fn_flags & MF_INPROG) {
1632 				/*
1633 				 * a mount is in progress
1634 				 */
1635 				error = EBUSY;
1636 			}
1637 			mutex_exit(&tp->fn_lock);
1638 		}
1639 		if (lck_error || error || DEEPER(tp) ||
1640 		    ((fntovn(tp))->v_count) > 2) {
1641 			/*
1642 			 * couldn't lock it because it's busy,
1643 			 * It is mounted on or has dirents?
1644 			 * If reference count is greater than two, then
1645 			 * somebody else is holding a reference to this vnode.
1646 			 * One reference is for the mountpoint, and the second
1647 			 * is for the trigger node.
1648 			 */
1649 			AUTOFS_DPRINT((10, "\ttrigger busy\n"));
1650 			if ((lck_error == 0) && (error == 0)) {
1651 				*nfnp = tp;
1652 				/*
1653 				 * The matching VN_RELE is done in
1654 				 * unmount_tree().
1655 				 */
1656 				VN_HOLD(fntovn(*nfnp));
1657 			}
1658 			/*
1659 			 * Unlock previously locked mountpoints
1660 			 */
1661 			for (done = 0, t1p = fnp->fn_trigger; !done;
1662 			    t1p = t1p->fn_next) {
1663 				/*
1664 				 * Unlock all nodes previously
1665 				 * locked. All nodes up to 'tp'
1666 				 * were successfully locked. If 'lck_err' is
1667 				 * set, then 'tp' was not locked, and thus
1668 				 * should not be unlocked. If
1669 				 * 'lck_err' is not set, then 'tp' was
1670 				 * successfully locked, and it should
1671 				 * be unlocked.
1672 				 */
1673 				if (t1p != tp || !lck_error) {
1674 					vfsp = fntovn(t1p)->v_vfsp;
1675 					vn_vfsunlock(vfsp->vfs_vnodecovered);
1676 				}
1677 				done = (t1p == tp);
1678 			}
1679 			error = EBUSY;
1680 			break;
1681 		}
1682 	}
1683 
1684 	AUTOFS_DPRINT((4, "triggers_busy: error=%d\n", error));
1685 	return (error);
1686 }
1687 
1688 /*
1689  * Unlock previously locked trigger nodes.
1690  */
1691 static int
1692 triggers_unlock(fnnode_t *fnp)
1693 {
1694 	fnnode_t *tp;
1695 	vfs_t *vfsp;
1696 
1697 	ASSERT(RW_WRITE_HELD(&fnp->fn_rwlock));
1698 
1699 	for (tp = fnp->fn_trigger; tp != NULL; tp = tp->fn_next) {
1700 		AUTOFS_DPRINT((10, "\tunlock trigger: %s\n", tp->fn_name));
1701 		vfsp = fntovn(tp)->v_vfsp;
1702 		vn_vfsunlock(vfsp->vfs_vnodecovered);
1703 	}
1704 
1705 	return (0);
1706 }
1707 
1708 /*
1709  * It is the caller's responsibility to grab the VVFSLOCK.
1710  * Releases the VVFSLOCK upon return.
1711  */
1712 static int
1713 unmount_node(vnode_t *cvp, int force)
1714 {
1715 	int error = 0;
1716 	fnnode_t *cfnp;
1717 	vfs_t *vfsp;
1718 	umntrequest ul;
1719 	fninfo_t *fnip;
1720 
1721 	AUTOFS_DPRINT((4, "\tunmount_node cvp=%p\n", (void *)cvp));
1722 
1723 	ASSERT(vn_vfswlock_held(cvp));
1724 	cfnp = vntofn(cvp);
1725 	vfsp = vn_mountedvfs(cvp);
1726 
1727 	if (force || cfnp->fn_flags & MF_IK_MOUNT) {
1728 		/*
1729 		 * Mount was performed in the kernel, so
1730 		 * do an in-kernel unmount. auto_inkernel_unmount()
1731 		 * will vn_vfsunlock(cvp).
1732 		 */
1733 		error = auto_inkernel_unmount(vfsp);
1734 	} else {
1735 		zone_t *zone = NULL;
1736 		refstr_t *mntpt, *resource;
1737 		size_t mntoptslen;
1738 
1739 		/*
1740 		 * Get the mnttab information of the node
1741 		 * and ask the daemon to unmount it.
1742 		 */
1743 		bzero(&ul, sizeof (ul));
1744 		mntfs_getmntopts(vfsp, &ul.mntopts, &mntoptslen);
1745 		if (ul.mntopts == NULL) {
1746 			auto_log(cfnp->fn_globals, CE_WARN, "unmount_node: "
1747 			    "no memory");
1748 			vn_vfsunlock(cvp);
1749 			error = ENOMEM;
1750 			goto done;
1751 		}
1752 		if (mntoptslen > AUTOFS_MAXOPTSLEN)
1753 			ul.mntopts[AUTOFS_MAXOPTSLEN - 1] = '\0';
1754 
1755 		mntpt = vfs_getmntpoint(vfsp);
1756 		ul.mntpnt = (char *)refstr_value(mntpt);
1757 		resource = vfs_getresource(vfsp);
1758 		ul.mntresource = (char *)refstr_value(resource);
1759 
1760 		fnip = vfstofni(cvp->v_vfsp);
1761 		ul.isdirect = fnip->fi_flags & MF_DIRECT ? TRUE : FALSE;
1762 
1763 		/*
1764 		 * Since a zone'd automountd's view of the autofs mount points
1765 		 * differs from those in the kernel, we need to make sure we
1766 		 * give it consistent mount points.
1767 		 */
1768 		ASSERT(fnip->fi_zoneid == getzoneid());
1769 		zone = curproc->p_zone;
1770 
1771 		if (fnip->fi_zoneid != GLOBAL_ZONEID) {
1772 			if (ZONE_PATH_VISIBLE(ul.mntpnt, zone)) {
1773 				ul.mntpnt =
1774 				    ZONE_PATH_TRANSLATE(ul.mntpnt, zone);
1775 			}
1776 			if (ZONE_PATH_VISIBLE(ul.mntresource, zone)) {
1777 				ul.mntresource =
1778 				    ZONE_PATH_TRANSLATE(ul.mntresource, zone);
1779 			}
1780 		}
1781 		ul.fstype = vfssw[vfsp->vfs_fstype].vsw_name;
1782 		vn_vfsunlock(cvp);
1783 
1784 		error = auto_send_unmount_request(fnip, &ul, CRED(), FALSE);
1785 		kmem_free(ul.mntopts, mntoptslen);
1786 		refstr_rele(mntpt);
1787 		refstr_rele(resource);
1788 	}
1789 
1790 done:
1791 	AUTOFS_DPRINT((5, "\tunmount_node cvp=%p error=%d\n", (void *)cvp,
1792 	    error));
1793 	return (error);
1794 }
1795 
1796 /*
1797  * vp is the "root" of the AUTOFS filesystem.
1798  * return EBUSY if any thread is holding a reference to this vnode
1799  * other than us.
1800  */
1801 static int
1802 check_auto_node(vnode_t *vp)
1803 {
1804 	fnnode_t *fnp;
1805 	int error = 0;
1806 	/*
1807 	 * number of references to expect for
1808 	 * a non-busy vnode.
1809 	 */
1810 	uint_t count;
1811 
1812 	AUTOFS_DPRINT((4, "\tcheck_auto_node vp=%p ", (void *)vp));
1813 	fnp = vntofn(vp);
1814 	ASSERT(fnp->fn_flags & MF_INPROG);
1815 	ASSERT((fnp->fn_flags & MF_LOOKUP) == 0);
1816 
1817 	count = 1;		/* we are holding a reference to vp */
1818 	if (fnp->fn_flags & MF_TRIGGER) {
1819 		/*
1820 		 * parent holds a pointer to us (trigger)
1821 		 */
1822 		count++;
1823 	}
1824 	if (fnp->fn_trigger != NULL) {
1825 		/*
1826 		 * The trigger nodes have a hold on us.
1827 		 */
1828 		count++;
1829 	}
1830 	mutex_enter(&vp->v_lock);
1831 	if (vp->v_flag & VROOT)
1832 		count++;
1833 	ASSERT(vp->v_count > 0);
1834 	AUTOFS_DPRINT((10, "\tcount=%u ", vp->v_count));
1835 	if (vp->v_count > count)
1836 		error = EBUSY;
1837 	mutex_exit(&vp->v_lock);
1838 
1839 	AUTOFS_DPRINT((5, "\tcheck_auto_node error=%d ", error));
1840 	return (error);
1841 }
1842 
1843 /*
1844  * rootvp is the root of the AUTOFS filesystem.
1845  * If rootvp is busy (v_count > 1) returns EBUSY.
1846  * else removes every vnode under this tree.
1847  * ASSUMPTION: Assumes that the only node which can be busy is
1848  * the root vnode. This filesystem better be two levels deep only,
1849  * the root and its immediate subdirs.
1850  * The daemon will "AUTOFS direct-mount" only one level below the root.
1851  */
1852 static int
1853 unmount_autofs(vnode_t *rootvp)
1854 {
1855 	fnnode_t *fnp, *rootfnp, *nfnp;
1856 	int error;
1857 
1858 	AUTOFS_DPRINT((4, "\tunmount_autofs rootvp=%p ", (void *)rootvp));
1859 
1860 	error = check_auto_node(rootvp);
1861 	if (error == 0) {
1862 		/*
1863 		 * Remove all its immediate subdirectories.
1864 		 */
1865 		rootfnp = vntofn(rootvp);
1866 		rw_enter(&rootfnp->fn_rwlock, RW_WRITER);
1867 		nfnp = NULL;	/* lint clean */
1868 		for (fnp = rootfnp->fn_dirents; fnp != NULL; fnp = nfnp) {
1869 			ASSERT(fntovn(fnp)->v_count == 0);
1870 			ASSERT(fnp->fn_dirents == NULL);
1871 			ASSERT(fnp->fn_linkcnt == 2);
1872 			fnp->fn_linkcnt--;
1873 			auto_disconnect(rootfnp, fnp);
1874 			nfnp = fnp->fn_next;
1875 			auto_freefnnode(fnp);
1876 		}
1877 		rw_exit(&rootfnp->fn_rwlock);
1878 	}
1879 	AUTOFS_DPRINT((5, "\tunmount_autofs error=%d ", error));
1880 	return (error);
1881 }
1882 
/*
 * Maximum number of unmount threads running at any one time.
 * NOTE(review): the code that enforces this limit is not in this part of
 * the file -- confirm where it is consumed before changing the value.
 */
static int autofs_unmount_threads = 5;
1887 
1888 /*
1889  * XXX unmount_tree() is not suspend-safe within the scope of
1890  * the present model defined for cpr to suspend the system. Calls made
1891  * by the unmount_tree() that have been identified to be unsafe are
1892  * (1) RPC client handle setup and client calls to automountd which can
1893  * block deep down in the RPC library, (2) kmem_alloc() calls with the
1894  * KM_SLEEP flag which can block if memory is low, and (3) VFS_*() and
1895  * VOP_*() calls which can result in over the wire calls to servers.
1896  * The thread should be completely reevaluated to make it suspend-safe in
1897  * case of future updates to the cpr model.
1898  */
1899 void
1900 unmount_tree(struct autofs_globals *fngp, int force)
1901 {
1902 	vnode_t *vp, *newvp;
1903 	vfs_t *vfsp;
1904 	fnnode_t *fnp, *nfnp, *pfnp;
1905 	action_list *alp;
1906 	int error, ilocked_it = 0;
1907 	fninfo_t *fnip;
1908 	time_t ref_time;
1909 	int autofs_busy_root, unmount_as_unit, unmount_done = 0;
1910 	timestruc_t now;
1911 
1912 	callb_cpr_t cprinfo;
1913 	kmutex_t unmount_tree_cpr_lock;
1914 
1915 	mutex_init(&unmount_tree_cpr_lock, NULL, MUTEX_DEFAULT, NULL);
1916 	CALLB_CPR_INIT(&cprinfo, &unmount_tree_cpr_lock, callb_generic_cpr,
1917 		"unmount_tree");
1918 
1919 	/*
1920 	 * Got to release lock before attempting unmount in case
1921 	 * it hangs.
1922 	 */
1923 	rw_enter(&fngp->fng_rootfnnodep->fn_rwlock, RW_READER);
1924 	if ((fnp = fngp->fng_rootfnnodep->fn_dirents) == NULL) {
1925 		ASSERT(fngp->fng_fnnode_count == 1);
1926 		/*
1927 		 * no autofs mounted, done.
1928 		 */
1929 		rw_exit(&fngp->fng_rootfnnodep->fn_rwlock);
1930 		goto done;
1931 	}
1932 	VN_HOLD(fntovn(fnp));
1933 	rw_exit(&fngp->fng_rootfnnodep->fn_rwlock);
1934 
1935 	vp = fntovn(fnp);
1936 	fnip = vfstofni(vp->v_vfsp);
1937 	/*
1938 	 * autofssys() will be calling in from the global zone and doing
1939 	 * work on the behalf of the given zone, hence we can't always assert
1940 	 * that we have the right credentials, nor that the caller is always in
1941 	 * the correct zone.
1942 	 *
1943 	 * We do, however, know that if this is a "forced unmount" operation
1944 	 * (which autofssys() does), then we won't go down to the krpc layers,
1945 	 * so we don't need to fudge with the credentials.
1946 	 */
1947 	ASSERT(force || fnip->fi_zoneid == getzoneid());
1948 	if (!force && auto_null_request(fnip, kcred, FALSE) != 0) {
1949 		/*
1950 		 * automountd not running in this zone,
1951 		 * don't attempt unmounting this round.
1952 		 */
1953 		VN_RELE(vp);
1954 		goto done;
1955 	}
1956 	/* reference time for this unmount round */
1957 	ref_time = gethrestime_sec();
1958 	/*
1959 	 * If this an autofssys() call, we need to make sure we don't skip
1960 	 * nodes because we think we saw them recently.
1961 	 */
1962 	mutex_enter(&fnp->fn_lock);
1963 	if (force && fnp->fn_unmount_ref_time >= ref_time)
1964 		ref_time = fnp->fn_unmount_ref_time + 1;
1965 	mutex_exit(&fnp->fn_lock);
1966 
1967 	AUTOFS_DPRINT((4, "unmount_tree (ID=%ld)\n", ref_time));
1968 top:
1969 	AUTOFS_DPRINT((10, "unmount_tree: %s\n", fnp->fn_name));
1970 	ASSERT(fnp);
1971 	vp = fntovn(fnp);
1972 	if (vp->v_type == VLNK) {
1973 		/*
1974 		 * can't unmount symbolic links
1975 		 */
1976 		goto next;
1977 	}
1978 	fnip = vfstofni(vp->v_vfsp);
1979 	ASSERT(vp->v_count > 0);
1980 	error = 0;
1981 	autofs_busy_root = unmount_as_unit = 0;
1982 	alp = NULL;
1983 
1984 	ilocked_it = 0;
1985 	mutex_enter(&fnp->fn_lock);
1986 	if (fnp->fn_flags & (MF_INPROG | MF_LOOKUP)) {
1987 		/*
1988 		 * Either a mount, lookup or another unmount of this
1989 		 * subtree is in progress, don't attempt to unmount at
1990 		 * this time.
1991 		 */
1992 		mutex_exit(&fnp->fn_lock);
1993 		error = EBUSY;
1994 		goto next;
1995 	}
1996 	if (fnp->fn_unmount_ref_time >= ref_time) {
1997 		/*
1998 		 * Already been here, try next node.
1999 		 */
2000 		mutex_exit(&fnp->fn_lock);
2001 		error = EBUSY;
2002 		goto next;
2003 	}
2004 	fnp->fn_unmount_ref_time = ref_time;
2005 
2006 	/*
2007 	 * If forced operation ignore timeout values
2008 	 */
2009 	if (!force && fnp->fn_ref_time + fnip->fi_mount_to >
2010 	    gethrestime_sec()) {
2011 		/*
2012 		 * Node has been referenced recently, try the
2013 		 * unmount of its children if any.
2014 		 */
2015 		mutex_exit(&fnp->fn_lock);
2016 		AUTOFS_DPRINT((10, "fn_ref_time within range\n"));
2017 		rw_enter(&fnp->fn_rwlock, RW_READER);
2018 		if (fnp->fn_dirents) {
2019 			/*
2020 			 * Has subdirectory, attempt their
2021 			 * unmount first
2022 			 */
2023 			nfnp = fnp->fn_dirents;
2024 			VN_HOLD(fntovn(nfnp));
2025 			rw_exit(&fnp->fn_rwlock);
2026 
2027 			VN_RELE(vp);
2028 			fnp = nfnp;
2029 			goto top;
2030 		}
2031 		rw_exit(&fnp->fn_rwlock);
2032 		/*
2033 		 * No children, try next node.
2034 		 */
2035 		error = EBUSY;
2036 		goto next;
2037 	}
2038 
2039 	AUTOFS_BLOCK_OTHERS(fnp, MF_INPROG);
2040 	fnp->fn_error = 0;
2041 	mutex_exit(&fnp->fn_lock);
2042 	ilocked_it = 1;
2043 
2044 	rw_enter(&fnp->fn_rwlock, RW_WRITER);
2045 	if (fnp->fn_trigger != NULL) {
2046 		unmount_as_unit = 1;
2047 		if ((vn_mountedvfs(vp) == NULL) && (check_auto_node(vp))) {
2048 			/*
2049 			 * AUTOFS mountpoint is busy, there's
2050 			 * no point trying to unmount. Fall through
2051 			 * to attempt to unmount subtrees rooted
2052 			 * at a possible trigger node, but remember
2053 			 * not to unmount this tree.
2054 			 */
2055 			autofs_busy_root = 1;
2056 		}
2057 
2058 		if (triggers_busy(fnp, &nfnp)) {
2059 			rw_exit(&fnp->fn_rwlock);
2060 			if (nfnp == NULL) {
2061 				error = EBUSY;
2062 				goto next;
2063 			}
2064 			/*
2065 			 * nfnp is busy, try to unmount it first
2066 			 */
2067 			mutex_enter(&fnp->fn_lock);
2068 			AUTOFS_UNBLOCK_OTHERS(fnp, MF_INPROG);
2069 			mutex_exit(&fnp->fn_lock);
2070 			VN_RELE(vp);
2071 			ASSERT(fntovn(nfnp)->v_count > 1);
2072 			fnp = nfnp;
2073 			goto top;
2074 		}
2075 
2076 		/*
2077 		 * At this point, we know all trigger nodes are locked,
2078 		 * and they're not busy or mounted on.
2079 		 */
2080 
2081 		if (autofs_busy_root) {
2082 			/*
2083 			 * Got to unlock the the trigger nodes since
2084 			 * I'm not really going to unmount the filesystem.
2085 			 */
2086 			(void) triggers_unlock(fnp);
2087 		} else {
2088 			/*
2089 			 * Attempt to unmount all the trigger nodes,
2090 			 * save the action_list in case we need to
2091 			 * remount them later. The action_list will be XDR
2092 			 * freed later if there was no need to remount the
2093 			 * trigger nodes.
2094 			 */
2095 			unmount_triggers(fnp, &alp);
2096 		}
2097 	}
2098 	rw_exit(&fnp->fn_rwlock);
2099 
2100 	if (autofs_busy_root)
2101 		goto next;
2102 
2103 	(void) vn_vfswlock_wait(vp);
2104 
2105 	vfsp = vn_mountedvfs(vp);
2106 	if (vfsp != NULL) {
2107 		/*
2108 		 * Node is mounted on.
2109 		 */
2110 		AUTOFS_DPRINT((10, "\tNode is mounted on\n"));
2111 
2112 		/*
2113 		 * Deal with /xfn/host/jurassic alikes here...
2114 		 */
2115 		if (vfs_matchops(vfsp, vfs_getops(vp->v_vfsp))) {
2116 			/*
2117 			 * If the filesystem mounted here is AUTOFS, and it
2118 			 * is busy, try to unmount the tree rooted on it
2119 			 * first. We know this call to VFS_ROOT is safe to
2120 			 * call while holding VVFSLOCK, since it resolves
2121 			 * to a call to auto_root().
2122 			 */
2123 			AUTOFS_DPRINT((10, "\t\tAUTOFS mounted here\n"));
2124 			if (VFS_ROOT(vfsp, &newvp)) {
2125 				cmn_err(CE_PANIC,
2126 				    "unmount_tree: VFS_ROOT(vfs=%p) failed",
2127 				    (void *)vfsp);
2128 			}
2129 			nfnp = vntofn(newvp);
2130 			if (DEEPER(nfnp)) {
2131 				vn_vfsunlock(vp);
2132 				mutex_enter(&fnp->fn_lock);
2133 				AUTOFS_UNBLOCK_OTHERS(fnp, MF_INPROG);
2134 				mutex_exit(&fnp->fn_lock);
2135 				VN_RELE(vp);
2136 				fnp = nfnp;
2137 				goto top;
2138 			}
2139 			/*
2140 			 * Fall through to unmount this filesystem
2141 			 */
2142 			VN_RELE(newvp);
2143 		}
2144 
2145 		/*
2146 		 * vn_vfsunlock(vp) is done inside unmount_node()
2147 		 */
2148 		error = unmount_node(vp, force);
2149 		if (error == ECONNRESET) {
2150 			AUTOFS_DPRINT((10, "\tConnection dropped\n"));
2151 			if (vn_mountedvfs(vp) == NULL) {
2152 				/*
2153 				 * The filesystem was unmounted before the
2154 				 * daemon died. Unfortunately we can not
2155 				 * determine whether all the cleanup work was
2156 				 * successfully finished (i.e. update mnttab,
2157 				 * or notify NFS server of the unmount).
2158 				 * We should not retry the operation since the
2159 				 * filesystem has already been unmounted, and
2160 				 * may have already been removed from mnttab,
2161 				 * in such case the devid/rdevid we send to
2162 				 * the daemon will not be matched. So we have
2163 				 * to be content with the partial unmount.
2164 				 * Since the mountpoint is no longer covered, we
2165 				 * clear the error condition.
2166 				 */
2167 				error = 0;
2168 				auto_log(fngp, CE_WARN,
2169 				    "unmount_tree: automountd connection "
2170 				    "dropped");
2171 				if (fnip->fi_flags & MF_DIRECT) {
2172 					auto_log(fngp, CE_WARN, "unmount_tree: "
2173 					    "%s successfully unmounted - "
2174 					    "do not remount triggers",
2175 					    fnip->fi_path);
2176 				} else {
2177 					auto_log(fngp, CE_WARN, "unmount_tree: "
2178 					    "%s/%s successfully unmounted - "
2179 					    "do not remount triggers",
2180 					    fnip->fi_path, fnp->fn_name);
2181 				}
2182 			}
2183 		}
2184 	} else {
2185 		vn_vfsunlock(vp);
2186 		AUTOFS_DPRINT((10, "\tNode is AUTOFS\n"));
2187 		if (unmount_as_unit) {
2188 			AUTOFS_DPRINT((10, "\tunmount as unit\n"));
2189 			error = unmount_autofs(vp);
2190 		} else {
2191 			AUTOFS_DPRINT((10, "\tunmount one at a time\n"));
2192 			rw_enter(&fnp->fn_rwlock, RW_READER);
2193 			if (fnp->fn_dirents != NULL) {
2194 				/*
2195 				 * Has subdirectory, attempt their
2196 				 * unmount first
2197 				 */
2198 				nfnp = fnp->fn_dirents;
2199 				VN_HOLD(fntovn(nfnp));
2200 				rw_exit(&fnp->fn_rwlock);
2201 
2202 				mutex_enter(&fnp->fn_lock);
2203 				AUTOFS_UNBLOCK_OTHERS(fnp, MF_INPROG);
2204 				mutex_exit(&fnp->fn_lock);
2205 				VN_RELE(vp);
2206 				fnp = nfnp;
2207 				goto top;
2208 			}
2209 			rw_exit(&fnp->fn_rwlock);
2210 			goto next;
2211 		}
2212 	}
2213 
2214 	if (error) {
2215 		AUTOFS_DPRINT((10, "\tUnmount failed\n"));
2216 		if (alp != NULL) {
2217 			/*
2218 			 * Unmount failed, got to remount triggers.
2219 			 */
2220 			ASSERT((fnp->fn_flags & MF_THISUID_MATCH_RQD) == 0);
2221 			error = auto_perform_actions(fnip, fnp, alp, CRED());
2222 			if (error) {
2223 				auto_log(fngp, CE_WARN, "autofs: can't remount "
2224 				    "triggers fnp=%p error=%d", (void *)fnp,
2225 				    error);
2226 				error = 0;
2227 				/*
2228 				 * The action list should have been
2229 				 * xdr_free'd by auto_perform_actions
2230 				 * since an error occurred
2231 				 */
2232 				alp = NULL;
2233 			}
2234 		}
2235 	} else {
2236 		/*
2237 		 * The unmount succeeded, which will cause this node to
2238 		 * be removed from its parent if its an indirect mount,
2239 		 * therefore update the parent's atime and mtime now.
2240 		 * I don't update them in auto_disconnect() because I
2241 		 * don't want atime and mtime changing every time a
2242 		 * lookup goes to the daemon and creates a new node.
2243 		 */
2244 		unmount_done = 1;
2245 		if ((fnip->fi_flags & MF_DIRECT) == 0) {
2246 			gethrestime(&now);
2247 			if (fnp->fn_parent == fngp->fng_rootfnnodep)
2248 				fnp->fn_atime = fnp->fn_mtime = now;
2249 			else
2250 				fnp->fn_parent->fn_atime =
2251 					fnp->fn_parent->fn_mtime = now;
2252 		}
2253 
2254 		/*
2255 		 * Free the action list here
2256 		 */
2257 		if (alp != NULL) {
2258 			xdr_free(xdr_action_list, (char *)alp);
2259 			alp = NULL;
2260 		}
2261 	}
2262 
2263 	fnp->fn_ref_time = gethrestime_sec();
2264 
2265 next:
2266 	/*
2267 	 * Obtain parent's readers lock before grabbing
2268 	 * reference to next sibling.
2269 	 * XXX Note that nodes in the top level list (mounted
2270 	 * in user space not by the daemon in the kernel) parent is itself,
2271 	 * therefore grabbing the lock makes no sense, but doesn't
2272 	 * hurt either.
2273 	 */
2274 	pfnp = fnp->fn_parent;
2275 	ASSERT(pfnp != NULL);
2276 	rw_enter(&pfnp->fn_rwlock, RW_READER);
2277 	if ((nfnp = fnp->fn_next) != NULL)
2278 		VN_HOLD(fntovn(nfnp));
2279 	rw_exit(&pfnp->fn_rwlock);
2280 
2281 	if (ilocked_it) {
2282 		mutex_enter(&fnp->fn_lock);
2283 		if (unmount_done) {
2284 			/*
2285 			 * Other threads may be waiting for this unmount to
2286 			 * finish. We must let it know that in order to
2287 			 * proceed, it must trigger the mount itself.
2288 			 */
2289 			fnp->fn_flags &= ~MF_IK_MOUNT;
2290 			if (fnp->fn_flags & MF_WAITING)
2291 				fnp->fn_error = EAGAIN;
2292 			unmount_done = 0;
2293 		}
2294 		AUTOFS_UNBLOCK_OTHERS(fnp, MF_INPROG);
2295 		mutex_exit(&fnp->fn_lock);
2296 		ilocked_it = 0;
2297 	}
2298 
2299 	if (nfnp != NULL) {
2300 		VN_RELE(vp);
2301 		fnp = nfnp;
2302 		/*
2303 		 * Unmount next element
2304 		 */
2305 		goto top;
2306 	}
2307 
2308 	/*
2309 	 * We don't want to unmount rootfnnodep, so the check is made here
2310 	 */
2311 	ASSERT(pfnp != fnp);
2312 	if (pfnp != fngp->fng_rootfnnodep) {
2313 		/*
2314 		 * Now attempt to unmount my parent
2315 		 */
2316 		VN_HOLD(fntovn(pfnp));
2317 		VN_RELE(vp);
2318 		fnp = pfnp;
2319 
2320 		goto top;
2321 	}
2322 
2323 	VN_RELE(vp);
2324 
2325 	/*
2326 	 * At this point we've walked the entire tree and attempted to unmount
2327 	 * as much as we can one level at a time.
2328 	 */
2329 done:
2330 	mutex_enter(&unmount_tree_cpr_lock);
2331 	CALLB_CPR_EXIT(&cprinfo);
2332 	mutex_destroy(&unmount_tree_cpr_lock);
2333 }
2334 
2335 static void
2336 unmount_zone_tree(struct autofs_globals *fngp)
2337 {
2338 	unmount_tree(fngp, 0);
2339 	mutex_enter(&fngp->fng_unmount_threads_lock);
2340 	fngp->fng_unmount_threads--;
2341 	mutex_exit(&fngp->fng_unmount_threads_lock);
2342 
2343 	AUTOFS_DPRINT((5, "unmount_tree done. Thread exiting.\n"));
2344 
2345 	zthread_exit();
2346 	/* NOTREACHED */
2347 }
2348 
2349 static int autofs_unmount_thread_timer = 120;	/* in seconds */
2350 
2351 void
2352 auto_do_unmount(struct autofs_globals *fngp)
2353 {
2354 	callb_cpr_t cprinfo;
2355 	clock_t timeleft;
2356 	zone_t *zone = curproc->p_zone;
2357 
2358 	CALLB_CPR_INIT(&cprinfo, &fngp->fng_unmount_threads_lock,
2359 		callb_generic_cpr, "auto_do_unmount");
2360 
2361 	for (;;) {	/* forever */
2362 		mutex_enter(&fngp->fng_unmount_threads_lock);
2363 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
2364 newthread:
2365 		mutex_exit(&fngp->fng_unmount_threads_lock);
2366 		timeleft = zone_status_timedwait(zone, lbolt +
2367 		    autofs_unmount_thread_timer * hz, ZONE_IS_SHUTTING_DOWN);
2368 		mutex_enter(&fngp->fng_unmount_threads_lock);
2369 
2370 		if (timeleft != -1) {	/* didn't time out */
2371 			ASSERT(zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN);
2372 			/*
2373 			 * zone is exiting... don't create any new threads.
2374 			 * fng_unmount_threads_lock is released implicitly by
2375 			 * the below.
2376 			 */
2377 			CALLB_CPR_SAFE_END(&cprinfo,
2378 				&fngp->fng_unmount_threads_lock);
2379 			CALLB_CPR_EXIT(&cprinfo);
2380 			zthread_exit();
2381 			/* NOTREACHED */
2382 		}
2383 		if (fngp->fng_unmount_threads < autofs_unmount_threads) {
2384 			fngp->fng_unmount_threads++;
2385 			CALLB_CPR_SAFE_END(&cprinfo,
2386 				&fngp->fng_unmount_threads_lock);
2387 			mutex_exit(&fngp->fng_unmount_threads_lock);
2388 
2389 			(void) zthread_create(NULL, 0, unmount_zone_tree, fngp,
2390 			    0, minclsyspri);
2391 		} else
2392 			goto newthread;
2393 	}
2394 	/* NOTREACHED */
2395 }
2396 
2397 /*
2398  * Is nobrowse specified in option string?
2399  * opts should be a null ('\0') terminated string.
2400  * Returns non-zero if nobrowse has been specified.
2401  */
2402 int
2403 auto_nobrowse_option(char *opts)
2404 {
2405 	char *buf;
2406 	char *p;
2407 	char *t;
2408 	int nobrowse = 0;
2409 	int last_opt = 0;
2410 	size_t len;
2411 
2412 	len = strlen(opts) + 1;
2413 	p = buf = kmem_alloc(len, KM_SLEEP);
2414 	(void) strcpy(buf, opts);
2415 	do {
2416 		if (t = strchr(p, ','))
2417 			*t++ = '\0';
2418 		else
2419 			last_opt++;
2420 		if (strcmp(p, MNTOPT_NOBROWSE) == 0)
2421 			nobrowse = 1;
2422 		else if (strcmp(p, MNTOPT_BROWSE) == 0)
2423 			nobrowse = 0;
2424 		p = t;
2425 	} while (!last_opt);
2426 	kmem_free(buf, len);
2427 
2428 	return (nobrowse);
2429 }
2430 
2431 /*
2432  * used to log warnings only if automountd is running
2433  * with verbose mode set
2434  */
2435 void
2436 auto_log(struct autofs_globals *fngp, int level, const char *fmt, ...)
2437 {
2438 	va_list args;
2439 
2440 	if (fngp->fng_verbose > 0) {
2441 		va_start(args, fmt);
2442 		vzcmn_err(fngp->fng_zoneid, level, fmt, args);
2443 		va_end(args);
2444 	}
2445 }
2446 
#ifdef DEBUG
/* Current autofs debug setting; 0 selects only level-0 messages. */
static int autofs_debug = 0;

/*
 * Debug print utility used by both client and server.
 *
 * A message tagged with `level' is printed when autofs_debug is equal
 * to that level, or when autofs_debug is above 10 and
 * (autofs_debug - 10) is at least `level'.
 *
 * Standard levels:
 *  0) no debugging
 *  1) hard failures
 *  2) soft failures
 *  3) current test software
 *  4) main procedure entry points
 *  5) main procedure exit points
 *  6) utility procedure entry points
 *  7) utility procedure exit points
 *  8) obscure procedure entry points
 *  9) obscure procedure exit points
 * 10) random stuff
 * 11) all <= 1
 * 12) all <= 2
 * 13) all <= 3
 * ...
 */
/* PRINTFLIKE2 */
void
auto_dprint(int level, const char *fmt, ...)
{
	va_list ap;
	int wanted;

	/* Exact match on the configured level ... */
	wanted = (autofs_debug == level);

	/* ... or a cumulative setting (> 10) that covers this level. */
	if (!wanted && autofs_debug > 10)
		wanted = ((autofs_debug - 10) >= level);

	if (!wanted)
		return;

	va_start(ap, fmt);
	(void) vprintf(fmt, ap);
	va_end(ap);
}
#endif /* DEBUG */
2483