/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2015 by Delphix. All rights reserved.
 * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
 * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
 */

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/cred.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/pathname.h>
#include <sys/utsname.h>
#include <sys/debug.h>
#include <sys/door.h>
#include <sys/sdt.h>
#include <sys/thread.h>
#include <sys/avl.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>

#include <nfs/nfs.h>
#include <nfs/export.h>
#include <nfs/nfs_clnt.h>
#include <nfs/auth.h>

/*
 * kmem cache that backs every struct auth_cache entry; it is created in
 * nfsauth_init() and destroyed in nfsauth_fini().
 */
static struct kmem_cache *exi_cache_handle;
/*
 * Forward declarations for the cache reclaim code.  exi_cache_reclaim() is
 * the reclaim callback handed to kmem_cache_create() in nfsauth_init().
 */
static void exi_cache_reclaim(void *);
static void exi_cache_reclaim_zone(nfs_globals_t *);
static void exi_cache_trim(struct exportinfo *exi);

/* Priority passed to zthread_create() when the refresh thread is launched. */
extern pri_t minclsyspri;

/*
 * NFS auth cache statistics.
 *
 * In nfsauth_cache_get(): nfsauth_cache_hit is incremented when an existing
 * cache entry supplies the answer, nfsauth_cache_miss when mountd has to be
 * asked, and nfsauth_cache_refresh when a FRESH entry is marked STALE and
 * handed to the refresh thread.
 *
 * NOTE(review): the reclaim-related counters are presumably maintained by
 * the exi_cache_reclaim*() code -- confirm.
 */
volatile uint_t nfsauth_cache_hit;
volatile uint_t nfsauth_cache_miss;
volatile uint_t nfsauth_cache_refresh;
volatile uint_t nfsauth_cache_reclaim;
volatile uint_t exi_cache_auth_reclaim_failed;
volatile uint_t exi_cache_clnt_reclaim_failed;

/*
 * The lifetime of an auth cache entry:
 * ------------------------------------
 *
 * An auth cache entry is created with both the auth_time
 * and auth_freshness times set to the current time.
 *
 * Upon every client access which results in a hit, the
 * auth_time will be updated.
 *
 * If a client access determines that the auth_freshness
 * indicates that the entry is STALE, then it will be
 * refreshed. Note that this will explicitly reset
 * auth_time.
 *
 * When the REFRESH successfully occurs, then the
 * auth_freshness is updated.
 *
 * There are two ways for an entry to leave the cache:
 *
 * 1) Purged by an action on the export (remove or changed)
 * 2) Memory backpressure from the kernel (check against NFSAUTH_CACHE_TRIM)
 *
 * For 2) we check the timeout value against auth_time.
 */

/*
 * Number of seconds until we mark for refresh an auth cache entry.
 * nfsauth_cache_get() compares this against the time elapsed since the
 * entry's auth_freshness; a FRESH entry older than this becomes STALE and
 * is queued for the refresh thread.
 */
#define	NFSAUTH_CACHE_REFRESH 600

/*
 * Number of idle seconds until we yield to backpressure
 * to trim a cache entry.  As described in the entry lifetime notes,
 * the timeout is checked against the entry's auth_time.
 */
#define	NFSAUTH_CACHE_TRIM 3600

/*
 * While we could encapsulate the exi_list inside the
 * exi structure, we can't do that for the auth_list.
 * So, to keep things looking clean, we keep them both
 * in these external lists.
 */
/*
 * One element of a refreshq_queue: an export together with the list of its
 * auth cache entries that are waiting to be refreshed.
 */
typedef struct refreshq_exi_node {
	/* Export being refreshed; exi_hold() is taken when the node is made. */
	struct exportinfo	*ren_exi;
	/* List of refreshq_auth_node_t, linked through ran_node. */
	list_t			ren_authlist;
	/* Linkage on the refreshq_queue. */
	list_node_t		ren_node;
} refreshq_exi_node_t;

/*
 * One element of a refreshq_exi_node_t's ren_authlist: a single auth cache
 * entry queued for refresh.
 */
typedef struct refreshq_auth_node {
	/* The cache entry to refresh. */
	struct auth_cache	*ran_auth;
	/* strdup()ed netid of the request that triggered the refresh. */
	char			*ran_netid;
	/* Linkage on ren_authlist. */
	list_node_t		ran_node;
} refreshq_auth_node_t;

/*
 * Used to manipulate things on the refreshq_queue.  Note that the refresh
 * thread will effectively pop a node off of the queue, at which point it
 * will no longer need to hold the mutex.
 *
 * NOTE(review): nfsauth_globals_t carries members with these same names,
 * and the functions visible in this part of the file only ever use those
 * (via nag->...).  Confirm whether these file-scope copies are still
 * referenced anywhere before relying on them.
 */
static kmutex_t refreshq_lock;
static list_t refreshq_queue;
static kcondvar_t refreshq_cv;

/*
 * If there is ever a problem with loading the module, then nfsauth_fini()
 * needs to be called to remove state.  In that event, since the refreshq
 * thread has been started, they need to work together to get rid of state.
 *
 * NOTE(review): in the code visible in this file the shutdown handshake
 * with the refresh thread is performed by nfsauth_zone_shutdown(), not by
 * nfsauth_fini() -- confirm and update the paragraph above.
 */
typedef enum nfsauth_refreshq_thread_state {
	/* Set by nfsauth_cache_get() when it launches the refresh thread. */
	REFRESHQ_THREAD_RUNNING,
	/* Set by nfsauth_zone_shutdown() to ask a running thread to exit. */
	REFRESHQ_THREAD_FINI_REQ,
	/* Set by nfsauth_refresh_thread() right before it exits. */
	REFRESHQ_THREAD_HALTED,
	/* Initial state set by nfsauth_zone_init(); no thread created yet. */
	REFRESHQ_THREAD_NEED_CREATE
} nfsauth_refreshq_thread_state_t;

/*
 * NFS auth state.  One instance is allocated by nfsauth_zone_init() and
 * attached to the nfs_globals_t passed to it (ng->nfs_auth); it is freed
 * by nfsauth_zone_fini().
 */
typedef struct nfsauth_globals {
	/*
	 * mountd_dh is the door handle used for upcalls to mountd (NULL
	 * until mountd_args() installs one); mountd_lock is held while the
	 * handle is read or replaced.
	 */
	kmutex_t	mountd_lock;
	door_handle_t   mountd_dh;

	/*
	 * Used to manipulate things on the refreshq_queue.  Note that the
	 * refresh thread will effectively pop a node off of the queue,
	 * at which point it will no longer need to hold the mutex.
	 */
	kmutex_t	refreshq_lock;
	list_t		refreshq_queue;
	kcondvar_t	refreshq_cv;

	/*
	 * A list_t would be overkill.  These are auth_cache entries which are
	 * no longer linked to an exi.  It should be the case that all of their
	 * states are NFS_AUTH_INVALID, i.e., the only way to be put on this
	 * list is iff their state indicated that they had been placed on the
	 * refreshq_queue.
	 *
	 * Note that while there is no link from the exi or back to the exi,
	 * the exi can not go away until these entries are harvested.
	 */
	struct auth_cache		*refreshq_dead_entries;
	/*
	 * Lifecycle state of the refresh thread.  After initialization it is
	 * modified with refreshq_lock held; the refresh thread additionally
	 * reads it without the lock as an advisory shutdown check.
	 */
	nfsauth_refreshq_thread_state_t	refreshq_thread_state;

} nfsauth_globals_t;

/* Forward declarations for helpers that are used before their definitions. */
static void nfsauth_free_node(struct auth_cache *);
static void nfsauth_refresh_thread(nfsauth_globals_t *);

/* AVL comparator for the per-client tree of struct auth_cache entries. */
static int nfsauth_cache_compar(const void *, const void *);

static nfsauth_globals_t *
nfsauth_get_zg(void)
{
	nfs_globals_t *ng = nfs_srv_getzg();
	nfsauth_globals_t *nag = ng->nfs_auth;
	ASSERT(nag != NULL);
	return (nag);
}

/*
 * Install the door identified by did as the mountd rendezvous point.
 * Any handle installed earlier is released first; the swap is done with
 * mountd_lock held.
 */
void
mountd_args(uint_t did)
{
	nfsauth_globals_t *zone_auth = nfsauth_get_zg();
	kmutex_t *lock = &zone_auth->mountd_lock;

	mutex_enter(lock);

	if (zone_auth->mountd_dh != NULL) {
		door_ki_rele(zone_auth->mountd_dh);
	}
	zone_auth->mountd_dh = door_ki_lookup(did);

	mutex_exit(lock);
}

/*
 * Create the kmem cache used for struct auth_cache entries and register
 * exi_cache_reclaim() as its reclaim callback.
 */
void
nfsauth_init(void)
{
	const size_t entry_size = sizeof (struct auth_cache);

	exi_cache_handle = kmem_cache_create("exi_cache_handle", entry_size,
	    0, NULL, NULL, exi_cache_reclaim, NULL, NULL, 0);
}

/*
 * Destroy the kmem cache that nfsauth_init() created.
 */
void
nfsauth_fini(void)
{
	struct kmem_cache *cache = exi_cache_handle;

	kmem_cache_destroy(cache);
}

/*
 * Allocate and initialize the nfsauth_globals_t for ng and attach it as
 * ng->nfs_auth.  The refresh thread is not started here; its state is
 * left at REFRESHQ_THREAD_NEED_CREATE.
 */
void
nfsauth_zone_init(nfs_globals_t *ng)
{
	nfsauth_globals_t *zone_auth;

	zone_auth = kmem_zalloc(sizeof (nfsauth_globals_t), KM_SLEEP);

	/*
	 * mountd may be restarted by smf(5), so the door handle kept in
	 * mountd_dh can be replaced at any time; mountd_lock makes that
	 * replacement safe.
	 */
	mutex_init(&zone_auth->mountd_lock, NULL, MUTEX_DEFAULT, NULL);

	/* Refresh queue: lock, list of refreshq_exi_node_t, and condvar. */
	mutex_init(&zone_auth->refreshq_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zone_auth->refreshq_queue,
	    sizeof (refreshq_exi_node_t),
	    offsetof(refreshq_exi_node_t, ren_node));
	cv_init(&zone_auth->refreshq_cv, NULL, CV_DEFAULT, NULL);

	zone_auth->refreshq_thread_state = REFRESHQ_THREAD_NEED_CREATE;

	ng->nfs_auth = zone_auth;
}

/*
 * Stop the refresh thread (if it is running) and throw away whatever work
 * is still sitting on the refresh queue.
 */
void
nfsauth_zone_shutdown(nfs_globals_t *ng)
{
	nfsauth_globals_t	*zone_auth = ng->nfs_auth;
	refreshq_exi_node_t	*exi_node;
	refreshq_auth_node_t	*auth_node;

	/*
	 * Ask a running nfsauth_refresh_thread() to stop taking new work
	 * and wait until it reports that it has halted.
	 */
	mutex_enter(&zone_auth->refreshq_lock);
	if (zone_auth->refreshq_thread_state == REFRESHQ_THREAD_RUNNING) {
		zone_auth->refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ;
		cv_broadcast(&zone_auth->refreshq_cv);

		while (zone_auth->refreshq_thread_state !=
		    REFRESHQ_THREAD_HALTED) {
			cv_wait(&zone_auth->refreshq_cv,
			    &zone_auth->refreshq_lock);
		}
	}
	mutex_exit(&zone_auth->refreshq_lock);

	/*
	 * Drain the queue: for each export node, drain its auth list,
	 * freeing the list nodes and any auth_cache entry that is already
	 * INVALID, then drop the export hold and free the export node.
	 */
	for (exi_node = list_remove_head(&zone_auth->refreshq_queue);
	    exi_node != NULL;
	    exi_node = list_remove_head(&zone_auth->refreshq_queue)) {

		for (auth_node = list_remove_head(&exi_node->ren_authlist);
		    auth_node != NULL;
		    auth_node = list_remove_head(&exi_node->ren_authlist)) {
			struct auth_cache *entry = auth_node->ran_auth;

			if (entry->auth_state == NFS_AUTH_INVALID) {
				nfsauth_free_node(entry);
			}
			strfree(auth_node->ran_netid);
			kmem_free(auth_node, sizeof (refreshq_auth_node_t));
		}

		list_destroy(&exi_node->ren_authlist);
		exi_rele(exi_node->ren_exi);
		kmem_free(exi_node, sizeof (refreshq_exi_node_t));
	}
}

/*
 * Detach the nfsauth_globals_t from ng, tear down its synchronization
 * objects and queue, release the mountd door handle and free it.
 */
void
nfsauth_zone_fini(nfs_globals_t *ng)
{
	nfsauth_globals_t *zone_auth = ng->nfs_auth;

	/* Unhook first so ng never points at freed memory. */
	ng->nfs_auth = NULL;

	list_destroy(&zone_auth->refreshq_queue);
	cv_destroy(&zone_auth->refreshq_cv);
	mutex_destroy(&zone_auth->refreshq_lock);
	mutex_destroy(&zone_auth->mountd_lock);

	/* Drop the hold on mountd's door if one is still installed. */
	if (zone_auth->mountd_dh != NULL) {
		door_ki_rele(zone_auth->mountd_dh);
	}

	kmem_free(zone_auth, sizeof (nfsauth_globals_t));
}

/*
 * Fold the bytes of the address held in a netbuf into an index
 * for the auth_cache table: XOR all a->len bytes together and
 * keep the bits selected by (AUTH_TABLESIZE - 1).
 */
static int
hash(struct netbuf *a)
{
	int folded = 0;
	int idx = 0;

	while (idx < a->len) {
		folded ^= a->buf[idx];
		idx++;
	}

	return (folded & (AUTH_TABLESIZE - 1));
}

/*
 * Strip from an address the parts that do not
 * identify a host by AND-ing every byte of addr,
 * in place, with the matching byte of mask.
 * For socket addresses this removes the port number.
 */
static void
addrmask(struct netbuf *addr, struct netbuf *mask)
{
	int idx = 0;

	while (idx < addr->len) {
		addr->buf[idx] = addr->buf[idx] & mask->buf[idx];
		idx++;
	}
}

/*
 * NFS V4 flavor of the auth check.  It runs the common nfsauth_access()
 * and then decides whether a client that was refused may still be given
 * a limited view of this vnode.
 *
 * The server sometimes has to allow such a limited view, e.g.:
 *
 *	/export is shared as "sec=sys,rw=dfs-test-4,sec=krb5,rw"
 *	/export/home is shared as "sec=sys,rw"
 *
 * A client mounting /export with sec=sys gets a LIMITED, read-only view
 * of /export that shows only "home", because it is allowed to access
 * /export/home with auth_sys.
 *
 * Returns NFSAUTH_LIMITED when nfsauth_access() reported NFSAUTH_DENIED
 * or NFSAUTH_WRONGSEC and has_visible() finds something exported under
 * vp; otherwise returns the nfsauth_access() result unchanged.
 */
int
nfsauth4_access(struct exportinfo *exi, vnode_t *vp, struct svc_req *req,
    cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
{
	int rights = nfsauth_access(exi, req, cr, uid, gid, ngids, gids);
	int refused = (rights & NFSAUTH_DENIED) || (rights & NFSAUTH_WRONGSEC);

	/* has_visible() is consulted only for refused requests. */
	if (refused && has_visible(exi, vp)) {
		return (NFSAUTH_LIMITED);
	}

	return (rights);
}

/*
 * Emit msg as a CE_WARN console/log message, rate limited to (at most)
 * one message per minute.
 *
 * The timestamp of the last emitted message is a single function-static
 * variable, so the limit is shared by all callers and all messages; a
 * message arriving within 60 seconds of the previous one is dropped.
 *
 * msg is always passed to cmn_err() as an argument of a "%s" format and
 * never as the format string itself, so a '%' in msg cannot be
 * interpreted as a conversion specification.
 */
static void
sys_log(const char *msg)
{
	static time_t	tstamp = 0;
	time_t		now;

	/*
	 * msg is shown (at most) once per minute
	 */
	now = gethrestime_sec();
	if ((tstamp + 60) < now) {
		tstamp = now;
		cmn_err(CE_WARN, "%s", msg);
	}
}

/*
 * Callup to the mountd to get access information in the kernel.
 *
 * The request (client address, netid, export path, security flavor and the
 * client's uid/gid/supplemental groups) is XDR-encoded as a varg_t and sent
 * through the door handle kept in nag->mountd_dh.
 *
 * On success TRUE is returned; *access holds the permission reported by
 * mountd and *srv_uid, *srv_gid, *srv_gids_cnt and *srv_gids are filled in.
 * *srv_gids is a kmem_alloc()ed array of *srv_gids_cnt gid_t's (NULL when
 * the count is zero) which the caller has to free.
 *
 * On failure FALSE is returned and *access is set to NFSAUTH_DENIED, or to
 * NFSAUTH_DROP when no door handle became available; the srv_* outputs are
 * not written in that case.
 */
static bool_t
nfsauth_retrieve(nfsauth_globals_t *nag, struct exportinfo *exi,
    char *req_netid, int flavor, struct netbuf *addr, int *access,
    cred_t *clnt_cred, uid_t *srv_uid, gid_t *srv_gid, uint_t *srv_gids_cnt,
    gid_t **srv_gids)
{
	varg_t			  varg = {0};
	nfsauth_res_t		  res = {0};
	XDR			  xdrs;
	size_t			  absz;
	caddr_t			  abuf;
	int			  last = 0;
	door_arg_t		  da;
	door_info_t		  di;
	door_handle_t		  dh;
	uint_t			  ntries = 0;

	/*
	 * No entry in the cache for this client/flavor
	 * so we need to call the nfsauth service in the
	 * mount daemon.
	 */

	varg.vers = V_PROTO;
	varg.arg_u.arg.cmd = NFSAUTH_ACCESS;
	varg.arg_u.arg.areq.req_client.n_len = addr->len;
	varg.arg_u.arg.areq.req_client.n_bytes = addr->buf;
	varg.arg_u.arg.areq.req_netid = req_netid;
	varg.arg_u.arg.areq.req_path = exi->exi_export.ex_path;
	varg.arg_u.arg.areq.req_flavor = flavor;
	varg.arg_u.arg.areq.req_clnt_uid = crgetuid(clnt_cred);
	varg.arg_u.arg.areq.req_clnt_gid = crgetgid(clnt_cred);
	varg.arg_u.arg.areq.req_clnt_gids.len = crgetngroups(clnt_cred);
	varg.arg_u.arg.areq.req_clnt_gids.val = (gid_t *)crgetgroups(clnt_cred);

	DTRACE_PROBE1(nfsserv__func__nfsauth__varg, varg_t *, &varg);

	/*
	 * Setup the XDR stream for encoding the arguments. Notice that
	 * in addition to the args having variable fields (req_netid and
	 * req_path), the argument data structure is itself versioned,
	 * so we need to make sure we can size the arguments buffer
	 * appropriately to encode all the args. If we can't get sizing
	 * info _or_ properly encode the arguments, there's really no
	 * point in continuing, so we fail the request.
	 */
	if ((absz = xdr_sizeof(xdr_varg, &varg)) == 0) {
		/* abuf has not been allocated yet, so "fail" can't be used. */
		*access = NFSAUTH_DENIED;
		return (FALSE);
	}

	abuf = (caddr_t)kmem_alloc(absz, KM_SLEEP);
	xdrmem_create(&xdrs, abuf, absz, XDR_ENCODE);
	if (!xdr_varg(&xdrs, &varg)) {
		XDR_DESTROY(&xdrs);
		goto fail;
	}
	XDR_DESTROY(&xdrs);

	/*
	 * Prepare the door arguments
	 *
	 * We don't know the size of the message the daemon
	 * will pass back to us.  By setting rbuf to NULL,
	 * we force the door code to allocate a buf of the
	 * appropriate size.  We must set rsize > 0, however,
	 * else the door code acts as if no response was
	 * expected and doesn't pass the data to us.
	 */
	da.data_ptr = (char *)abuf;
	da.data_size = absz;
	da.desc_ptr = NULL;
	da.desc_num = 0;
	da.rbuf = NULL;
	da.rsize = 1;

retry:
	/* Take our own hold on the current handle; it may be replaced. */
	mutex_enter(&nag->mountd_lock);
	dh = nag->mountd_dh;
	if (dh != NULL)
		door_ki_hold(dh);
	mutex_exit(&nag->mountd_lock);

	if (dh == NULL) {
		/*
		 * The rendezvous point has not been established yet!
		 * This could mean that either mountd(1m) has not yet
		 * been started or that _this_ routine nuked the door
		 * handle after receiving an EINTR for a REVOKED door.
		 *
		 * Returning NFSAUTH_DROP will cause the NFS client
		 * to retransmit the request, so let's try to be more
		 * resilient and attempt for ntries before we bail.
		 */
		if (++ntries % NFSAUTH_DR_TRYCNT) {
			delay(hz);
			goto retry;
		}

		kmem_free(abuf, absz);

		sys_log("nfsauth: mountd has not established door");
		*access = NFSAUTH_DROP;
		return (FALSE);
	}

	ntries = 0;

	/*
	 * Now that we've got what we need, place the call.
	 * Every case below releases the hold taken on dh above.
	 */
	switch (door_ki_upcall_limited(dh, &da, NULL, SIZE_MAX, 0)) {
	case 0:				/* Success */
		door_ki_rele(dh);

		if (da.data_ptr == NULL && da.data_size == 0) {
			/*
			 * The door_return that contained the data
			 * failed! We're here because of the 2nd
			 * door_return (w/o data) such that we can
			 * get control of the thread (and exit
			 * gracefully).
			 */
			DTRACE_PROBE1(nfsserv__func__nfsauth__door__nil,
			    door_arg_t *, &da);
			goto fail;
		}

		break;

	case EAGAIN:
		/*
		 * Server out of resources; back off for a bit
		 */
		door_ki_rele(dh);
		delay(hz);
		goto retry;
		/* NOTREACHED */

	case EINTR:
		if (!door_ki_info(dh, &di)) {
			door_ki_rele(dh);

			if (di.di_attributes & DOOR_REVOKED) {
				/*
				 * The server barfed and revoked
				 * the (existing) door on us; we
				 * want to wait to give smf(5) a
				 * chance to restart mountd(1m)
				 * and establish a new door handle.
				 *
				 * The handle is only cleared when it
				 * is still the one we used, i.e. when
				 * nobody installed a new one meanwhile.
				 */
				mutex_enter(&nag->mountd_lock);
				if (dh == nag->mountd_dh) {
					door_ki_rele(nag->mountd_dh);
					nag->mountd_dh = NULL;
				}
				mutex_exit(&nag->mountd_lock);
				delay(hz);
				goto retry;
			}
			/*
			 * If the door was _not_ revoked on us,
			 * then more than likely we took an INTR,
			 * so we need to fail the operation.
			 */
			goto fail;
		}
		/*
		 * The only failure that can occur from getting
		 * the door info is EINVAL, so we let the code
		 * below handle it.
		 */
		/* FALLTHROUGH */

	case EBADF:
	case EINVAL:
	default:
		/*
		 * If we have a stale door handle, give smf a last
		 * chance to start it by sleeping for a little bit.
		 * If we're still hosed, we'll fail the call.
		 *
		 * Since we're going to reacquire the door handle
		 * upon the retry, we opt to sleep for a bit and
		 * _not_ to clear mountd_dh. If mountd restarted
		 * and was able to set mountd_dh, we should see
		 * the new instance; if not, we won't get caught
		 * up in the retry/DELAY loop.
		 */
		door_ki_rele(dh);
		if (!last) {
			delay(hz);
			last++;
			goto retry;
		}
		sys_log("nfsauth: stale mountd door handle");
		goto fail;
	}

	ASSERT(da.rbuf != NULL);

	/*
	 * No door errors encountered; setup the XDR stream for decoding
	 * the results. If we fail to decode the results, we've got no
	 * other recourse than to fail the request.
	 */
	xdrmem_create(&xdrs, da.rbuf, da.rsize, XDR_DECODE);
	if (!xdr_nfsauth_res(&xdrs, &res)) {
		xdr_free(xdr_nfsauth_res, (char *)&res);
		XDR_DESTROY(&xdrs);
		kmem_free(da.rbuf, da.rsize);
		goto fail;
	}
	XDR_DESTROY(&xdrs);
	kmem_free(da.rbuf, da.rsize);

	DTRACE_PROBE1(nfsserv__func__nfsauth__results, nfsauth_res_t *, &res);
	switch (res.stat) {
		case NFSAUTH_DR_OKAY:
			*access = res.ares.auth_perm;
			*srv_uid = res.ares.auth_srv_uid;
			*srv_gid = res.ares.auth_srv_gid;

			/*
			 * Hand the caller its own copy of the group list;
			 * the decoded one is released by xdr_free() below.
			 */
			if ((*srv_gids_cnt = res.ares.auth_srv_gids.len) != 0) {
				*srv_gids = kmem_alloc(*srv_gids_cnt *
				    sizeof (gid_t), KM_SLEEP);
				bcopy(res.ares.auth_srv_gids.val, *srv_gids,
				    *srv_gids_cnt * sizeof (gid_t));
			} else {
				*srv_gids = NULL;
			}

			break;

		case NFSAUTH_DR_EFAIL:
		case NFSAUTH_DR_DECERR:
		case NFSAUTH_DR_BADCMD:
		default:
			xdr_free(xdr_nfsauth_res, (char *)&res);
			/*
			 * Common failure exit, also reached by goto from
			 * above: deny access and free the argument buffer.
			 */
fail:
			*access = NFSAUTH_DENIED;
			kmem_free(abuf, absz);
			return (FALSE);
			/* NOTREACHED */
	}

	xdr_free(xdr_nfsauth_res, (char *)&res);
	kmem_free(abuf, absz);

	return (TRUE);
}

/*
 * Body of the refresh thread that nfsauth_cache_get() creates on demand.
 *
 * While refreshq_thread_state is REFRESHQ_THREAD_RUNNING the thread pops
 * refreshq_exi_node_t's off nag->refreshq_queue (sleeping on refreshq_cv
 * when the queue is empty) and, for each auth_cache entry queued on a node,
 * calls nfsauth_retrieve() and stores the result in the entry.  When the
 * state is anything else the thread sets REFRESHQ_THREAD_HALTED, wakes the
 * waiters on refreshq_cv and exits.
 */
static void
nfsauth_refresh_thread(nfsauth_globals_t *nag)
{
	refreshq_exi_node_t	*ren;
	refreshq_auth_node_t	*ran;

	struct exportinfo	*exi;

	int			access;
	bool_t			retrieval;

	callb_cpr_t		cprinfo;

	CALLB_CPR_INIT(&cprinfo, &nag->refreshq_lock, callb_generic_cpr,
	    "nfsauth_refresh");

	for (;;) {
		mutex_enter(&nag->refreshq_lock);
		if (nag->refreshq_thread_state != REFRESHQ_THREAD_RUNNING) {
			/* Keep the hold on the lock! */
			break;
		}

		ren = list_remove_head(&nag->refreshq_queue);
		if (ren == NULL) {
			/* Nothing queued: sleep until work or a state change. */
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&nag->refreshq_cv, &nag->refreshq_lock);
			CALLB_CPR_SAFE_END(&cprinfo, &nag->refreshq_lock);
			mutex_exit(&nag->refreshq_lock);
			continue;
		}
		mutex_exit(&nag->refreshq_lock);

		exi = ren->ren_exi;
		ASSERT(exi != NULL);

		/*
		 * Since the ren was removed from the refreshq_queue above,
		 * this is the only thread aware about the ren existence, so we
		 * have the exclusive ownership of it and we do not need to
		 * protect it by any lock.
		 */
		while ((ran = list_remove_head(&ren->ren_authlist))) {
			uid_t uid;
			gid_t gid;
			uint_t ngids;
			gid_t *gids;
			struct auth_cache *p = ran->ran_auth;
			char *netid = ran->ran_netid;

			ASSERT(p != NULL);
			ASSERT(netid != NULL);

			/* p and netid were copied out; the node is done. */
			kmem_free(ran, sizeof (refreshq_auth_node_t));

			mutex_enter(&p->auth_lock);

			/*
			 * Once the entry goes INVALID, it can not change
			 * state.
			 *
			 * No need to refresh entries also in a case we are
			 * just shutting down.
			 *
			 * In general, there is no need to hold the
			 * refreshq_lock to test the refreshq_thread_state.  We
			 * do hold it at other places because there is some
			 * related thread synchronization (or some other tasks)
			 * close to the refreshq_thread_state check.
			 *
			 * The check for the refreshq_thread_state value here
			 * is purely advisory to allow the faster
			 * nfsauth_refresh_thread() shutdown.  In a case we
			 * will miss such advisory, nothing catastrophic
			 * happens: we will just spin longer here before the
			 * shutdown.
			 */
			if (p->auth_state == NFS_AUTH_INVALID ||
			    nag->refreshq_thread_state !=
			    REFRESHQ_THREAD_RUNNING) {
				mutex_exit(&p->auth_lock);

				if (p->auth_state == NFS_AUTH_INVALID)
					nfsauth_free_node(p);

				strfree(netid);

				continue;
			}

			/*
			 * Make sure the state is valid.  Note that once we
			 * change the state to NFS_AUTH_REFRESHING, no other
			 * thread will be able to work on this entry.
			 */
			ASSERT(p->auth_state == NFS_AUTH_STALE);

			p->auth_state = NFS_AUTH_REFRESHING;
			mutex_exit(&p->auth_lock);

			DTRACE_PROBE2(nfsauth__debug__cache__refresh,
			    struct exportinfo *, exi,
			    struct auth_cache *, p);

			/*
			 * The first caching of the access rights
			 * is done with the netid pulled out of the
			 * request from the client. All subsequent
			 * users of the cache may or may not have
			 * the same netid. It doesn't matter. So
			 * when we refresh, we simply use the netid
			 * of the request which triggered the
			 * refresh attempt.
			 */
			retrieval = nfsauth_retrieve(nag, exi, netid,
			    p->auth_flavor, &p->auth_clnt->authc_addr, &access,
			    p->auth_clnt_cred, &uid, &gid, &ngids, &gids);

			/*
			 * This can only be set in one other place
			 * and the state has to be NFS_AUTH_FRESH.
			 */
			strfree(netid);

			mutex_enter(&p->auth_lock);
			if (p->auth_state == NFS_AUTH_INVALID) {
				/*
				 * The entry was invalidated while we were
				 * talking to mountd: free it and discard
				 * the group list we were handed, if any.
				 */
				mutex_exit(&p->auth_lock);
				nfsauth_free_node(p);
				if (retrieval == TRUE)
					kmem_free(gids, ngids * sizeof (gid_t));
			} else {
				/*
				 * If we got an error, do not reset the
				 * time. This will cause the next access
				 * check for the client to reschedule this
				 * node.
				 */
				if (retrieval == TRUE) {
					p->auth_access = access;

					p->auth_srv_uid = uid;
					p->auth_srv_gid = gid;
					/* Replace the old group list. */
					kmem_free(p->auth_srv_gids,
					    p->auth_srv_ngids * sizeof (gid_t));
					p->auth_srv_ngids = ngids;
					p->auth_srv_gids = gids;

					p->auth_freshness = gethrestime_sec();
				}
				p->auth_state = NFS_AUTH_FRESH;

				cv_broadcast(&p->auth_cv);
				mutex_exit(&p->auth_lock);
			}
		}

		/* Every auth node is handled; release the export node. */
		list_destroy(&ren->ren_authlist);
		exi_rele(ren->ren_exi);
		kmem_free(ren, sizeof (refreshq_exi_node_t));
	}

	/*
	 * We get here with refreshq_lock still held (see the break above).
	 * NOTE(review): nothing on this path calls mutex_exit(), so
	 * CALLB_CPR_EXIT() is expected to drop the lock -- confirm against
	 * sys/callb.h.
	 */
	nag->refreshq_thread_state = REFRESHQ_THREAD_HALTED;
	cv_broadcast(&nag->refreshq_cv);
	CALLB_CPR_EXIT(&cprinfo);
	DTRACE_PROBE(nfsauth__nfsauth__refresh__thread__exit);
	zthread_exit();
}

/*
 * Ordering function for struct auth_cache_clnt, keyed on the client
 * address: shorter addresses sort first, addresses of equal length are
 * ordered by memcmp() of their bytes.  Returns -1, 0 or 1.
 */
int
nfsauth_cache_clnt_compar(const void *v1, const void *v2)
{
	const struct auth_cache_clnt *lhs = v1;
	const struct auth_cache_clnt *rhs = v2;
	int diff;

	if (lhs->authc_addr.len != rhs->authc_addr.len) {
		return (lhs->authc_addr.len < rhs->authc_addr.len ? -1 : 1);
	}

	diff = memcmp(lhs->authc_addr.buf, rhs->authc_addr.buf,
	    lhs->authc_addr.len);
	if (diff == 0) {
		return (0);
	}

	return (diff < 0 ? -1 : 1);
}

/*
 * Ordering function for struct auth_cache entries inside one client's
 * authc_tree.  Entries are keyed, in this order, on:
 *
 *   1) the security flavor,
 *   2) the uid of the client credential,
 *   3) the gid of the client credential,
 *   4) the number of supplemental groups,
 *   5) the contents of the supplemental group array.
 *
 * Returns -1, 0 or 1.
 *
 * The group arrays hold gid_t elements, so the memcmp() length has to be
 * the group count multiplied by sizeof (gid_t).  Passing the bare count
 * would compare only the leading bytes of the arrays and make credentials
 * that differ in a later group look identical, i.e. share a cache entry.
 */
static int
nfsauth_cache_compar(const void *v1, const void *v2)
{
	int c;

	const struct auth_cache *a1 = (const struct auth_cache *)v1;
	const struct auth_cache *a2 = (const struct auth_cache *)v2;

	if (a1->auth_flavor < a2->auth_flavor)
		return (-1);
	if (a1->auth_flavor > a2->auth_flavor)
		return (1);

	if (crgetuid(a1->auth_clnt_cred) < crgetuid(a2->auth_clnt_cred))
		return (-1);
	if (crgetuid(a1->auth_clnt_cred) > crgetuid(a2->auth_clnt_cred))
		return (1);

	if (crgetgid(a1->auth_clnt_cred) < crgetgid(a2->auth_clnt_cred))
		return (-1);
	if (crgetgid(a1->auth_clnt_cred) > crgetgid(a2->auth_clnt_cred))
		return (1);

	if (crgetngroups(a1->auth_clnt_cred) < crgetngroups(a2->auth_clnt_cred))
		return (-1);
	if (crgetngroups(a1->auth_clnt_cred) > crgetngroups(a2->auth_clnt_cred))
		return (1);

	/* Both group counts are equal here; compare the whole arrays. */
	c = memcmp(crgetgroups(a1->auth_clnt_cred),
	    crgetgroups(a2->auth_clnt_cred),
	    crgetngroups(a1->auth_clnt_cred) * sizeof (gid_t));
	if (c < 0)
		return (-1);
	if (c > 0)
		return (1);

	return (0);
}

/*
 * Get the access information from the cache or callup to the mountd
 * to get and cache the access information in the kernel.
 *
 * The cache has two levels: exi->exi_cache[] holds AVL trees of
 * struct auth_cache_clnt keyed on the (masked) client address, and each
 * client holds an AVL tree of struct auth_cache entries keyed by
 * nfsauth_cache_compar().  Missing nodes at either level are created here.
 *
 * Returns the access value for the request.  uid, gid, ngids and gids are
 * optional outputs (each may be NULL; the group list is only returned when
 * both ngids and gids are non-NULL).  A returned *gids array is allocated
 * here or by nfsauth_retrieve() and belongs to the caller.  When
 * nfsauth_retrieve() fails, these outputs are left unmodified.
 *
 * If a cache node can't be allocated (the allocations are KM_NOSLEEP), the
 * answer is fetched from mountd without being cached (the "retrieve"
 * label).
 */
static int
nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor,
    cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
{
	nfsauth_globals_t	*nag;
	struct netbuf		*taddrmask;
	struct netbuf		addr;	/* temporary copy of client's address */
	const struct netbuf	*claddr;
	avl_tree_t		*tree;
	struct auth_cache	ac;	/* used as a template for avl_find() */
	struct auth_cache_clnt	*c;
	struct auth_cache_clnt	acc;	/* used as a template for avl_find() */
	struct auth_cache	*p = NULL;
	int			access;

	uid_t			tmpuid;
	gid_t			tmpgid;
	uint_t			tmpngids;
	gid_t			*tmpgids;

	avl_index_t		where;	/* used for avl_find()/avl_insert() */

	ASSERT(cr != NULL);

	/*
	 * NOTE(review): ASSERT3P is the pointer flavor of the assertion
	 * macros; these operands look like zone IDs, so ASSERT3U/ASSERT3S
	 * may have been intended -- confirm.
	 */
	ASSERT3P(curzone->zone_id, ==, exi->exi_zoneid);
	nag = nfsauth_get_zg();

	/*
	 * Now check whether this client already
	 * has an entry for this flavor in the cache
	 * for this export.
	 * Get the caller's address, mask off the
	 * parts of the address that do not identify
	 * the host (port number, etc), and then hash
	 * it to find the chain of cache entries.
	 */

	claddr = svc_getrpccaller(req->rq_xprt);
	addr = *claddr;
	if (claddr->len != 0) {
		/* Private copy: addrmask() below modifies the bytes. */
		addr.buf = kmem_alloc(addr.maxlen, KM_SLEEP);
		bcopy(claddr->buf, addr.buf, claddr->len);
	} else {
		addr.buf = NULL;
	}

	SVC_GETADDRMASK(req->rq_xprt, SVC_TATTR_ADDRMASK, (void **)&taddrmask);
	ASSERT(taddrmask != NULL);
	addrmask(&addr, taddrmask);

	/*
	 * Only the key fields of the two templates are filled in.  The
	 * duplicated cred is either handed over to a new cache entry or
	 * crfree()d on every other path.
	 */
	ac.auth_flavor = flavor;
	ac.auth_clnt_cred = crdup(cr);

	acc.authc_addr = addr;

	tree = exi->exi_cache[hash(&addr)];

	rw_enter(&exi->exi_cache_lock, RW_READER);
	c = (struct auth_cache_clnt *)avl_find(tree, &acc, NULL);

	if (c == NULL) {
		struct auth_cache_clnt *nc;

		/* Drop the lock while the new client node is prepared. */
		rw_exit(&exi->exi_cache_lock);

		nc = kmem_alloc(sizeof (*nc), KM_NOSLEEP | KM_NORMALPRI);
		if (nc == NULL)
			goto retrieve;

		/*
		 * Initialize the new auth_cache_clnt
		 */
		nc->authc_addr = addr;
		nc->authc_addr.buf = kmem_alloc(addr.maxlen,
		    KM_NOSLEEP | KM_NORMALPRI);
		if (addr.maxlen != 0 && nc->authc_addr.buf == NULL) {
			kmem_free(nc, sizeof (*nc));
			goto retrieve;
		}
		bcopy(addr.buf, nc->authc_addr.buf, addr.len);
		rw_init(&nc->authc_lock, NULL, RW_DEFAULT, NULL);
		avl_create(&nc->authc_tree, nfsauth_cache_compar,
		    sizeof (struct auth_cache),
		    offsetof(struct auth_cache, auth_link));

		/*
		 * Look again as writer: another thread may have inserted
		 * the same client while the lock was dropped.  Either way
		 * we continue holding the lock as reader.
		 */
		rw_enter(&exi->exi_cache_lock, RW_WRITER);
		c = (struct auth_cache_clnt *)avl_find(tree, &acc, &where);
		if (c == NULL) {
			avl_insert(tree, nc, where);
			rw_downgrade(&exi->exi_cache_lock);
			c = nc;
		} else {
			rw_downgrade(&exi->exi_cache_lock);

			avl_destroy(&nc->authc_tree);
			rw_destroy(&nc->authc_lock);
			kmem_free(nc->authc_addr.buf, nc->authc_addr.maxlen);
			kmem_free(nc, sizeof (*nc));
		}
	}

	ASSERT(c != NULL);

	/* exi_cache_lock is still held as reader at this point. */
	rw_enter(&c->authc_lock, RW_READER);
	p = (struct auth_cache *)avl_find(&c->authc_tree, &ac, NULL);

	if (p == NULL) {
		struct auth_cache *np;

		rw_exit(&c->authc_lock);

		np = kmem_cache_alloc(exi_cache_handle,
		    KM_NOSLEEP | KM_NORMALPRI);
		if (np == NULL) {
			rw_exit(&exi->exi_cache_lock);
			goto retrieve;
		}

		/*
		 * Initialize the new auth_cache
		 */
		np->auth_clnt = c;
		np->auth_flavor = flavor;
		np->auth_clnt_cred = ac.auth_clnt_cred;
		np->auth_srv_ngids = 0;
		np->auth_srv_gids = NULL;
		np->auth_time = np->auth_freshness = gethrestime_sec();
		np->auth_state = NFS_AUTH_NEW;
		mutex_init(&np->auth_lock, NULL, MUTEX_DEFAULT, NULL);
		cv_init(&np->auth_cv, NULL, CV_DEFAULT, NULL);

		/*
		 * The export-level lock is released only after the
		 * client-level lock has been taken.
		 */
		rw_enter(&c->authc_lock, RW_WRITER);
		rw_exit(&exi->exi_cache_lock);

		/* Re-check for a racing insert of the same entry. */
		p = (struct auth_cache *)avl_find(&c->authc_tree, &ac, &where);
		if (p == NULL) {
			avl_insert(&c->authc_tree, np, where);
			rw_downgrade(&c->authc_lock);
			p = np;
		} else {
			rw_downgrade(&c->authc_lock);

			cv_destroy(&np->auth_cv);
			mutex_destroy(&np->auth_lock);
			crfree(ac.auth_clnt_cred);
			kmem_cache_free(exi_cache_handle, np);
		}
	} else {
		rw_exit(&exi->exi_cache_lock);
		crfree(ac.auth_clnt_cred);
	}

	/* Take the entry lock before letting go of the client lock. */
	mutex_enter(&p->auth_lock);
	rw_exit(&c->authc_lock);

	/*
	 * If the entry is in the WAITING state then some other thread is just
	 * retrieving the required info.  The entry was either NEW, or the list
	 * of client's supplemental groups is going to be changed (either by
	 * this thread, or by some other thread).  We need to wait until the
	 * nfsauth_retrieve() is done.
	 */
	while (p->auth_state == NFS_AUTH_WAITING)
		cv_wait(&p->auth_cv, &p->auth_lock);

	/*
	 * Here the entry cannot be in WAITING or INVALID state.
	 */
	ASSERT(p->auth_state != NFS_AUTH_WAITING);
	ASSERT(p->auth_state != NFS_AUTH_INVALID);

	/*
	 * If the cache entry is not valid yet, we need to retrieve the
	 * info ourselves.
	 */
	if (p->auth_state == NFS_AUTH_NEW) {
		bool_t res;
		/*
		 * NFS_AUTH_NEW is the default output auth_state value in a
		 * case we failed somewhere below.
		 */
		auth_state_t state = NFS_AUTH_NEW;

		/*
		 * WAITING makes other threads sleep on auth_cv above, so the
		 * entry's fields are filled in below without auth_lock held.
		 */
		p->auth_state = NFS_AUTH_WAITING;
		mutex_exit(&p->auth_lock);
		/* From here on use the address stored in the client node. */
		kmem_free(addr.buf, addr.maxlen);
		addr = p->auth_clnt->authc_addr;

		atomic_inc_uint(&nfsauth_cache_miss);

		res = nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt),
		    flavor, &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids,
		    &tmpgids);

		p->auth_access = access;
		p->auth_time = p->auth_freshness = gethrestime_sec();

		if (res == TRUE) {
			if (uid != NULL)
				*uid = tmpuid;
			if (gid != NULL)
				*gid = tmpgid;
			if (ngids != NULL && gids != NULL) {
				/* The caller gets the retrieved array. */
				*ngids = tmpngids;
				*gids = tmpgids;

				/*
				 * We need a copy of gids for the
				 * auth_cache entry
				 */
				tmpgids = kmem_alloc(tmpngids * sizeof (gid_t),
				    KM_NOSLEEP | KM_NORMALPRI);
				if (tmpgids != NULL)
					bcopy(*gids, tmpgids,
					    tmpngids * sizeof (gid_t));
			}

			/*
			 * The entry only becomes FRESH when it has its own
			 * group list (or there are no groups); otherwise it
			 * stays NEW.
			 */
			if (tmpgids != NULL || tmpngids == 0) {
				p->auth_srv_uid = tmpuid;
				p->auth_srv_gid = tmpgid;
				p->auth_srv_ngids = tmpngids;
				p->auth_srv_gids = tmpgids;

				state = NFS_AUTH_FRESH;
			}
		}

		/*
		 * Set the auth_state and notify waiters.
		 */
		mutex_enter(&p->auth_lock);
		p->auth_state = state;
		cv_broadcast(&p->auth_cv);
		mutex_exit(&p->auth_lock);
	} else {
		uint_t nach;
		time_t refresh;

		/* Cache hit: answer from the entry; auth_lock is held. */
		refresh = gethrestime_sec() - p->auth_freshness;

		p->auth_time = gethrestime_sec();

		if (uid != NULL)
			*uid = p->auth_srv_uid;
		if (gid != NULL)
			*gid = p->auth_srv_gid;
		if (ngids != NULL && gids != NULL) {
			if ((*ngids = p->auth_srv_ngids) != 0) {
				size_t sz = *ngids * sizeof (gid_t);
				*gids = kmem_alloc(sz, KM_SLEEP);
				bcopy(p->auth_srv_gids, *gids, sz);
			} else {
				*gids = NULL;
			}
		}

		access = p->auth_access;

		if ((refresh > NFSAUTH_CACHE_REFRESH) &&
		    p->auth_state == NFS_AUTH_FRESH) {
			refreshq_auth_node_t *ran;
			uint_t nacr;

			/*
			 * The entry is older than NFSAUTH_CACHE_REFRESH:
			 * mark it STALE and queue it for the refresh thread.
			 * The (old) answer gathered above is still returned.
			 */
			p->auth_state = NFS_AUTH_STALE;
			mutex_exit(&p->auth_lock);

			nacr = atomic_inc_uint_nv(&nfsauth_cache_refresh);
			DTRACE_PROBE3(nfsauth__debug__cache__stale,
			    struct exportinfo *, exi,
			    struct auth_cache *, p,
			    uint_t, nacr);

			ran = kmem_alloc(sizeof (refreshq_auth_node_t),
			    KM_SLEEP);
			ran->ran_auth = p;
			ran->ran_netid = strdup(svc_getnetid(req->rq_xprt));

			mutex_enter(&nag->refreshq_lock);

			if (nag->refreshq_thread_state ==
			    REFRESHQ_THREAD_NEED_CREATE) {
				/* Launch nfsauth refresh thread */
				nag->refreshq_thread_state =
				    REFRESHQ_THREAD_RUNNING;
				(void) zthread_create(NULL, 0,
				    nfsauth_refresh_thread, nag, 0,
				    minclsyspri);
			}

			/*
			 * We should not add a work queue item if the thread
			 * is not accepting them.
			 */
			if (nag->refreshq_thread_state ==
			    REFRESHQ_THREAD_RUNNING) {
				refreshq_exi_node_t *ren;

				/*
				 * Is there an existing exi_list?
				 */
				for (ren = list_head(&nag->refreshq_queue);
				    ren != NULL;
				    ren = list_next(&nag->refreshq_queue,
				    ren)) {
					if (ren->ren_exi == exi) {
						list_insert_tail(
						    &ren->ren_authlist, ran);
						break;
					}
				}

				if (ren == NULL) {
					/*
					 * First queued entry for this export:
					 * create its node, which keeps a hold
					 * on the export.
					 */
					ren = kmem_alloc(
					    sizeof (refreshq_exi_node_t),
					    KM_SLEEP);

					exi_hold(exi);
					ren->ren_exi = exi;

					list_create(&ren->ren_authlist,
					    sizeof (refreshq_auth_node_t),
					    offsetof(refreshq_auth_node_t,
					    ran_node));

					list_insert_tail(&ren->ren_authlist,
					    ran);
					list_insert_tail(&nag->refreshq_queue,
					    ren);
				}

				cv_broadcast(&nag->refreshq_cv);
			} else {
				/* Not accepting work: drop the request. */
				strfree(ran->ran_netid);
				kmem_free(ran, sizeof (refreshq_auth_node_t));
			}

			mutex_exit(&nag->refreshq_lock);
		} else {
			mutex_exit(&p->auth_lock);
		}

		nach = atomic_inc_uint_nv(&nfsauth_cache_hit);
		DTRACE_PROBE2(nfsauth__debug__cache__hit,
		    uint_t, nach,
		    time_t, refresh);

		kmem_free(addr.buf, addr.maxlen);
	}

	return (access);

retrieve:
	/* No cache entry took over the duplicated cred; release it. */
	crfree(ac.auth_clnt_cred);

	/*
	 * Retrieve the required data without caching.
	 */

	ASSERT(p == NULL);

	atomic_inc_uint(&nfsauth_cache_miss);

	if (nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt), flavor,
	    &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids)) {
		if (uid != NULL)
			*uid = tmpuid;
		if (gid != NULL)
			*gid = tmpgid;
		if (ngids != NULL && gids != NULL) {
			*ngids = tmpngids;
			*gids = tmpgids;
		} else {
			/* Nobody wants the group list; free it. */
			kmem_free(tmpgids, tmpngids * sizeof (gid_t));
		}
	}

	kmem_free(addr.buf, addr.maxlen);

	return (access);
}

/*
 * Decide whether the requesting client may access the filesystem using
 * the given nfs flavor number, where that flavor has to be one that was
 * shared explicitly.
 *
 * NFSAUTH_DENIED is returned when perm lacks M_4SEC_EXPORTED.  When perm
 * carries none of M_ROOT, M_NONE and M_MAP and is otherwise exactly M_RO
 * or M_RW, the answer (NFSAUTH_RO or NFSAUTH_RW) is given right away.
 * Every other case is resolved through nfsauth_cache_get().
 */
int
nfsauth4_secinfo_access(struct exportinfo *exi, struct svc_req *req,
    int flavor, int perm, cred_t *cr)
{
	if ((perm & M_4SEC_EXPORTED) == 0)
		return (NFSAUTH_DENIED);

	/*
	 * With no lists to consult, the flags alone settle plain ro/rw
	 * shares.
	 */
	if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0) {
		int base = perm & ~M_4SEC_EXPORTED;

		if (base == M_RO)
			return (NFSAUTH_RO);
		if (base == M_RW)
			return (NFSAUTH_RW);
	}

	return (nfsauth_cache_get(exi, req, flavor, cr, NULL, NULL, NULL,
	    NULL));
}

/*
 * Work out the access a request gets to an export.
 *
 * exi		export being accessed
 * req		request; its transport cookie supplies the nfs flavor
 * cr		credentials of the caller
 * uid, gid	optional outputs for the effective ids; preset here and
 *		possibly replaced by nfsauth_cache_get()
 * ngids, gids	optional outputs for the supplemental groups; preset to
 *		0/NULL here and possibly filled by nfsauth_cache_get()
 *
 * The result is the set of NFSAUTH_* access bits, with NFSAUTH_MAPNONE
 * or'ed in whenever the answer was obtained via the AUTH_NONE entry.
 */
int
nfsauth_access(struct exportinfo *exi, struct svc_req *req, cred_t *cr,
    uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
{
	struct secinfo *sp = exi->exi_export.ex_secinfo;
	int caller_is_root = (crgetuid(cr) == 0);
	int none_idx = -1;
	int mapaccess = 0;
	int access;
	int flavor;
	int idx;

	/*
	 * Root maps to the anonymous user unless nfsauth_cache_get()
	 * overrides that later on; everybody else keeps their own ids.
	 */
	if (uid != NULL)
		*uid = caller_is_root ? exi->exi_export.ex_anon : crgetuid(cr);
	if (gid != NULL)
		*gid = caller_is_root ? exi->exi_export.ex_anon : crgetgid(cr);

	if (ngids != NULL)
		*ngids = 0;
	if (gids != NULL)
		*gids = NULL;

	/*
	 * The nfs flavor number travels in the xprt cookie.
	 */
	flavor = (int)(uintptr_t)req->rq_xprt->xp_cookie;

	/*
	 * Look the flavor up among the export's security entries first.
	 * When no lists are attached to it, the expensive call to the
	 * nfsauth service (and any caching) can be avoided altogether.
	 * Remember an AUTH_NONE entry seen on the way as a fallback.
	 */
	for (idx = 0; idx < exi->exi_export.ex_seccnt; idx++) {
		if (sp[idx].s_secinfo.sc_nfsnum == flavor)
			break;
		if (sp[idx].s_secinfo.sc_nfsnum == AUTH_NONE)
			none_idx = idx;
	}

	if (idx >= exi->exi_export.ex_seccnt) {
		/*
		 * The flavor is not listed; fall back to AUTH_NONE when the
		 * export has it, otherwise deny.
		 */
		if (none_idx == -1)
			return (NFSAUTH_DENIED);
		flavor = AUTH_NONE;
		mapaccess = NFSAUTH_MAPNONE;
		idx = none_idx;
	}

	/*
	 * A flavor that sits in ex_secinfo without having been shared
	 * explicitly by the user got there through the nfsv4 server
	 * namespace setup.  Such a node is granted RO, much like a pseudo
	 * node, except that this one is shared.
	 *
	 * e.g. with (flavor) marking a flavor not explicitly shared by
	 *	the user:
	 *
	 *		/	(sys, krb5)
	 *		|
	 *		export  #share -o sec=sys (krb5)
	 *		|
	 *		secure  #share -o sec=krb5
	 *
	 *	a krb5 request for /export is granted RO permission.
	 */
	if ((sp[idx].s_flags & M_4SEC_EXPORTED) == 0)
		return (mapaccess | NFSAUTH_RO);

	/*
	 * Shortcut for shares without lists.  It must not be taken for
	 * AUTH_SYS with NGRPS (16) supplemental groups.
	 */
	if ((sp[idx].s_flags & (M_ROOT | M_NONE | M_MAP)) == 0) {
		if (ngroups_max <= NGRPS || flavor != AUTH_SYS ||
		    crgetngroups(cr) < NGRPS) {
			int base = sp[idx].s_flags & ~M_4SEC_EXPORTED;

			if (base == M_RO)
				return (mapaccess | NFSAUTH_RO);
			if (base == M_RW)
				return (mapaccess | NFSAUTH_RW);
		}
	}

	access = nfsauth_cache_get(exi, req, flavor, cr, uid, gid, ngids, gids);

	/*
	 * Supplemental groups are of no interest for NFSAUTH_DENIED and
	 * NFSAUTH_WRONGSEC, so hand back an empty list in those cases.
	 */
	if ((access & (NFSAUTH_DENIED | NFSAUTH_WRONGSEC)) != 0 &&
	    ngids != NULL && gids != NULL) {
		kmem_free(*gids, *ngids * sizeof (gid_t));
		*ngids = 0;
		*gids = NULL;
	}

	/*
	 * The client's security flavor matched neither the "ro" nor the
	 * "rw" list.  Retry as AUTH_NONE if the export offers it.
	 */
	if ((access & NFSAUTH_WRONGSEC) != 0 && flavor != AUTH_NONE) {
		if (none_idx == -1) {
			/*
			 * AUTH_NONE was not seen before the matching entry;
			 * scan the remaining entries for it.
			 */
			for (; idx < exi->exi_export.ex_seccnt; idx++) {
				if (sp[idx].s_secinfo.sc_nfsnum == AUTH_NONE) {
					none_idx = idx;
					break;
				}
			}
		}

		if (none_idx != -1) {
			mapaccess = NFSAUTH_MAPNONE;
			access = nfsauth_cache_get(exi, req, AUTH_NONE, cr,
			    NULL, NULL, NULL, NULL);
		}
	}

	if ((access & NFSAUTH_DENIED) != 0)
		access = NFSAUTH_DENIED;

	return (access | mapaccess);
}

/*
 * Tear down one per-client cache node: release every auth_cache entry
 * kept in its tree, then the tree, the address buffer, the lock and
 * finally the node itself.
 */
static void
nfsauth_free_clnt_node(struct auth_cache_clnt *p)
{
	struct auth_cache *entry;
	void *walk = NULL;

	for (entry = avl_destroy_nodes(&p->authc_tree, &walk); entry != NULL;
	    entry = avl_destroy_nodes(&p->authc_tree, &walk)) {
		nfsauth_free_node(entry);
	}
	avl_destroy(&p->authc_tree);

	kmem_free(p->authc_addr.buf, p->authc_addr.maxlen);
	rw_destroy(&p->authc_lock);

	kmem_free(p, sizeof (struct auth_cache_clnt));
}

static void
nfsauth_free_node(struct auth_cache *p)
{
	crfree(p->auth_clnt_cred);
	kmem_free(p->auth_srv_gids, p->auth_srv_ngids * sizeof (gid_t));
	mutex_destroy(&p->auth_lock);
	cv_destroy(&p->auth_cv);
	kmem_cache_free(exi_cache_handle, p);
}

/*
 * Empty the nfsauth cache of the given export by freeing every client
 * node in each of its AUTH_TABLESIZE trees.  The trees themselves are
 * left in place.
 */
void
nfsauth_cache_free(struct exportinfo *exi)
{
	int bucket;

	/*
	 * We can only be here as the result of an exi_rele, so no auth
	 * cache entry is in the middle of a refresh.
	 */

	for (bucket = 0; bucket < AUTH_TABLESIZE; bucket++) {
		avl_tree_t *clients = exi->exi_cache[bucket];
		struct auth_cache_clnt *clnt;
		void *walk = NULL;

		for (;;) {
			clnt = avl_destroy_nodes(clients, &walk);
			if (clnt == NULL)
				break;
			nfsauth_free_clnt_node(clnt);
		}
	}
}

/*
 * Reclaim callback invoked by the kernel memory allocator when memory
 * runs low.  Unused cache entries are freed; should that not be enough,
 * the VM system will simply call again.
 *
 * All zones have to be covered, so the list of zones is walked under a
 * reader lock.  That is acceptable here because exi_cache_trim neither
 * blocks nor causes new objects to be allocated (it basically just frees
 * lots of stuff).  Be careful about taking nfssrv_globals_rwl as reader
 * anywhere else: doing so blocks nfs_server_zone_init and
 * nfs_server_zone_fini, both of which enter as writer.
 */
/*ARGSUSED*/
void
exi_cache_reclaim(void *cdrarg)
{
	nfs_globals_t *ng;

	rw_enter(&nfssrv_globals_rwl, RW_READER);

	for (ng = list_head(&nfssrv_globals_list); ng != NULL;
	    ng = list_next(&nfssrv_globals_list, ng)) {
		exi_cache_reclaim_zone(ng);
	}

	rw_exit(&nfssrv_globals_rwl);
}

/*
 * Trim the auth cache of every export hashed in one zone's export table,
 * holding that table's exported_lock as reader meanwhile, and count the
 * pass in nfsauth_cache_reclaim.
 */
static void
exi_cache_reclaim_zone(nfs_globals_t *ng)
{
	nfs_export_t *ne = ng->nfs_export;
	struct exportinfo *exi;
	int slot;

	rw_enter(&ne->exported_lock, RW_READER);

	for (slot = 0; slot < EXPTABLESIZE; slot++) {
		exi = ne->exptable[slot];
		while (exi != NULL) {
			exi_cache_trim(exi);
			exi = exi->fid_hash.next;
		}
	}

	rw_exit(&ne->exported_lock);

	atomic_inc_uint(&nfsauth_cache_reclaim);
}

/*
 * Trim the auth cache of a single export.
 *
 * Each of the AUTH_TABLESIZE client trees is handled in two passes:
 *
 *   1. With exi_cache_lock held as reader, every client node is locked
 *	as writer via rw_tryenter() and its entries not used within the
 *	last NFSAUTH_CACHE_TRIM seconds are removed (WAITING entries are
 *	always kept).  If a client lock cannot be taken, the failure is
 *	counted in exi_cache_auth_reclaim_failed and the function returns,
 *	leaving the remaining trees of this export untouched.
 *
 *   2. exi_cache_lock is upgraded to writer and all client nodes whose
 *	entry tree is empty are removed and freed.  If the upgrade fails,
 *	the failure is counted in exi_cache_clnt_reclaim_failed and
 *	processing moves on to the next tree.
 */
static void
exi_cache_trim(struct exportinfo *exi)
{
	struct auth_cache_clnt *c;
	struct auth_cache_clnt *nextc;
	struct auth_cache *p;
	struct auth_cache *next;
	int i;
	time_t stale_time;
	avl_tree_t *tree;

	for (i = 0; i < AUTH_TABLESIZE; i++) {
		tree = exi->exi_cache[i];
		/* Entries with auth_time at or before this are trimmed. */
		stale_time = gethrestime_sec() - NFSAUTH_CACHE_TRIM;
		rw_enter(&exi->exi_cache_lock, RW_READER);

		/*
		 * Free entries that have not been
		 * used for NFSAUTH_CACHE_TRIM seconds.
		 */
		for (c = avl_first(tree); c != NULL; c = AVL_NEXT(tree, c)) {
			/*
			 * We are being called by the kmem subsystem to reclaim
			 * memory so don't block if we can't get the lock.
			 */
			if (rw_tryenter(&c->authc_lock, RW_WRITER) == 0) {
				exi_cache_auth_reclaim_failed++;
				rw_exit(&exi->exi_cache_lock);
				return;
			}

			/*
			 * The successor is fetched before p may be removed
			 * from the tree and freed below.
			 */
			for (p = avl_first(&c->authc_tree); p != NULL;
			    p = next) {
				next = AVL_NEXT(&c->authc_tree, p);

				ASSERT(p->auth_state != NFS_AUTH_INVALID);

				mutex_enter(&p->auth_lock);

				/*
				 * We won't trim recently used and/or WAITING
				 * entries.
				 */
				if (p->auth_time > stale_time ||
				    p->auth_state == NFS_AUTH_WAITING) {
					mutex_exit(&p->auth_lock);
					continue;
				}

				DTRACE_PROBE1(nfsauth__debug__trim__state,
				    auth_state_t, p->auth_state);

				/*
				 * STALE and REFRESHING entries needs to be
				 * marked INVALID only because they are
				 * referenced by some other structures or
				 * threads.  They will be freed later.
				 */
				if (p->auth_state == NFS_AUTH_STALE ||
				    p->auth_state == NFS_AUTH_REFRESHING) {
					p->auth_state = NFS_AUTH_INVALID;
					mutex_exit(&p->auth_lock);

					avl_remove(&c->authc_tree, p);
				} else {
					mutex_exit(&p->auth_lock);

					avl_remove(&c->authc_tree, p);
					nfsauth_free_node(p);
				}
			}
			rw_exit(&c->authc_lock);
		}

		/*
		 * Removing client nodes from the tree is done as writer.
		 * Do not wait for the upgrade; skip to the next tree if
		 * it cannot be had.
		 */
		if (rw_tryupgrade(&exi->exi_cache_lock) == 0) {
			rw_exit(&exi->exi_cache_lock);
			exi_cache_clnt_reclaim_failed++;
			continue;
		}

		/*
		 * Free the client nodes that have no entries left.
		 */
		for (c = avl_first(tree); c != NULL; c = nextc) {
			nextc = AVL_NEXT(tree, c);

			if (avl_is_empty(&c->authc_tree) == B_FALSE)
				continue;

			avl_remove(tree, c);

			nfsauth_free_clnt_node(c);
		}

		rw_exit(&exi->exi_cache_lock);
	}
}