xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs_auth.c (revision eb9a1df2aeb866bf1de4494433b6d7e5fa07b3ae)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2015 by Delphix. All rights reserved.
25  * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
26  * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/errno.h>
31 #include <sys/vfs.h>
32 #include <sys/vnode.h>
33 #include <sys/cred.h>
34 #include <sys/cmn_err.h>
35 #include <sys/systm.h>
36 #include <sys/kmem.h>
37 #include <sys/pathname.h>
38 #include <sys/utsname.h>
39 #include <sys/debug.h>
40 #include <sys/door.h>
41 #include <sys/sdt.h>
42 #include <sys/thread.h>
43 #include <sys/avl.h>
44 
45 #include <rpc/types.h>
46 #include <rpc/auth.h>
47 #include <rpc/clnt.h>
48 
49 #include <nfs/nfs.h>
50 #include <nfs/export.h>
51 #include <nfs/nfs_clnt.h>
52 #include <nfs/auth.h>
53 
/*
 * kmem cache backing struct auth_cache entries.  It is created in
 * nfsauth_init() with exi_cache_reclaim() registered as its reclaim
 * callback and destroyed in nfsauth_fini().
 */
static struct kmem_cache *exi_cache_handle;
static void exi_cache_reclaim(void *);
static void exi_cache_reclaim_zone(nfs_globals_t *);
static void exi_cache_trim(struct exportinfo *exi);

extern pri_t minclsyspri;

/*
 * NFS auth cache statistics.
 *
 * nfsauth_cache_miss is bumped with atomic_inc_uint() each time a NEW
 * cache entry has to be filled in by calling up to mountd.
 * NOTE(review): the remaining counters are presumably maintained the
 * same way by the cache lookup/reclaim code -- confirm against their
 * users.
 */
volatile uint_t nfsauth_cache_hit;
volatile uint_t nfsauth_cache_miss;
volatile uint_t nfsauth_cache_refresh;
volatile uint_t nfsauth_cache_reclaim;
volatile uint_t exi_cache_auth_reclaim_failed;
volatile uint_t exi_cache_clnt_reclaim_failed;
68 
69 /*
70  * The lifetime of an auth cache entry:
71  * ------------------------------------
72  *
73  * An auth cache entry is created with both the auth_time
74  * and auth_freshness times set to the current time.
75  *
76  * Upon every client access which results in a hit, the
77  * auth_time will be updated.
78  *
79  * If a client access determines that the auth_freshness
80  * indicates that the entry is STALE, then it will be
81  * refreshed. Note that this will explicitly reset
82  * auth_time.
83  *
84  * When the REFRESH successfully occurs, then the
85  * auth_freshness is updated.
86  *
87  * There are two ways for an entry to leave the cache:
88  *
89  * 1) Purged by an action on the export (remove or changed)
90  * 2) Memory backpressure from the kernel (check against NFSAUTH_CACHE_TRIM)
91  *
92  * For 2) we check the timeout value against auth_time.
93  */
94 
95 /*
 * Number of seconds until we mark an auth cache entry for refresh.
97  */
98 #define	NFSAUTH_CACHE_REFRESH 600
99 
100 /*
101  * Number of idle seconds until we yield to backpressure
102  * to trim a cache entry.
103  */
104 #define	NFSAUTH_CACHE_TRIM 3600
105 
/*
 * While we could encapsulate the exi_list inside the
 * exi structure, we can't do that for the auth_list.
 * So, to keep things looking clean, we keep them both
 * in these external lists.
 *
 * A refreshq_exi_node_t is one element of the refresh queue: it groups
 * all pending refresh requests that belong to a single export.
 */
typedef struct refreshq_exi_node {
	/* export being refreshed; exi_rele()d when the node is torn down */
	struct exportinfo	*ren_exi;
	/* pending refreshq_auth_node_t requests for ren_exi */
	list_t			ren_authlist;
	/* linkage on the refreshq_queue list */
	list_node_t		ren_node;
} refreshq_exi_node_t;
117 
/*
 * One pending refresh request for a single auth_cache entry.  Nodes of
 * this type are popped from a refreshq_exi_node_t's ren_authlist.
 */
typedef struct refreshq_auth_node {
	/* the cache entry that is to be refreshed */
	struct auth_cache	*ran_auth;
	/* netid passed to mountd for the refresh; released with strfree() */
	char			*ran_netid;
	/* list linkage (presumably on ren_authlist -- confirm at list_create) */
	list_node_t		ran_node;
} refreshq_auth_node_t;
123 
/*
 * Used to manipulate things on the refreshq_queue.  Note that the refresh
 * thread will effectively pop a node off of the queue, at which point it
 * will no longer need to hold the mutex.
 *
 * NOTE(review): the zone init/shutdown code and the refresh thread all
 * operate on the identically named members of nfsauth_globals_t rather
 * than on these file-scope objects.  These look like leftovers from the
 * pre-per-zone implementation -- confirm that nothing else in the file
 * still references them before removing.
 */
static kmutex_t refreshq_lock;
static list_t refreshq_queue;
static kcondvar_t refreshq_cv;
132 
/*
 * If there is ever a problem with loading the module, then nfsauth_fini()
 * needs to be called to remove state.  In that event, since the refreshq
 * thread has been started, they need to work together to get rid of state.
 *
 * Life cycle of the per-zone refresh thread, stored in
 * nfsauth_globals_t.refreshq_thread_state:
 *
 *   NEED_CREATE  initial value set by nfsauth_zone_init(); no thread yet
 *   RUNNING      the refresh thread keeps servicing the queue only while
 *                the state has this value
 *   FINI_REQ     set by nfsauth_zone_shutdown() to ask the thread to exit
 *   HALTED       set by the refresh thread itself just before it exits
 */
typedef enum nfsauth_refreshq_thread_state {
	REFRESHQ_THREAD_RUNNING,
	REFRESHQ_THREAD_FINI_REQ,
	REFRESHQ_THREAD_HALTED,
	REFRESHQ_THREAD_NEED_CREATE
} nfsauth_refreshq_thread_state_t;
144 
/*
 * NFS auth state hanging off nfs_globals_t.nfs_auth.  It is allocated
 * in nfsauth_zone_init() and freed in nfsauth_zone_fini().
 */
typedef struct nfsauth_globals {
	/*
	 * mountd_lock protects mountd_dh, the door handle used for upcalls
	 * to mountd.  The handle is (re)installed by mountd_args() and may
	 * be cleared by nfsauth_retrieve() when the door has been revoked.
	 */
	kmutex_t	mountd_lock;
	door_handle_t   mountd_dh;

	/*
	 * Used to manipulate things on the refreshq_queue.  Note that the
	 * refresh thread will effectively pop a node off of the queue,
	 * at which point it will no longer need to hold the mutex.
	 */
	kmutex_t	refreshq_lock;
	list_t		refreshq_queue;
	kcondvar_t	refreshq_cv;

	/*
	 * A list_t would be overkill.  These are auth_cache entries which are
	 * no longer linked to an exi.  It should be the case that all of their
	 * states are NFS_AUTH_INVALID, i.e., the only way to be put on this
	 * list is iff their state indicated that they had been placed on the
	 * refreshq_queue.
	 *
	 * Note that while there is no link from the exi or back to the exi,
	 * the exi can not go away until these entries are harvested.
	 */
	struct auth_cache		*refreshq_dead_entries;
	/* see nfsauth_refreshq_thread_state_t; changed under refreshq_lock */
	nfsauth_refreshq_thread_state_t	refreshq_thread_state;

} nfsauth_globals_t;
172 
/* Forward declarations of file-local helpers. */
static void nfsauth_free_node(struct auth_cache *);
static void nfsauth_refresh_thread(nfsauth_globals_t *);

/* AVL comparator for the per-client tree of auth_cache entries. */
static int nfsauth_cache_compar(const void *, const void *);
177 
178 static nfsauth_globals_t *
179 nfsauth_get_zg(void)
180 {
181 	nfs_globals_t *ng = nfs_srv_getzg();
182 	nfsauth_globals_t *nag = ng->nfs_auth;
183 	ASSERT(nag != NULL);
184 	return (nag);
185 }
186 
187 void
188 mountd_args(uint_t did)
189 {
190 	nfsauth_globals_t *nag;
191 
192 	nag = nfsauth_get_zg();
193 	mutex_enter(&nag->mountd_lock);
194 	if (nag->mountd_dh != NULL)
195 		door_ki_rele(nag->mountd_dh);
196 	nag->mountd_dh = door_ki_lookup(did);
197 	mutex_exit(&nag->mountd_lock);
198 }
199 
200 void
201 nfsauth_init(void)
202 {
203 	exi_cache_handle = kmem_cache_create("exi_cache_handle",
204 	    sizeof (struct auth_cache), 0, NULL, NULL,
205 	    exi_cache_reclaim, NULL, NULL, 0);
206 }
207 
/*
 * Module-wide teardown: destroy the auth_cache kmem cache that was
 * created by nfsauth_init().
 */
void
nfsauth_fini(void)
{
	kmem_cache_destroy(exi_cache_handle);
}
213 
214 void
215 nfsauth_zone_init(nfs_globals_t *ng)
216 {
217 	nfsauth_globals_t *nag;
218 
219 	nag = kmem_zalloc(sizeof (*nag), KM_SLEEP);
220 
221 	/*
222 	 * mountd can be restarted by smf(5).  We need to make sure
223 	 * the updated door handle will safely make it to mountd_dh.
224 	 */
225 	mutex_init(&nag->mountd_lock, NULL, MUTEX_DEFAULT, NULL);
226 	mutex_init(&nag->refreshq_lock, NULL, MUTEX_DEFAULT, NULL);
227 	list_create(&nag->refreshq_queue, sizeof (refreshq_exi_node_t),
228 	    offsetof(refreshq_exi_node_t, ren_node));
229 	cv_init(&nag->refreshq_cv, NULL, CV_DEFAULT, NULL);
230 	nag->refreshq_thread_state = REFRESHQ_THREAD_NEED_CREATE;
231 
232 	ng->nfs_auth = nag;
233 }
234 
235 void
236 nfsauth_zone_shutdown(nfs_globals_t *ng)
237 {
238 	refreshq_exi_node_t	*ren;
239 	nfsauth_globals_t	*nag = ng->nfs_auth;
240 
241 	/* Prevent the nfsauth_refresh_thread from getting new work */
242 	mutex_enter(&nag->refreshq_lock);
243 	if (nag->refreshq_thread_state == REFRESHQ_THREAD_RUNNING) {
244 		nag->refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ;
245 		cv_broadcast(&nag->refreshq_cv);
246 
247 		/* Wait for nfsauth_refresh_thread() to exit */
248 		while (nag->refreshq_thread_state != REFRESHQ_THREAD_HALTED)
249 			cv_wait(&nag->refreshq_cv, &nag->refreshq_lock);
250 	}
251 	mutex_exit(&nag->refreshq_lock);
252 
253 	/*
254 	 * Walk the exi_list and in turn, walk the auth_lists and free all
255 	 * lists.  In addition, free INVALID auth_cache entries.
256 	 */
257 	while ((ren = list_remove_head(&nag->refreshq_queue))) {
258 		refreshq_auth_node_t *ran;
259 
260 		while ((ran = list_remove_head(&ren->ren_authlist)) != NULL) {
261 			struct auth_cache *p = ran->ran_auth;
262 			if (p->auth_state == NFS_AUTH_INVALID)
263 				nfsauth_free_node(p);
264 			strfree(ran->ran_netid);
265 			kmem_free(ran, sizeof (*ran));
266 		}
267 
268 		list_destroy(&ren->ren_authlist);
269 		exi_rele(ren->ren_exi);
270 		kmem_free(ren, sizeof (*ren));
271 	}
272 }
273 
274 void
275 nfsauth_zone_fini(nfs_globals_t *ng)
276 {
277 	nfsauth_globals_t *nag = ng->nfs_auth;
278 
279 	ng->nfs_auth = NULL;
280 
281 	list_destroy(&nag->refreshq_queue);
282 	cv_destroy(&nag->refreshq_cv);
283 	mutex_destroy(&nag->refreshq_lock);
284 	mutex_destroy(&nag->mountd_lock);
285 	/* Extra cleanup. */
286 	if (nag->mountd_dh != NULL)
287 		door_ki_rele(nag->mountd_dh);
288 	kmem_free(nag, sizeof (*nag));
289 }
290 
291 /*
292  * Convert the address in a netbuf to
293  * a hash index for the auth_cache table.
294  */
295 static int
296 hash(struct netbuf *a)
297 {
298 	int i, h = 0;
299 
300 	for (i = 0; i < a->len; i++)
301 		h ^= a->buf[i];
302 
303 	return (h & (AUTH_TABLESIZE - 1));
304 }
305 
306 /*
307  * Mask out the components of an
308  * address that do not identify
309  * a host. For socket addresses the
310  * masking gets rid of the port number.
311  */
312 static void
313 addrmask(struct netbuf *addr, struct netbuf *mask)
314 {
315 	int i;
316 
317 	for (i = 0; i < addr->len; i++)
318 		addr->buf[i] &= mask->buf[i];
319 }
320 
321 /*
322  * nfsauth4_access is used for NFS V4 auth checking. Besides doing
323  * the common nfsauth_access(), it will check if the client can
324  * have a limited access to this vnode even if the security flavor
325  * used does not meet the policy.
326  */
327 int
328 nfsauth4_access(struct exportinfo *exi, vnode_t *vp, struct svc_req *req,
329     cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
330 {
331 	int access;
332 
333 	access = nfsauth_access(exi, req, cr, uid, gid, ngids, gids);
334 
335 	/*
336 	 * There are cases that the server needs to allow the client
337 	 * to have a limited view.
338 	 *
339 	 * e.g.
340 	 * /export is shared as "sec=sys,rw=dfs-test-4,sec=krb5,rw"
341 	 * /export/home is shared as "sec=sys,rw"
342 	 *
343 	 * When the client mounts /export with sec=sys, the client
344 	 * would get a limited view with RO access on /export to see
345 	 * "home" only because the client is allowed to access
346 	 * /export/home with auth_sys.
347 	 */
348 	if (access & NFSAUTH_DENIED || access & NFSAUTH_WRONGSEC) {
349 		/*
350 		 * Allow ro permission with LIMITED view if there is a
351 		 * sub-dir exported under vp.
352 		 */
353 		if (has_visible(exi, vp))
354 			return (NFSAUTH_LIMITED);
355 	}
356 
357 	return (access);
358 }
359 
360 static void
361 sys_log(const char *msg)
362 {
363 	static time_t	tstamp = 0;
364 	time_t		now;
365 
366 	/*
367 	 * msg is shown (at most) once per minute
368 	 */
369 	now = gethrestime_sec();
370 	if ((tstamp + 60) < now) {
371 		tstamp = now;
372 		cmn_err(CE_WARN, msg);
373 	}
374 }
375 
/*
 * Callup to the mountd to get access information in the kernel.
 *
 * An NFSAUTH_ACCESS request describing the client (address, netid,
 * security flavor and credentials) and the export path is XDR encoded,
 * sent through the mountd door registered in nag->mountd_dh and the
 * XDR encoded reply is decoded.
 *
 * Returns TRUE if mountd answered with NFSAUTH_DR_OKAY.  In that case
 * *access, *srv_uid, *srv_gid and *srv_gids_cnt are filled in from the
 * reply and *srv_gids points to a kmem_alloc()ed copy of the group list
 * (NULL if the count is zero) which the caller has to free.
 *
 * Returns FALSE otherwise, with *access set to NFSAUTH_DENIED, or to
 * NFSAUTH_DROP if no door handle showed up within NFSAUTH_DR_TRYCNT
 * attempts (one second apart).  The srv_* outputs are not touched on
 * failure.
 */
static bool_t
nfsauth_retrieve(nfsauth_globals_t *nag, struct exportinfo *exi,
    char *req_netid, int flavor, struct netbuf *addr, int *access,
    cred_t *clnt_cred, uid_t *srv_uid, gid_t *srv_gid, uint_t *srv_gids_cnt,
    gid_t **srv_gids)
{
	varg_t			  varg = {0};
	nfsauth_res_t		  res = {0};
	XDR			  xdrs;
	size_t			  absz;
	caddr_t			  abuf;
	int			  last = 0;
	door_arg_t		  da;
	door_info_t		  di;
	door_handle_t		  dh;
	uint_t			  ntries = 0;

	/*
	 * No entry in the cache for this client/flavor
	 * so we need to call the nfsauth service in the
	 * mount daemon.
	 */

	varg.vers = V_PROTO;
	varg.arg_u.arg.cmd = NFSAUTH_ACCESS;
	varg.arg_u.arg.areq.req_client.n_len = addr->len;
	varg.arg_u.arg.areq.req_client.n_bytes = addr->buf;
	varg.arg_u.arg.areq.req_netid = req_netid;
	varg.arg_u.arg.areq.req_path = exi->exi_export.ex_path;
	varg.arg_u.arg.areq.req_flavor = flavor;
	varg.arg_u.arg.areq.req_clnt_uid = crgetuid(clnt_cred);
	varg.arg_u.arg.areq.req_clnt_gid = crgetgid(clnt_cred);
	varg.arg_u.arg.areq.req_clnt_gids.len = crgetngroups(clnt_cred);
	varg.arg_u.arg.areq.req_clnt_gids.val = (gid_t *)crgetgroups(clnt_cred);

	DTRACE_PROBE1(nfsserv__func__nfsauth__varg, varg_t *, &varg);

	/*
	 * Setup the XDR stream for encoding the arguments. Notice that
	 * in addition to the args having variable fields (req_netid and
	 * req_path), the argument data structure is itself versioned,
	 * so we need to make sure we can size the arguments buffer
	 * appropriately to encode all the args. If we can't get sizing
	 * info _or_ properly encode the arguments, there's really no
	 * point in continuing, so we fail the request.
	 */
	if ((absz = xdr_sizeof(xdr_varg, &varg)) == 0) {
		/* Nothing has been allocated yet, so just bail out. */
		*access = NFSAUTH_DENIED;
		return (FALSE);
	}

	abuf = (caddr_t)kmem_alloc(absz, KM_SLEEP);
	xdrmem_create(&xdrs, abuf, absz, XDR_ENCODE);
	if (!xdr_varg(&xdrs, &varg)) {
		XDR_DESTROY(&xdrs);
		goto fail;
	}
	XDR_DESTROY(&xdrs);

	/*
	 * Prepare the door arguments
	 *
	 * We don't know the size of the message the daemon
	 * will pass back to us.  By setting rbuf to NULL,
	 * we force the door code to allocate a buf of the
	 * appropriate size.  We must set rsize > 0, however,
	 * else the door code acts as if no response was
	 * expected and doesn't pass the data to us.
	 */
	da.data_ptr = (char *)abuf;
	da.data_size = absz;
	da.desc_ptr = NULL;
	da.desc_num = 0;
	da.rbuf = NULL;
	da.rsize = 1;

retry:
	/*
	 * Take our own hold on the current door handle under mountd_lock
	 * so that it stays valid even if mountd_args() replaces it while
	 * the upcall is in progress.
	 */
	mutex_enter(&nag->mountd_lock);
	dh = nag->mountd_dh;
	if (dh != NULL)
		door_ki_hold(dh);
	mutex_exit(&nag->mountd_lock);

	if (dh == NULL) {
		/*
		 * The rendezvous point has not been established yet!
		 * This could mean that either mountd(1m) has not yet
		 * been started or that _this_ routine nuked the door
		 * handle after receiving an EINTR for a REVOKED door.
		 *
		 * Returning NFSAUTH_DROP will cause the NFS client
		 * to retransmit the request, so let's try to be more
		 * resilient and attempt for ntries before we bail.
		 */
		if (++ntries % NFSAUTH_DR_TRYCNT) {
			delay(hz);
			goto retry;
		}

		kmem_free(abuf, absz);

		sys_log("nfsauth: mountd has not established door");
		*access = NFSAUTH_DROP;
		return (FALSE);
	}

	/* We have a handle: start counting from scratch should it vanish. */
	ntries = 0;

	/*
	 * Now that we've got what we need, place the call.
	 */
	switch (door_ki_upcall_limited(dh, &da, NULL, SIZE_MAX, 0)) {
	case 0:				/* Success */
		door_ki_rele(dh);

		if (da.data_ptr == NULL && da.data_size == 0) {
			/*
			 * The door_return that contained the data
			 * failed! We're here because of the 2nd
			 * door_return (w/o data) such that we can
			 * get control of the thread (and exit
			 * gracefully).
			 */
			DTRACE_PROBE1(nfsserv__func__nfsauth__door__nil,
			    door_arg_t *, &da);
			goto fail;
		}

		break;

	case EAGAIN:
		/*
		 * Server out of resources; back off for a bit
		 */
		door_ki_rele(dh);
		delay(hz);
		goto retry;
		/* NOTREACHED */

	case EINTR:
		if (!door_ki_info(dh, &di)) {
			door_ki_rele(dh);

			if (di.di_attributes & DOOR_REVOKED) {
				/*
				 * The server barfed and revoked
				 * the (existing) door on us; we
				 * want to wait to give smf(5) a
				 * chance to restart mountd(1m)
				 * and establish a new door handle.
				 *
				 * The registered handle is dropped only
				 * if it is still the one we used, i.e.
				 * nobody has installed a new one in the
				 * meantime.
				 */
				mutex_enter(&nag->mountd_lock);
				if (dh == nag->mountd_dh) {
					door_ki_rele(nag->mountd_dh);
					nag->mountd_dh = NULL;
				}
				mutex_exit(&nag->mountd_lock);
				delay(hz);
				goto retry;
			}
			/*
			 * If the door was _not_ revoked on us,
			 * then more than likely we took an INTR,
			 * so we need to fail the operation.
			 */
			goto fail;
		}
		/*
		 * The only failure that can occur from getting
		 * the door info is EINVAL, so we let the code
		 * below handle it.
		 */
		/* FALLTHROUGH */

	case EBADF:
	case EINVAL:
	default:
		/*
		 * If we have a stale door handle, give smf a last
		 * chance to start it by sleeping for a little bit.
		 * If we're still hosed, we'll fail the call.
		 *
		 * Since we're going to reacquire the door handle
		 * upon the retry, we opt to sleep for a bit and
		 * _not_ to clear mountd_dh. If mountd restarted
		 * and was able to set mountd_dh, we should see
		 * the new instance; if not, we won't get caught
		 * up in the retry/DELAY loop.
		 */
		door_ki_rele(dh);
		if (!last) {
			delay(hz);
			last++;
			goto retry;
		}
		sys_log("nfsauth: stale mountd door handle");
		goto fail;
	}

	ASSERT(da.rbuf != NULL);

	/*
	 * No door errors encountered; setup the XDR stream for decoding
	 * the results. If we fail to decode the results, we've got no
	 * other recourse than to fail the request.
	 */
	xdrmem_create(&xdrs, da.rbuf, da.rsize, XDR_DECODE);
	if (!xdr_nfsauth_res(&xdrs, &res)) {
		/* Release whatever was decoded before the failure. */
		xdr_free(xdr_nfsauth_res, (char *)&res);
		XDR_DESTROY(&xdrs);
		kmem_free(da.rbuf, da.rsize);
		goto fail;
	}
	XDR_DESTROY(&xdrs);
	/* The reply buffer handed back by the door code is ours to free. */
	kmem_free(da.rbuf, da.rsize);

	DTRACE_PROBE1(nfsserv__func__nfsauth__results, nfsauth_res_t *, &res);
	switch (res.stat) {
		case NFSAUTH_DR_OKAY:
			*access = res.ares.auth_perm;
			*srv_uid = res.ares.auth_srv_uid;
			*srv_gid = res.ares.auth_srv_gid;

			/*
			 * Hand the caller a private copy of the group list,
			 * because the decoded one is released by xdr_free()
			 * below.
			 */
			if ((*srv_gids_cnt = res.ares.auth_srv_gids.len) != 0) {
				*srv_gids = kmem_alloc(*srv_gids_cnt *
				    sizeof (gid_t), KM_SLEEP);
				bcopy(res.ares.auth_srv_gids.val, *srv_gids,
				    *srv_gids_cnt * sizeof (gid_t));
			} else {
				*srv_gids = NULL;
			}

			break;

		case NFSAUTH_DR_EFAIL:
		case NFSAUTH_DR_DECERR:
		case NFSAUTH_DR_BADCMD:
		default:
			xdr_free(xdr_nfsauth_res, (char *)&res);
			/*
			 * Common failure exit.  Everyone jumping here has
			 * already dropped the door hold and the reply
			 * buffer; only the argument buffer is left.
			 */
fail:
			*access = NFSAUTH_DENIED;
			kmem_free(abuf, absz);
			return (FALSE);
			/* NOTREACHED */
	}

	xdr_free(xdr_nfsauth_res, (char *)&res);
	kmem_free(abuf, absz);

	return (TRUE);
}
630 
/*
 * Body of the refresh thread.
 *
 * While the thread state is REFRESHQ_THREAD_RUNNING, export nodes are
 * popped off nag->refreshq_queue (sleeping on refreshq_cv when the
 * queue is empty) and every auth_cache entry queued for that export
 * is refreshed by calling up to mountd through nfsauth_retrieve().
 * Entries that went INVALID while they were queued, or while the
 * upcall was in progress, are freed here instead.
 *
 * Once the state is no longer RUNNING, the thread marks itself
 * REFRESHQ_THREAD_HALTED, wakes up the waiters on refreshq_cv and
 * exits.
 */
static void
nfsauth_refresh_thread(nfsauth_globals_t *nag)
{
	refreshq_exi_node_t	*ren;
	refreshq_auth_node_t	*ran;

	struct exportinfo	*exi;

	int			access;
	bool_t			retrieval;

	callb_cpr_t		cprinfo;

	CALLB_CPR_INIT(&cprinfo, &nag->refreshq_lock, callb_generic_cpr,
	    "nfsauth_refresh");

	for (;;) {
		mutex_enter(&nag->refreshq_lock);
		if (nag->refreshq_thread_state != REFRESHQ_THREAD_RUNNING) {
			/* Keep the hold on the lock! */
			break;
		}

		ren = list_remove_head(&nag->refreshq_queue);
		if (ren == NULL) {
			/*
			 * Nothing to do; sleep until woken up and then
			 * start over with the thread state check.
			 */
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&nag->refreshq_cv, &nag->refreshq_lock);
			CALLB_CPR_SAFE_END(&cprinfo, &nag->refreshq_lock);
			mutex_exit(&nag->refreshq_lock);
			continue;
		}
		mutex_exit(&nag->refreshq_lock);

		exi = ren->ren_exi;
		ASSERT(exi != NULL);

		/*
		 * Since the ren was removed from the refreshq_queue above,
		 * this is the only thread aware about the ren existence, so we
		 * have the exclusive ownership of it and we do not need to
		 * protect it by any lock.
		 */
		while ((ran = list_remove_head(&ren->ren_authlist))) {
			uid_t uid;
			gid_t gid;
			uint_t ngids;
			gid_t *gids;
			struct auth_cache *p = ran->ran_auth;
			char *netid = ran->ran_netid;

			ASSERT(p != NULL);
			ASSERT(netid != NULL);

			/* p and netid were saved above; the node can go. */
			kmem_free(ran, sizeof (refreshq_auth_node_t));

			mutex_enter(&p->auth_lock);

			/*
			 * Once the entry goes INVALID, it can not change
			 * state.
			 *
			 * No need to refresh entries also in a case we are
			 * just shutting down.
			 *
			 * In general, there is no need to hold the
			 * refreshq_lock to test the refreshq_thread_state.  We
			 * do hold it at other places because there is some
			 * related thread synchronization (or some other tasks)
			 * close to the refreshq_thread_state check.
			 *
			 * The check for the refreshq_thread_state value here
			 * is purely advisory to allow the faster
			 * nfsauth_refresh_thread() shutdown.  In a case we
			 * will miss such advisory, nothing catastrophic
			 * happens: we will just spin longer here before the
			 * shutdown.
			 */
			if (p->auth_state == NFS_AUTH_INVALID ||
			    nag->refreshq_thread_state !=
			    REFRESHQ_THREAD_RUNNING) {
				mutex_exit(&p->auth_lock);

				if (p->auth_state == NFS_AUTH_INVALID)
					nfsauth_free_node(p);

				strfree(netid);

				continue;
			}

			/*
			 * Make sure the state is valid.  Note that once we
			 * change the state to NFS_AUTH_REFRESHING, no other
			 * thread will be able to work on this entry.
			 */
			ASSERT(p->auth_state == NFS_AUTH_STALE);

			p->auth_state = NFS_AUTH_REFRESHING;
			mutex_exit(&p->auth_lock);

			DTRACE_PROBE2(nfsauth__debug__cache__refresh,
			    struct exportinfo *, exi,
			    struct auth_cache *, p);

			/*
			 * The first caching of the access rights
			 * is done with the netid pulled out of the
			 * request from the client. All subsequent
			 * users of the cache may or may not have
			 * the same netid. It doesn't matter. So
			 * when we refresh, we simply use the netid
			 * of the request which triggered the
			 * refresh attempt.
			 */
			retrieval = nfsauth_retrieve(nag, exi, netid,
			    p->auth_flavor, &p->auth_clnt->authc_addr, &access,
			    p->auth_clnt_cred, &uid, &gid, &ngids, &gids);

			/*
			 * The netid copy was needed for the upcall only.
			 */
			strfree(netid);

			mutex_enter(&p->auth_lock);
			if (p->auth_state == NFS_AUTH_INVALID) {
				/*
				 * The entry was invalidated while we were
				 * talking to mountd: free it together with
				 * the group list we may have received.
				 */
				mutex_exit(&p->auth_lock);
				nfsauth_free_node(p);
				if (retrieval == TRUE)
					kmem_free(gids, ngids * sizeof (gid_t));
			} else {
				/*
				 * If we got an error, do not reset the
				 * time. This will cause the next access
				 * check for the client to reschedule this
				 * node.
				 */
				if (retrieval == TRUE) {
					p->auth_access = access;

					p->auth_srv_uid = uid;
					p->auth_srv_gid = gid;
					/* Replace the old group list. */
					kmem_free(p->auth_srv_gids,
					    p->auth_srv_ngids * sizeof (gid_t));
					p->auth_srv_ngids = ngids;
					p->auth_srv_gids = gids;

					p->auth_freshness = gethrestime_sec();
				}
				p->auth_state = NFS_AUTH_FRESH;

				cv_broadcast(&p->auth_cv);
				mutex_exit(&p->auth_lock);
			}
		}

		list_destroy(&ren->ren_authlist);
		exi_rele(ren->ren_exi);
		kmem_free(ren, sizeof (refreshq_exi_node_t));
	}

	/*
	 * We get here with refreshq_lock still held (see the break above);
	 * the lock is handed over to CALLB_CPR_EXIT().
	 */
	nag->refreshq_thread_state = REFRESHQ_THREAD_HALTED;
	cv_broadcast(&nag->refreshq_cv);
	CALLB_CPR_EXIT(&cprinfo);
	DTRACE_PROBE(nfsauth__nfsauth__refresh__thread__exit);
	zthread_exit();
}
798 
799 int
800 nfsauth_cache_clnt_compar(const void *v1, const void *v2)
801 {
802 	int c;
803 
804 	const struct auth_cache_clnt *a1 = (const struct auth_cache_clnt *)v1;
805 	const struct auth_cache_clnt *a2 = (const struct auth_cache_clnt *)v2;
806 
807 	if (a1->authc_addr.len < a2->authc_addr.len)
808 		return (-1);
809 	if (a1->authc_addr.len > a2->authc_addr.len)
810 		return (1);
811 
812 	c = memcmp(a1->authc_addr.buf, a2->authc_addr.buf, a1->authc_addr.len);
813 	if (c < 0)
814 		return (-1);
815 	if (c > 0)
816 		return (1);
817 
818 	return (0);
819 }
820 
821 static int
822 nfsauth_cache_compar(const void *v1, const void *v2)
823 {
824 	int c;
825 
826 	const struct auth_cache *a1 = (const struct auth_cache *)v1;
827 	const struct auth_cache *a2 = (const struct auth_cache *)v2;
828 
829 	if (a1->auth_flavor < a2->auth_flavor)
830 		return (-1);
831 	if (a1->auth_flavor > a2->auth_flavor)
832 		return (1);
833 
834 	if (crgetuid(a1->auth_clnt_cred) < crgetuid(a2->auth_clnt_cred))
835 		return (-1);
836 	if (crgetuid(a1->auth_clnt_cred) > crgetuid(a2->auth_clnt_cred))
837 		return (1);
838 
839 	if (crgetgid(a1->auth_clnt_cred) < crgetgid(a2->auth_clnt_cred))
840 		return (-1);
841 	if (crgetgid(a1->auth_clnt_cred) > crgetgid(a2->auth_clnt_cred))
842 		return (1);
843 
844 	if (crgetngroups(a1->auth_clnt_cred) < crgetngroups(a2->auth_clnt_cred))
845 		return (-1);
846 	if (crgetngroups(a1->auth_clnt_cred) > crgetngroups(a2->auth_clnt_cred))
847 		return (1);
848 
849 	c = memcmp(crgetgroups(a1->auth_clnt_cred),
850 	    crgetgroups(a2->auth_clnt_cred), crgetngroups(a1->auth_clnt_cred));
851 	if (c < 0)
852 		return (-1);
853 	if (c > 0)
854 		return (1);
855 
856 	return (0);
857 }
858 
859 /*
860  * Get the access information from the cache or callup to the mountd
861  * to get and cache the access information in the kernel.
862  */
863 static int
864 nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor,
865     cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
866 {
867 	nfsauth_globals_t	*nag;
868 	struct netbuf		*taddrmask;
869 	struct netbuf		addr;	/* temporary copy of client's address */
870 	const struct netbuf	*claddr;
871 	avl_tree_t		*tree;
872 	struct auth_cache	ac;	/* used as a template for avl_find() */
873 	struct auth_cache_clnt	*c;
874 	struct auth_cache_clnt	acc;	/* used as a template for avl_find() */
875 	struct auth_cache	*p = NULL;
876 	int			access;
877 
878 	uid_t			tmpuid;
879 	gid_t			tmpgid;
880 	uint_t			tmpngids;
881 	gid_t			*tmpgids;
882 
883 	avl_index_t		where;	/* used for avl_find()/avl_insert() */
884 
885 	ASSERT(cr != NULL);
886 
887 	ASSERT3P(curzone->zone_id, ==, exi->exi_zoneid);
888 	nag = nfsauth_get_zg();
889 
890 	/*
891 	 * Now check whether this client already
892 	 * has an entry for this flavor in the cache
893 	 * for this export.
894 	 * Get the caller's address, mask off the
895 	 * parts of the address that do not identify
896 	 * the host (port number, etc), and then hash
897 	 * it to find the chain of cache entries.
898 	 */
899 
900 	claddr = svc_getrpccaller(req->rq_xprt);
901 	addr = *claddr;
902 	if (claddr->len != 0) {
903 		addr.buf = kmem_alloc(addr.maxlen, KM_SLEEP);
904 		bcopy(claddr->buf, addr.buf, claddr->len);
905 	} else {
906 		addr.buf = NULL;
907 	}
908 
909 	SVC_GETADDRMASK(req->rq_xprt, SVC_TATTR_ADDRMASK, (void **)&taddrmask);
910 	ASSERT(taddrmask != NULL);
911 	addrmask(&addr, taddrmask);
912 
913 	ac.auth_flavor = flavor;
914 	ac.auth_clnt_cred = crdup(cr);
915 
916 	acc.authc_addr = addr;
917 
918 	tree = exi->exi_cache[hash(&addr)];
919 
920 	rw_enter(&exi->exi_cache_lock, RW_READER);
921 	c = (struct auth_cache_clnt *)avl_find(tree, &acc, NULL);
922 
923 	if (c == NULL) {
924 		struct auth_cache_clnt *nc;
925 
926 		rw_exit(&exi->exi_cache_lock);
927 
928 		nc = kmem_alloc(sizeof (*nc), KM_NOSLEEP | KM_NORMALPRI);
929 		if (nc == NULL)
930 			goto retrieve;
931 
932 		/*
933 		 * Initialize the new auth_cache_clnt
934 		 */
935 		nc->authc_addr = addr;
936 		nc->authc_addr.buf = kmem_alloc(addr.maxlen,
937 		    KM_NOSLEEP | KM_NORMALPRI);
938 		if (addr.maxlen != 0 && nc->authc_addr.buf == NULL) {
939 			kmem_free(nc, sizeof (*nc));
940 			goto retrieve;
941 		}
942 		bcopy(addr.buf, nc->authc_addr.buf, addr.len);
943 		rw_init(&nc->authc_lock, NULL, RW_DEFAULT, NULL);
944 		avl_create(&nc->authc_tree, nfsauth_cache_compar,
945 		    sizeof (struct auth_cache),
946 		    offsetof(struct auth_cache, auth_link));
947 
948 		rw_enter(&exi->exi_cache_lock, RW_WRITER);
949 		c = (struct auth_cache_clnt *)avl_find(tree, &acc, &where);
950 		if (c == NULL) {
951 			avl_insert(tree, nc, where);
952 			rw_downgrade(&exi->exi_cache_lock);
953 			c = nc;
954 		} else {
955 			rw_downgrade(&exi->exi_cache_lock);
956 
957 			avl_destroy(&nc->authc_tree);
958 			rw_destroy(&nc->authc_lock);
959 			kmem_free(nc->authc_addr.buf, nc->authc_addr.maxlen);
960 			kmem_free(nc, sizeof (*nc));
961 		}
962 	}
963 
964 	ASSERT(c != NULL);
965 
966 	rw_enter(&c->authc_lock, RW_READER);
967 	p = (struct auth_cache *)avl_find(&c->authc_tree, &ac, NULL);
968 
969 	if (p == NULL) {
970 		struct auth_cache *np;
971 
972 		rw_exit(&c->authc_lock);
973 
974 		np = kmem_cache_alloc(exi_cache_handle,
975 		    KM_NOSLEEP | KM_NORMALPRI);
976 		if (np == NULL) {
977 			rw_exit(&exi->exi_cache_lock);
978 			goto retrieve;
979 		}
980 
981 		/*
982 		 * Initialize the new auth_cache
983 		 */
984 		np->auth_clnt = c;
985 		np->auth_flavor = flavor;
986 		np->auth_clnt_cred = ac.auth_clnt_cred;
987 		np->auth_srv_ngids = 0;
988 		np->auth_srv_gids = NULL;
989 		np->auth_time = np->auth_freshness = gethrestime_sec();
990 		np->auth_state = NFS_AUTH_NEW;
991 		mutex_init(&np->auth_lock, NULL, MUTEX_DEFAULT, NULL);
992 		cv_init(&np->auth_cv, NULL, CV_DEFAULT, NULL);
993 
994 		rw_enter(&c->authc_lock, RW_WRITER);
995 		rw_exit(&exi->exi_cache_lock);
996 
997 		p = (struct auth_cache *)avl_find(&c->authc_tree, &ac, &where);
998 		if (p == NULL) {
999 			avl_insert(&c->authc_tree, np, where);
1000 			rw_downgrade(&c->authc_lock);
1001 			p = np;
1002 		} else {
1003 			rw_downgrade(&c->authc_lock);
1004 
1005 			cv_destroy(&np->auth_cv);
1006 			mutex_destroy(&np->auth_lock);
1007 			crfree(ac.auth_clnt_cred);
1008 			kmem_cache_free(exi_cache_handle, np);
1009 		}
1010 	} else {
1011 		rw_exit(&exi->exi_cache_lock);
1012 		crfree(ac.auth_clnt_cred);
1013 	}
1014 
1015 	mutex_enter(&p->auth_lock);
1016 	rw_exit(&c->authc_lock);
1017 
1018 	/*
1019 	 * If the entry is in the WAITING state then some other thread is just
1020 	 * retrieving the required info.  The entry was either NEW, or the list
1021 	 * of client's supplemental groups is going to be changed (either by
1022 	 * this thread, or by some other thread).  We need to wait until the
1023 	 * nfsauth_retrieve() is done.
1024 	 */
1025 	while (p->auth_state == NFS_AUTH_WAITING)
1026 		cv_wait(&p->auth_cv, &p->auth_lock);
1027 
1028 	/*
1029 	 * Here the entry cannot be in WAITING or INVALID state.
1030 	 */
1031 	ASSERT(p->auth_state != NFS_AUTH_WAITING);
1032 	ASSERT(p->auth_state != NFS_AUTH_INVALID);
1033 
1034 	/*
1035 	 * If the cache entry is not valid yet, we need to retrieve the
1036 	 * info ourselves.
1037 	 */
1038 	if (p->auth_state == NFS_AUTH_NEW) {
1039 		bool_t res;
1040 		/*
1041 		 * NFS_AUTH_NEW is the default output auth_state value in a
1042 		 * case we failed somewhere below.
1043 		 */
1044 		auth_state_t state = NFS_AUTH_NEW;
1045 
1046 		p->auth_state = NFS_AUTH_WAITING;
1047 		mutex_exit(&p->auth_lock);
1048 		kmem_free(addr.buf, addr.maxlen);
1049 		addr = p->auth_clnt->authc_addr;
1050 
1051 		atomic_inc_uint(&nfsauth_cache_miss);
1052 
1053 		res = nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt),
1054 		    flavor, &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids,
1055 		    &tmpgids);
1056 
1057 		p->auth_access = access;
1058 		p->auth_time = p->auth_freshness = gethrestime_sec();
1059 
1060 		if (res == TRUE) {
1061 			if (uid != NULL)
1062 				*uid = tmpuid;
1063 			if (gid != NULL)
1064 				*gid = tmpgid;
1065 			if (ngids != NULL && gids != NULL) {
1066 				*ngids = tmpngids;
1067 				*gids = tmpgids;
1068 
1069 				/*
1070 				 * We need a copy of gids for the
1071 				 * auth_cache entry
1072 				 */
1073 				tmpgids = kmem_alloc(tmpngids * sizeof (gid_t),
1074 				    KM_NOSLEEP | KM_NORMALPRI);
1075 				if (tmpgids != NULL)
1076 					bcopy(*gids, tmpgids,
1077 					    tmpngids * sizeof (gid_t));
1078 			}
1079 
1080 			if (tmpgids != NULL || tmpngids == 0) {
1081 				p->auth_srv_uid = tmpuid;
1082 				p->auth_srv_gid = tmpgid;
1083 				p->auth_srv_ngids = tmpngids;
1084 				p->auth_srv_gids = tmpgids;
1085 
1086 				state = NFS_AUTH_FRESH;
1087 			}
1088 		}
1089 
1090 		/*
1091 		 * Set the auth_state and notify waiters.
1092 		 */
1093 		mutex_enter(&p->auth_lock);
1094 		p->auth_state = state;
1095 		cv_broadcast(&p->auth_cv);
1096 		mutex_exit(&p->auth_lock);
1097 	} else {
1098 		uint_t nach;
1099 		time_t refresh;
1100 
1101 		refresh = gethrestime_sec() - p->auth_freshness;
1102 
1103 		p->auth_time = gethrestime_sec();
1104 
1105 		if (uid != NULL)
1106 			*uid = p->auth_srv_uid;
1107 		if (gid != NULL)
1108 			*gid = p->auth_srv_gid;
1109 		if (ngids != NULL && gids != NULL) {
1110 			if ((*ngids = p->auth_srv_ngids) != 0) {
1111 				size_t sz = *ngids * sizeof (gid_t);
1112 				*gids = kmem_alloc(sz, KM_SLEEP);
1113 				bcopy(p->auth_srv_gids, *gids, sz);
1114 			} else {
1115 				*gids = NULL;
1116 			}
1117 		}
1118 
1119 		access = p->auth_access;
1120 
1121 		if ((refresh > NFSAUTH_CACHE_REFRESH) &&
1122 		    p->auth_state == NFS_AUTH_FRESH) {
1123 			refreshq_auth_node_t *ran;
1124 			uint_t nacr;
1125 
1126 			p->auth_state = NFS_AUTH_STALE;
1127 			mutex_exit(&p->auth_lock);
1128 
1129 			nacr = atomic_inc_uint_nv(&nfsauth_cache_refresh);
1130 			DTRACE_PROBE3(nfsauth__debug__cache__stale,
1131 			    struct exportinfo *, exi,
1132 			    struct auth_cache *, p,
1133 			    uint_t, nacr);
1134 
1135 			ran = kmem_alloc(sizeof (refreshq_auth_node_t),
1136 			    KM_SLEEP);
1137 			ran->ran_auth = p;
1138 			ran->ran_netid = strdup(svc_getnetid(req->rq_xprt));
1139 
1140 			mutex_enter(&nag->refreshq_lock);
1141 
1142 			if (nag->refreshq_thread_state ==
1143 			    REFRESHQ_THREAD_NEED_CREATE) {
1144 				/* Launch nfsauth refresh thread */
1145 				nag->refreshq_thread_state =
1146 				    REFRESHQ_THREAD_RUNNING;
1147 				(void) zthread_create(NULL, 0,
1148 				    nfsauth_refresh_thread, nag, 0,
1149 				    minclsyspri);
1150 			}
1151 
1152 			/*
1153 			 * We should not add a work queue item if the thread
1154 			 * is not accepting them.
1155 			 */
1156 			if (nag->refreshq_thread_state ==
1157 			    REFRESHQ_THREAD_RUNNING) {
1158 				refreshq_exi_node_t *ren;
1159 
1160 				/*
1161 				 * Is there an existing exi_list?
1162 				 */
1163 				for (ren = list_head(&nag->refreshq_queue);
1164 				    ren != NULL;
1165 				    ren = list_next(&nag->refreshq_queue,
1166 				    ren)) {
1167 					if (ren->ren_exi == exi) {
1168 						list_insert_tail(
1169 						    &ren->ren_authlist, ran);
1170 						break;
1171 					}
1172 				}
1173 
1174 				if (ren == NULL) {
1175 					ren = kmem_alloc(
1176 					    sizeof (refreshq_exi_node_t),
1177 					    KM_SLEEP);
1178 
1179 					exi_hold(exi);
1180 					ren->ren_exi = exi;
1181 
1182 					list_create(&ren->ren_authlist,
1183 					    sizeof (refreshq_auth_node_t),
1184 					    offsetof(refreshq_auth_node_t,
1185 					    ran_node));
1186 
1187 					list_insert_tail(&ren->ren_authlist,
1188 					    ran);
1189 					list_insert_tail(&nag->refreshq_queue,
1190 					    ren);
1191 				}
1192 
1193 				cv_broadcast(&nag->refreshq_cv);
1194 			} else {
1195 				strfree(ran->ran_netid);
1196 				kmem_free(ran, sizeof (refreshq_auth_node_t));
1197 			}
1198 
1199 			mutex_exit(&nag->refreshq_lock);
1200 		} else {
1201 			mutex_exit(&p->auth_lock);
1202 		}
1203 
1204 		nach = atomic_inc_uint_nv(&nfsauth_cache_hit);
1205 		DTRACE_PROBE2(nfsauth__debug__cache__hit,
1206 		    uint_t, nach,
1207 		    time_t, refresh);
1208 
1209 		kmem_free(addr.buf, addr.maxlen);
1210 	}
1211 
1212 	return (access);
1213 
1214 retrieve:
1215 	crfree(ac.auth_clnt_cred);
1216 
1217 	/*
1218 	 * Retrieve the required data without caching.
1219 	 */
1220 
1221 	ASSERT(p == NULL);
1222 
1223 	atomic_inc_uint(&nfsauth_cache_miss);
1224 
1225 	if (nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt), flavor,
1226 	    &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids)) {
1227 		if (uid != NULL)
1228 			*uid = tmpuid;
1229 		if (gid != NULL)
1230 			*gid = tmpgid;
1231 		if (ngids != NULL && gids != NULL) {
1232 			*ngids = tmpngids;
1233 			*gids = tmpgids;
1234 		} else {
1235 			kmem_free(tmpgids, tmpngids * sizeof (gid_t));
1236 		}
1237 	}
1238 
1239 	kmem_free(addr.buf, addr.maxlen);
1240 
1241 	return (access);
1242 }
1243 
1244 /*
1245  * Check if the requesting client has access to the filesystem with
1246  * a given nfs flavor number which is an explicitly shared flavor.
1247  */
1248 int
1249 nfsauth4_secinfo_access(struct exportinfo *exi, struct svc_req *req,
1250     int flavor, int perm, cred_t *cr)
1251 {
1252 	int access;
1253 
1254 	if (! (perm & M_4SEC_EXPORTED)) {
1255 		return (NFSAUTH_DENIED);
1256 	}
1257 
1258 	/*
1259 	 * Optimize if there are no lists
1260 	 */
1261 	if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0) {
1262 		perm &= ~M_4SEC_EXPORTED;
1263 		if (perm == M_RO)
1264 			return (NFSAUTH_RO);
1265 		if (perm == M_RW)
1266 			return (NFSAUTH_RW);
1267 	}
1268 
1269 	access = nfsauth_cache_get(exi, req, flavor, cr, NULL, NULL, NULL,
1270 	    NULL);
1271 
1272 	return (access);
1273 }
1274 
1275 int
1276 nfsauth_access(struct exportinfo *exi, struct svc_req *req, cred_t *cr,
1277     uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
1278 {
1279 	int access, mapaccess;
1280 	struct secinfo *sp;
1281 	int i, flavor, perm;
1282 	int authnone_entry = -1;
1283 
1284 	/*
1285 	 * By default root is mapped to anonymous user.
1286 	 * This might get overriden later in nfsauth_cache_get().
1287 	 */
1288 	if (crgetuid(cr) == 0) {
1289 		if (uid != NULL)
1290 			*uid = exi->exi_export.ex_anon;
1291 		if (gid != NULL)
1292 			*gid = exi->exi_export.ex_anon;
1293 	} else {
1294 		if (uid != NULL)
1295 			*uid = crgetuid(cr);
1296 		if (gid != NULL)
1297 			*gid = crgetgid(cr);
1298 	}
1299 
1300 	if (ngids != NULL)
1301 		*ngids = 0;
1302 	if (gids != NULL)
1303 		*gids = NULL;
1304 
1305 	/*
1306 	 *  Get the nfs flavor number from xprt.
1307 	 */
1308 	flavor = (int)(uintptr_t)req->rq_xprt->xp_cookie;
1309 
1310 	/*
1311 	 * First check the access restrictions on the filesystem.  If
1312 	 * there are no lists associated with this flavor then there's no
1313 	 * need to make an expensive call to the nfsauth service or to
1314 	 * cache anything.
1315 	 */
1316 
1317 	sp = exi->exi_export.ex_secinfo;
1318 	for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1319 		if (flavor != sp[i].s_secinfo.sc_nfsnum) {
1320 			if (sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1321 				authnone_entry = i;
1322 			continue;
1323 		}
1324 		break;
1325 	}
1326 
1327 	mapaccess = 0;
1328 
1329 	if (i >= exi->exi_export.ex_seccnt) {
1330 		/*
1331 		 * Flavor not found, but use AUTH_NONE if it exists
1332 		 */
1333 		if (authnone_entry == -1)
1334 			return (NFSAUTH_DENIED);
1335 		flavor = AUTH_NONE;
1336 		mapaccess = NFSAUTH_MAPNONE;
1337 		i = authnone_entry;
1338 	}
1339 
1340 	/*
1341 	 * If the flavor is in the ex_secinfo list, but not an explicitly
1342 	 * shared flavor by the user, it is a result of the nfsv4 server
1343 	 * namespace setup. We will grant an RO permission similar for
1344 	 * a pseudo node except that this node is a shared one.
1345 	 *
1346 	 * e.g. flavor in (flavor) indicates that it is not explictly
1347 	 *	shared by the user:
1348 	 *
1349 	 *		/	(sys, krb5)
1350 	 *		|
1351 	 *		export  #share -o sec=sys (krb5)
1352 	 *		|
1353 	 *		secure  #share -o sec=krb5
1354 	 *
1355 	 *	In this case, when a krb5 request coming in to access
1356 	 *	/export, RO permission is granted.
1357 	 */
1358 	if (!(sp[i].s_flags & M_4SEC_EXPORTED))
1359 		return (mapaccess | NFSAUTH_RO);
1360 
1361 	/*
1362 	 * Optimize if there are no lists.
1363 	 * We cannot optimize for AUTH_SYS with NGRPS (16) supplemental groups.
1364 	 */
1365 	perm = sp[i].s_flags;
1366 	if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0 && (ngroups_max <= NGRPS ||
1367 	    flavor != AUTH_SYS || crgetngroups(cr) < NGRPS)) {
1368 		perm &= ~M_4SEC_EXPORTED;
1369 		if (perm == M_RO)
1370 			return (mapaccess | NFSAUTH_RO);
1371 		if (perm == M_RW)
1372 			return (mapaccess | NFSAUTH_RW);
1373 	}
1374 
1375 	access = nfsauth_cache_get(exi, req, flavor, cr, uid, gid, ngids, gids);
1376 
1377 	/*
1378 	 * For both NFSAUTH_DENIED and NFSAUTH_WRONGSEC we do not care about
1379 	 * the supplemental groups.
1380 	 */
1381 	if (access & NFSAUTH_DENIED || access & NFSAUTH_WRONGSEC) {
1382 		if (ngids != NULL && gids != NULL) {
1383 			kmem_free(*gids, *ngids * sizeof (gid_t));
1384 			*ngids = 0;
1385 			*gids = NULL;
1386 		}
1387 	}
1388 
1389 	/*
1390 	 * Client's security flavor doesn't match with "ro" or
1391 	 * "rw" list. Try again using AUTH_NONE if present.
1392 	 */
1393 	if ((access & NFSAUTH_WRONGSEC) && (flavor != AUTH_NONE)) {
1394 		/*
1395 		 * Have we already encountered AUTH_NONE ?
1396 		 */
1397 		if (authnone_entry != -1) {
1398 			mapaccess = NFSAUTH_MAPNONE;
1399 			access = nfsauth_cache_get(exi, req, AUTH_NONE, cr,
1400 			    NULL, NULL, NULL, NULL);
1401 		} else {
1402 			/*
1403 			 * Check for AUTH_NONE presence.
1404 			 */
1405 			for (; i < exi->exi_export.ex_seccnt; i++) {
1406 				if (sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1407 					mapaccess = NFSAUTH_MAPNONE;
1408 					access = nfsauth_cache_get(exi, req,
1409 					    AUTH_NONE, cr, NULL, NULL, NULL,
1410 					    NULL);
1411 					break;
1412 				}
1413 			}
1414 		}
1415 	}
1416 
1417 	if (access & NFSAUTH_DENIED)
1418 		access = NFSAUTH_DENIED;
1419 
1420 	return (access | mapaccess);
1421 }
1422 
1423 static void
1424 nfsauth_free_clnt_node(struct auth_cache_clnt *p)
1425 {
1426 	void *cookie = NULL;
1427 	struct auth_cache *node;
1428 
1429 	while ((node = avl_destroy_nodes(&p->authc_tree, &cookie)) != NULL)
1430 		nfsauth_free_node(node);
1431 	avl_destroy(&p->authc_tree);
1432 
1433 	kmem_free(p->authc_addr.buf, p->authc_addr.maxlen);
1434 	rw_destroy(&p->authc_lock);
1435 
1436 	kmem_free(p, sizeof (*p));
1437 }
1438 
1439 static void
1440 nfsauth_free_node(struct auth_cache *p)
1441 {
1442 	crfree(p->auth_clnt_cred);
1443 	kmem_free(p->auth_srv_gids, p->auth_srv_ngids * sizeof (gid_t));
1444 	mutex_destroy(&p->auth_lock);
1445 	cv_destroy(&p->auth_cv);
1446 	kmem_cache_free(exi_cache_handle, p);
1447 }
1448 
1449 /*
1450  * Free the nfsauth cache for a given export
1451  */
1452 void
1453 nfsauth_cache_free(struct exportinfo *exi)
1454 {
1455 	int i;
1456 
1457 	/*
1458 	 * The only way we got here was with an exi_rele, which means that no
1459 	 * auth cache entry is being refreshed.
1460 	 */
1461 
1462 	for (i = 0; i < AUTH_TABLESIZE; i++) {
1463 		avl_tree_t *tree = exi->exi_cache[i];
1464 		void *cookie = NULL;
1465 		struct auth_cache_clnt *node;
1466 
1467 		while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
1468 			nfsauth_free_clnt_node(node);
1469 	}
1470 }
1471 
1472 /*
1473  * Called by the kernel memory allocator when memory is low.
1474  * Free unused cache entries. If that's not enough, the VM system
1475  * will call again for some more.
1476  *
1477  * This needs to operate on all zones, so we take a reader lock
1478  * on the list of zones and walk the list.  This is OK here
1479  * becuase exi_cache_trim doesn't block or cause new objects
1480  * to be allocated (basically just frees lots of stuff).
1481  * Use care if nfssrv_globals_rwl is taken as reader in any
1482  * other cases because it will block nfs_server_zone_init
1483  * and nfs_server_zone_fini, which enter as writer.
1484  */
1485 /*ARGSUSED*/
1486 void
1487 exi_cache_reclaim(void *cdrarg)
1488 {
1489 	nfs_globals_t *ng;
1490 
1491 	rw_enter(&nfssrv_globals_rwl, RW_READER);
1492 
1493 	ng = list_head(&nfssrv_globals_list);
1494 	while (ng != NULL) {
1495 		exi_cache_reclaim_zone(ng);
1496 		ng = list_next(&nfssrv_globals_list, ng);
1497 	}
1498 
1499 	rw_exit(&nfssrv_globals_rwl);
1500 }
1501 
1502 static void
1503 exi_cache_reclaim_zone(nfs_globals_t *ng)
1504 {
1505 	int i;
1506 	struct exportinfo *exi;
1507 	nfs_export_t *ne = ng->nfs_export;
1508 
1509 	rw_enter(&ne->exported_lock, RW_READER);
1510 
1511 	for (i = 0; i < EXPTABLESIZE; i++) {
1512 		for (exi = ne->exptable[i]; exi; exi = exi->fid_hash.next)
1513 			exi_cache_trim(exi);
1514 	}
1515 
1516 	rw_exit(&ne->exported_lock);
1517 
1518 	atomic_inc_uint(&nfsauth_cache_reclaim);
1519 }
1520 
1521 static void
1522 exi_cache_trim(struct exportinfo *exi)
1523 {
1524 	struct auth_cache_clnt *c;
1525 	struct auth_cache_clnt *nextc;
1526 	struct auth_cache *p;
1527 	struct auth_cache *next;
1528 	int i;
1529 	time_t stale_time;
1530 	avl_tree_t *tree;
1531 
1532 	for (i = 0; i < AUTH_TABLESIZE; i++) {
1533 		tree = exi->exi_cache[i];
1534 		stale_time = gethrestime_sec() - NFSAUTH_CACHE_TRIM;
1535 		rw_enter(&exi->exi_cache_lock, RW_READER);
1536 
1537 		/*
1538 		 * Free entries that have not been
1539 		 * used for NFSAUTH_CACHE_TRIM seconds.
1540 		 */
1541 		for (c = avl_first(tree); c != NULL; c = AVL_NEXT(tree, c)) {
1542 			/*
1543 			 * We are being called by the kmem subsystem to reclaim
1544 			 * memory so don't block if we can't get the lock.
1545 			 */
1546 			if (rw_tryenter(&c->authc_lock, RW_WRITER) == 0) {
1547 				exi_cache_auth_reclaim_failed++;
1548 				rw_exit(&exi->exi_cache_lock);
1549 				return;
1550 			}
1551 
1552 			for (p = avl_first(&c->authc_tree); p != NULL;
1553 			    p = next) {
1554 				next = AVL_NEXT(&c->authc_tree, p);
1555 
1556 				ASSERT(p->auth_state != NFS_AUTH_INVALID);
1557 
1558 				mutex_enter(&p->auth_lock);
1559 
1560 				/*
1561 				 * We won't trim recently used and/or WAITING
1562 				 * entries.
1563 				 */
1564 				if (p->auth_time > stale_time ||
1565 				    p->auth_state == NFS_AUTH_WAITING) {
1566 					mutex_exit(&p->auth_lock);
1567 					continue;
1568 				}
1569 
1570 				DTRACE_PROBE1(nfsauth__debug__trim__state,
1571 				    auth_state_t, p->auth_state);
1572 
1573 				/*
1574 				 * STALE and REFRESHING entries needs to be
1575 				 * marked INVALID only because they are
1576 				 * referenced by some other structures or
1577 				 * threads.  They will be freed later.
1578 				 */
1579 				if (p->auth_state == NFS_AUTH_STALE ||
1580 				    p->auth_state == NFS_AUTH_REFRESHING) {
1581 					p->auth_state = NFS_AUTH_INVALID;
1582 					mutex_exit(&p->auth_lock);
1583 
1584 					avl_remove(&c->authc_tree, p);
1585 				} else {
1586 					mutex_exit(&p->auth_lock);
1587 
1588 					avl_remove(&c->authc_tree, p);
1589 					nfsauth_free_node(p);
1590 				}
1591 			}
1592 			rw_exit(&c->authc_lock);
1593 		}
1594 
1595 		if (rw_tryupgrade(&exi->exi_cache_lock) == 0) {
1596 			rw_exit(&exi->exi_cache_lock);
1597 			exi_cache_clnt_reclaim_failed++;
1598 			continue;
1599 		}
1600 
1601 		for (c = avl_first(tree); c != NULL; c = nextc) {
1602 			nextc = AVL_NEXT(tree, c);
1603 
1604 			if (avl_is_empty(&c->authc_tree) == B_FALSE)
1605 				continue;
1606 
1607 			avl_remove(tree, c);
1608 
1609 			nfsauth_free_clnt_node(c);
1610 		}
1611 
1612 		rw_exit(&exi->exi_cache_lock);
1613 	}
1614 }
1615