xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs_auth.c (revision f44e1126d9eae71c48c5d1de51e24750c6ec20a4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2015 by Delphix. All rights reserved.
25  * Copyright (c) 2015 Joyent, Inc.  All rights reserved.
26  * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/errno.h>
31 #include <sys/vfs.h>
32 #include <sys/vnode.h>
33 #include <sys/cred.h>
34 #include <sys/cmn_err.h>
35 #include <sys/systm.h>
36 #include <sys/kmem.h>
37 #include <sys/pathname.h>
38 #include <sys/utsname.h>
39 #include <sys/debug.h>
40 #include <sys/door.h>
41 #include <sys/sdt.h>
42 #include <sys/thread.h>
43 #include <sys/avl.h>
44 
45 #include <rpc/types.h>
46 #include <rpc/auth.h>
47 #include <rpc/clnt.h>
48 
49 #include <nfs/nfs.h>
50 #include <nfs/export.h>
51 #include <nfs/nfs_clnt.h>
52 #include <nfs/auth.h>
53 
54 static struct kmem_cache *exi_cache_handle;
55 static void exi_cache_reclaim(void *);
56 static void exi_cache_reclaim_zone(nfs_globals_t *);
57 static void exi_cache_trim(struct exportinfo *exi);
58 
59 extern pri_t minclsyspri;
60 
61 /* NFS auth cache statistics */
62 volatile uint_t nfsauth_cache_hit;
63 volatile uint_t nfsauth_cache_miss;
64 volatile uint_t nfsauth_cache_refresh;
65 volatile uint_t nfsauth_cache_reclaim;
66 volatile uint_t exi_cache_auth_reclaim_failed;
67 volatile uint_t exi_cache_clnt_reclaim_failed;
68 
69 /*
70  * The lifetime of an auth cache entry:
71  * ------------------------------------
72  *
73  * An auth cache entry is created with both the auth_time
74  * and auth_freshness times set to the current time.
75  *
76  * Upon every client access which results in a hit, the
77  * auth_time will be updated.
78  *
79  * If a client access determines that the auth_freshness
80  * indicates that the entry is STALE, then it will be
81  * refreshed. Note that this will explicitly reset
82  * auth_time.
83  *
84  * When the REFRESH successfully occurs, then the
85  * auth_freshness is updated.
86  *
87  * There are two ways for an entry to leave the cache:
88  *
89  * 1) Purged by an action on the export (remove or changed)
90  * 2) Memory backpressure from the kernel (check against NFSAUTH_CACHE_TRIM)
91  *
92  * For 2) we check the timeout value against auth_time.
93  */
94 
/*
 * Age, in seconds, at which an auth cache entry gets marked for refresh
 * (ten minutes).
 */
#define	NFSAUTH_CACHE_REFRESH	(10 * 60)

/*
 * Idle time, in seconds, after which an auth cache entry may be given up
 * when the kernel applies memory backpressure (one hour).
 */
#define	NFSAUTH_CACHE_TRIM	(60 * 60)
105 
106 /*
107  * While we could encapuslate the exi_list inside the
108  * exi structure, we can't do that for the auth_list.
109  * So, to keep things looking clean, we keep them both
110  * in these external lists.
111  */
112 typedef struct refreshq_exi_node {
113 	struct exportinfo	*ren_exi;
114 	list_t			ren_authlist;
115 	list_node_t		ren_node;
116 } refreshq_exi_node_t;
117 
118 typedef struct refreshq_auth_node {
119 	struct auth_cache	*ran_auth;
120 	char			*ran_netid;
121 	list_node_t		ran_node;
122 } refreshq_auth_node_t;
123 
/*
 * Life cycle of the refresh thread.
 *
 * Tearing down the auth state while the refresh thread exists requires
 * the two sides to cooperate, and this state is how they do it:
 *
 *   NEED_CREATE  initial state set by nfsauth_zone_init(); no thread yet
 *   RUNNING      the thread is servicing the refresh queue
 *   FINI_REQ     nfsauth_zone_shutdown() asked the thread to stop
 *   HALTED       the thread acknowledged the request and is exiting
 */
typedef enum nfsauth_refreshq_thread_state {
	REFRESHQ_THREAD_RUNNING = 0,
	REFRESHQ_THREAD_FINI_REQ = 1,
	REFRESHQ_THREAD_HALTED = 2,
	REFRESHQ_THREAD_NEED_CREATE = 3
} nfsauth_refreshq_thread_state_t;
135 
136 typedef struct nfsauth_globals {
137 	kmutex_t	mountd_lock;
138 	door_handle_t   mountd_dh;
139 
140 	/*
141 	 * Used to manipulate things on the refreshq_queue.  Note that the
142 	 * refresh thread will effectively pop a node off of the queue,
143 	 * at which point it will no longer need to hold the mutex.
144 	 */
145 	kmutex_t	refreshq_lock;
146 	list_t		refreshq_queue;
147 	kcondvar_t	refreshq_cv;
148 
149 	/*
150 	 * A list_t would be overkill.  These are auth_cache entries which are
151 	 * no longer linked to an exi.  It should be the case that all of their
152 	 * states are NFS_AUTH_INVALID, i.e., the only way to be put on this
153 	 * list is iff their state indicated that they had been placed on the
154 	 * refreshq_queue.
155 	 *
156 	 * Note that while there is no link from the exi or back to the exi,
157 	 * the exi can not go away until these entries are harvested.
158 	 */
159 	struct auth_cache		*refreshq_dead_entries;
160 	nfsauth_refreshq_thread_state_t	refreshq_thread_state;
161 
162 } nfsauth_globals_t;
163 
164 static void nfsauth_free_node(struct auth_cache *);
165 static void nfsauth_refresh_thread(nfsauth_globals_t *);
166 
167 static int nfsauth_cache_compar(const void *, const void *);
168 
169 static nfsauth_globals_t *
nfsauth_get_zg(void)170 nfsauth_get_zg(void)
171 {
172 	nfs_globals_t *ng = nfs_srv_getzg();
173 	nfsauth_globals_t *nag = ng->nfs_auth;
174 	ASSERT(nag != NULL);
175 	return (nag);
176 }
177 
178 void
mountd_args(uint_t did)179 mountd_args(uint_t did)
180 {
181 	nfsauth_globals_t *nag;
182 
183 	nag = nfsauth_get_zg();
184 	mutex_enter(&nag->mountd_lock);
185 	if (nag->mountd_dh != NULL)
186 		door_ki_rele(nag->mountd_dh);
187 	nag->mountd_dh = door_ki_lookup(did);
188 	mutex_exit(&nag->mountd_lock);
189 }
190 
191 void
nfsauth_init(void)192 nfsauth_init(void)
193 {
194 	exi_cache_handle = kmem_cache_create("exi_cache_handle",
195 	    sizeof (struct auth_cache), 0, NULL, NULL,
196 	    exi_cache_reclaim, NULL, NULL, 0);
197 }
198 
199 void
nfsauth_fini(void)200 nfsauth_fini(void)
201 {
202 	kmem_cache_destroy(exi_cache_handle);
203 }
204 
205 void
nfsauth_zone_init(nfs_globals_t * ng)206 nfsauth_zone_init(nfs_globals_t *ng)
207 {
208 	nfsauth_globals_t *nag;
209 
210 	nag = kmem_zalloc(sizeof (*nag), KM_SLEEP);
211 
212 	/*
213 	 * mountd can be restarted by smf(7).  We need to make sure
214 	 * the updated door handle will safely make it to mountd_dh.
215 	 */
216 	mutex_init(&nag->mountd_lock, NULL, MUTEX_DEFAULT, NULL);
217 	mutex_init(&nag->refreshq_lock, NULL, MUTEX_DEFAULT, NULL);
218 	list_create(&nag->refreshq_queue, sizeof (refreshq_exi_node_t),
219 	    offsetof(refreshq_exi_node_t, ren_node));
220 	cv_init(&nag->refreshq_cv, NULL, CV_DEFAULT, NULL);
221 	nag->refreshq_thread_state = REFRESHQ_THREAD_NEED_CREATE;
222 
223 	ng->nfs_auth = nag;
224 }
225 
226 void
nfsauth_zone_shutdown(nfs_globals_t * ng)227 nfsauth_zone_shutdown(nfs_globals_t *ng)
228 {
229 	refreshq_exi_node_t	*ren;
230 	nfsauth_globals_t	*nag = ng->nfs_auth;
231 
232 	/* Prevent the nfsauth_refresh_thread from getting new work */
233 	mutex_enter(&nag->refreshq_lock);
234 	if (nag->refreshq_thread_state == REFRESHQ_THREAD_RUNNING) {
235 		nag->refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ;
236 		cv_broadcast(&nag->refreshq_cv);
237 
238 		/* Wait for nfsauth_refresh_thread() to exit */
239 		while (nag->refreshq_thread_state != REFRESHQ_THREAD_HALTED)
240 			cv_wait(&nag->refreshq_cv, &nag->refreshq_lock);
241 	}
242 	mutex_exit(&nag->refreshq_lock);
243 
244 	/*
245 	 * Walk the exi_list and in turn, walk the auth_lists and free all
246 	 * lists.  In addition, free INVALID auth_cache entries.
247 	 */
248 	while ((ren = list_remove_head(&nag->refreshq_queue))) {
249 		refreshq_auth_node_t *ran;
250 
251 		while ((ran = list_remove_head(&ren->ren_authlist)) != NULL) {
252 			struct auth_cache *p = ran->ran_auth;
253 			if (p->auth_state == NFS_AUTH_INVALID)
254 				nfsauth_free_node(p);
255 			strfree(ran->ran_netid);
256 			kmem_free(ran, sizeof (*ran));
257 		}
258 
259 		list_destroy(&ren->ren_authlist);
260 		exi_rele(ren->ren_exi);
261 		kmem_free(ren, sizeof (*ren));
262 	}
263 }
264 
265 void
nfsauth_zone_fini(nfs_globals_t * ng)266 nfsauth_zone_fini(nfs_globals_t *ng)
267 {
268 	nfsauth_globals_t *nag = ng->nfs_auth;
269 
270 	ng->nfs_auth = NULL;
271 
272 	list_destroy(&nag->refreshq_queue);
273 	cv_destroy(&nag->refreshq_cv);
274 	mutex_destroy(&nag->refreshq_lock);
275 	mutex_destroy(&nag->mountd_lock);
276 	/* Extra cleanup. */
277 	if (nag->mountd_dh != NULL)
278 		door_ki_rele(nag->mountd_dh);
279 	kmem_free(nag, sizeof (*nag));
280 }
281 
282 /*
283  * Convert the address in a netbuf to
284  * a hash index for the auth_cache table.
285  */
286 static int
hash(struct netbuf * a)287 hash(struct netbuf *a)
288 {
289 	int i, h = 0;
290 
291 	for (i = 0; i < a->len; i++)
292 		h ^= a->buf[i];
293 
294 	return (h & (AUTH_TABLESIZE - 1));
295 }
296 
297 /*
298  * Mask out the components of an
299  * address that do not identify
300  * a host. For socket addresses the
301  * masking gets rid of the port number.
302  */
303 static void
addrmask(struct netbuf * addr,struct netbuf * mask)304 addrmask(struct netbuf *addr, struct netbuf *mask)
305 {
306 	int i;
307 
308 	for (i = 0; i < addr->len; i++)
309 		addr->buf[i] &= mask->buf[i];
310 }
311 
312 /*
313  * nfsauth4_access is used for NFS V4 auth checking. Besides doing
314  * the common nfsauth_access(), it will check if the client can
315  * have a limited access to this vnode even if the security flavor
316  * used does not meet the policy.
317  */
318 int
nfsauth4_access(struct exportinfo * exi,vnode_t * vp,struct svc_req * req,cred_t * cr,uid_t * uid,gid_t * gid,uint_t * ngids,gid_t ** gids)319 nfsauth4_access(struct exportinfo *exi, vnode_t *vp, struct svc_req *req,
320     cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
321 {
322 	int access;
323 
324 	access = nfsauth_access(exi, req, cr, uid, gid, ngids, gids);
325 
326 	/*
327 	 * There are cases that the server needs to allow the client
328 	 * to have a limited view.
329 	 *
330 	 * e.g.
331 	 * /export is shared as "sec=sys,rw=dfs-test-4,sec=krb5,rw"
332 	 * /export/home is shared as "sec=sys,rw"
333 	 *
334 	 * When the client mounts /export with sec=sys, the client
335 	 * would get a limited view with RO access on /export to see
336 	 * "home" only because the client is allowed to access
337 	 * /export/home with auth_sys.
338 	 */
339 	if (access & NFSAUTH_DENIED || access & NFSAUTH_WRONGSEC) {
340 		/*
341 		 * Allow ro permission with LIMITED view if there is a
342 		 * sub-dir exported under vp.
343 		 */
344 		if (has_visible(exi, vp))
345 			return (NFSAUTH_LIMITED);
346 	}
347 
348 	return (access);
349 }
350 
351 static void
sys_log(const char * msg)352 sys_log(const char *msg)
353 {
354 	static time_t	tstamp = 0;
355 	time_t		now;
356 
357 	/*
358 	 * msg is shown (at most) once per minute
359 	 */
360 	now = gethrestime_sec();
361 	if ((tstamp + 60) < now) {
362 		tstamp = now;
363 		cmn_err(CE_WARN, msg);
364 	}
365 }
366 
367 /*
368  * Callup to the mountd to get access information in the kernel.
369  */
370 static bool_t
nfsauth_retrieve(nfsauth_globals_t * nag,struct exportinfo * exi,char * req_netid,int flavor,struct netbuf * addr,int * access,cred_t * clnt_cred,uid_t * srv_uid,gid_t * srv_gid,uint_t * srv_gids_cnt,gid_t ** srv_gids)371 nfsauth_retrieve(nfsauth_globals_t *nag, struct exportinfo *exi,
372     char *req_netid, int flavor, struct netbuf *addr, int *access,
373     cred_t *clnt_cred, uid_t *srv_uid, gid_t *srv_gid, uint_t *srv_gids_cnt,
374     gid_t **srv_gids)
375 {
376 	varg_t			  varg = {0};
377 	nfsauth_res_t		  res = {0};
378 	XDR			  xdrs;
379 	size_t			  absz;
380 	caddr_t			  abuf;
381 	int			  last = 0;
382 	door_arg_t		  da;
383 	door_info_t		  di;
384 	door_handle_t		  dh;
385 	uint_t			  ntries = 0;
386 
387 	/*
388 	 * No entry in the cache for this client/flavor
389 	 * so we need to call the nfsauth service in the
390 	 * mount daemon.
391 	 */
392 
393 	varg.vers = V_PROTO;
394 	varg.arg_u.arg.cmd = NFSAUTH_ACCESS;
395 	varg.arg_u.arg.areq.req_client.n_len = addr->len;
396 	varg.arg_u.arg.areq.req_client.n_bytes = addr->buf;
397 	varg.arg_u.arg.areq.req_netid = req_netid;
398 	varg.arg_u.arg.areq.req_path = exi->exi_export.ex_path;
399 	varg.arg_u.arg.areq.req_flavor = flavor;
400 	varg.arg_u.arg.areq.req_clnt_uid = crgetuid(clnt_cred);
401 	varg.arg_u.arg.areq.req_clnt_gid = crgetgid(clnt_cred);
402 	varg.arg_u.arg.areq.req_clnt_gids.len = crgetngroups(clnt_cred);
403 	varg.arg_u.arg.areq.req_clnt_gids.val = (gid_t *)crgetgroups(clnt_cred);
404 
405 	DTRACE_PROBE1(nfsserv__func__nfsauth__varg, varg_t *, &varg);
406 
407 	/*
408 	 * Setup the XDR stream for encoding the arguments. Notice that
409 	 * in addition to the args having variable fields (req_netid and
410 	 * req_path), the argument data structure is itself versioned,
411 	 * so we need to make sure we can size the arguments buffer
412 	 * appropriately to encode all the args. If we can't get sizing
413 	 * info _or_ properly encode the arguments, there's really no
414 	 * point in continuting, so we fail the request.
415 	 */
416 	if ((absz = xdr_sizeof(xdr_varg, &varg)) == 0) {
417 		*access = NFSAUTH_DENIED;
418 		return (FALSE);
419 	}
420 
421 	abuf = (caddr_t)kmem_alloc(absz, KM_SLEEP);
422 	xdrmem_create(&xdrs, abuf, absz, XDR_ENCODE);
423 	if (!xdr_varg(&xdrs, &varg)) {
424 		XDR_DESTROY(&xdrs);
425 		goto fail;
426 	}
427 	XDR_DESTROY(&xdrs);
428 
429 	/*
430 	 * Prepare the door arguments
431 	 *
432 	 * We don't know the size of the message the daemon
433 	 * will pass back to us.  By setting rbuf to NULL,
434 	 * we force the door code to allocate a buf of the
435 	 * appropriate size.  We must set rsize > 0, however,
436 	 * else the door code acts as if no response was
437 	 * expected and doesn't pass the data to us.
438 	 */
439 	da.data_ptr = (char *)abuf;
440 	da.data_size = absz;
441 	da.desc_ptr = NULL;
442 	da.desc_num = 0;
443 	da.rbuf = NULL;
444 	da.rsize = 1;
445 
446 retry:
447 	mutex_enter(&nag->mountd_lock);
448 	dh = nag->mountd_dh;
449 	if (dh != NULL)
450 		door_ki_hold(dh);
451 	mutex_exit(&nag->mountd_lock);
452 
453 	if (dh == NULL) {
454 		/*
455 		 * The rendezvous point has not been established yet!
456 		 * This could mean that either mountd(8) has not yet
457 		 * been started or that _this_ routine nuked the door
458 		 * handle after receiving an EINTR for a REVOKED door.
459 		 *
460 		 * Returning NFSAUTH_DROP will cause the NFS client
461 		 * to retransmit the request, so let's try to be more
462 		 * rescillient and attempt for ntries before we bail.
463 		 */
464 		if (++ntries % NFSAUTH_DR_TRYCNT) {
465 			delay(hz);
466 			goto retry;
467 		}
468 
469 		kmem_free(abuf, absz);
470 
471 		sys_log("nfsauth: mountd has not established door");
472 		*access = NFSAUTH_DROP;
473 		return (FALSE);
474 	}
475 
476 	ntries = 0;
477 
478 	/*
479 	 * Now that we've got what we need, place the call.
480 	 */
481 	switch (door_ki_upcall_limited(dh, &da, NULL, SIZE_MAX, 0)) {
482 	case 0:				/* Success */
483 		door_ki_rele(dh);
484 
485 		if (da.data_ptr == NULL && da.data_size == 0) {
486 			/*
487 			 * The door_return that contained the data
488 			 * failed! We're here because of the 2nd
489 			 * door_return (w/o data) such that we can
490 			 * get control of the thread (and exit
491 			 * gracefully).
492 			 */
493 			DTRACE_PROBE1(nfsserv__func__nfsauth__door__nil,
494 			    door_arg_t *, &da);
495 			goto fail;
496 		}
497 
498 		break;
499 
500 	case EAGAIN:
501 		/*
502 		 * Server out of resources; back off for a bit
503 		 */
504 		door_ki_rele(dh);
505 		delay(hz);
506 		goto retry;
507 		/* NOTREACHED */
508 
509 	case EINTR:
510 		if (!door_ki_info(dh, &di)) {
511 			door_ki_rele(dh);
512 
513 			if (di.di_attributes & DOOR_REVOKED) {
514 				/*
515 				 * The server barfed and revoked
516 				 * the (existing) door on us; we
517 				 * want to wait to give smf(7) a
518 				 * chance to restart mountd(8)
519 				 * and establish a new door handle.
520 				 */
521 				mutex_enter(&nag->mountd_lock);
522 				if (dh == nag->mountd_dh) {
523 					door_ki_rele(nag->mountd_dh);
524 					nag->mountd_dh = NULL;
525 				}
526 				mutex_exit(&nag->mountd_lock);
527 				delay(hz);
528 				goto retry;
529 			}
530 			/*
531 			 * If the door was _not_ revoked on us,
532 			 * then more than likely we took an INTR,
533 			 * so we need to fail the operation.
534 			 */
535 			goto fail;
536 		}
537 		/*
538 		 * The only failure that can occur from getting
539 		 * the door info is EINVAL, so we let the code
540 		 * below handle it.
541 		 */
542 		/* FALLTHROUGH */
543 
544 	case EBADF:
545 	case EINVAL:
546 	default:
547 		/*
548 		 * If we have a stale door handle, give smf a last
549 		 * chance to start it by sleeping for a little bit.
550 		 * If we're still hosed, we'll fail the call.
551 		 *
552 		 * Since we're going to reacquire the door handle
553 		 * upon the retry, we opt to sleep for a bit and
554 		 * _not_ to clear mountd_dh. If mountd restarted
555 		 * and was able to set mountd_dh, we should see
556 		 * the new instance; if not, we won't get caught
557 		 * up in the retry/DELAY loop.
558 		 */
559 		door_ki_rele(dh);
560 		if (!last) {
561 			delay(hz);
562 			last++;
563 			goto retry;
564 		}
565 		sys_log("nfsauth: stale mountd door handle");
566 		goto fail;
567 	}
568 
569 	ASSERT(da.rbuf != NULL);
570 
571 	/*
572 	 * No door errors encountered; setup the XDR stream for decoding
573 	 * the results. If we fail to decode the results, we've got no
574 	 * other recourse than to fail the request.
575 	 */
576 	xdrmem_create(&xdrs, da.rbuf, da.rsize, XDR_DECODE);
577 	if (!xdr_nfsauth_res(&xdrs, &res)) {
578 		xdr_free(xdr_nfsauth_res, (char *)&res);
579 		XDR_DESTROY(&xdrs);
580 		kmem_free(da.rbuf, da.rsize);
581 		goto fail;
582 	}
583 	XDR_DESTROY(&xdrs);
584 	kmem_free(da.rbuf, da.rsize);
585 
586 	DTRACE_PROBE1(nfsserv__func__nfsauth__results, nfsauth_res_t *, &res);
587 	switch (res.stat) {
588 		case NFSAUTH_DR_OKAY:
589 			*access = res.ares.auth_perm;
590 			*srv_uid = res.ares.auth_srv_uid;
591 			*srv_gid = res.ares.auth_srv_gid;
592 
593 			if ((*srv_gids_cnt = res.ares.auth_srv_gids.len) != 0) {
594 				*srv_gids = kmem_alloc(*srv_gids_cnt *
595 				    sizeof (gid_t), KM_SLEEP);
596 				bcopy(res.ares.auth_srv_gids.val, *srv_gids,
597 				    *srv_gids_cnt * sizeof (gid_t));
598 			} else {
599 				*srv_gids = NULL;
600 			}
601 
602 			break;
603 
604 		case NFSAUTH_DR_EFAIL:
605 		case NFSAUTH_DR_DECERR:
606 		case NFSAUTH_DR_BADCMD:
607 		default:
608 			xdr_free(xdr_nfsauth_res, (char *)&res);
609 fail:
610 			*access = NFSAUTH_DENIED;
611 			kmem_free(abuf, absz);
612 			return (FALSE);
613 			/* NOTREACHED */
614 	}
615 
616 	xdr_free(xdr_nfsauth_res, (char *)&res);
617 	kmem_free(abuf, absz);
618 
619 	return (TRUE);
620 }
621 
622 static void
nfsauth_refresh_thread(nfsauth_globals_t * nag)623 nfsauth_refresh_thread(nfsauth_globals_t *nag)
624 {
625 	refreshq_exi_node_t	*ren;
626 	refreshq_auth_node_t	*ran;
627 
628 	struct exportinfo	*exi;
629 
630 	int			access;
631 	bool_t			retrieval;
632 
633 	callb_cpr_t		cprinfo;
634 
635 	CALLB_CPR_INIT(&cprinfo, &nag->refreshq_lock, callb_generic_cpr,
636 	    "nfsauth_refresh");
637 
638 	for (;;) {
639 		mutex_enter(&nag->refreshq_lock);
640 		if (nag->refreshq_thread_state != REFRESHQ_THREAD_RUNNING) {
641 			/* Keep the hold on the lock! */
642 			break;
643 		}
644 
645 		ren = list_remove_head(&nag->refreshq_queue);
646 		if (ren == NULL) {
647 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
648 			cv_wait(&nag->refreshq_cv, &nag->refreshq_lock);
649 			CALLB_CPR_SAFE_END(&cprinfo, &nag->refreshq_lock);
650 			mutex_exit(&nag->refreshq_lock);
651 			continue;
652 		}
653 		mutex_exit(&nag->refreshq_lock);
654 
655 		exi = ren->ren_exi;
656 		ASSERT(exi != NULL);
657 
658 		/*
659 		 * Since the ren was removed from the refreshq_queue above,
660 		 * this is the only thread aware about the ren existence, so we
661 		 * have the exclusive ownership of it and we do not need to
662 		 * protect it by any lock.
663 		 */
664 		while ((ran = list_remove_head(&ren->ren_authlist))) {
665 			uid_t uid;
666 			gid_t gid;
667 			uint_t ngids;
668 			gid_t *gids;
669 			struct auth_cache *p = ran->ran_auth;
670 			char *netid = ran->ran_netid;
671 
672 			ASSERT(p != NULL);
673 			ASSERT(netid != NULL);
674 
675 			kmem_free(ran, sizeof (refreshq_auth_node_t));
676 
677 			mutex_enter(&p->auth_lock);
678 
679 			/*
680 			 * Once the entry goes INVALID, it can not change
681 			 * state.
682 			 *
683 			 * No need to refresh entries also in a case we are
684 			 * just shutting down.
685 			 *
686 			 * In general, there is no need to hold the
687 			 * refreshq_lock to test the refreshq_thread_state.  We
688 			 * do hold it at other places because there is some
689 			 * related thread synchronization (or some other tasks)
690 			 * close to the refreshq_thread_state check.
691 			 *
692 			 * The check for the refreshq_thread_state value here
693 			 * is purely advisory to allow the faster
694 			 * nfsauth_refresh_thread() shutdown.  In a case we
695 			 * will miss such advisory, nothing catastrophic
696 			 * happens: we will just spin longer here before the
697 			 * shutdown.
698 			 */
699 			if (p->auth_state == NFS_AUTH_INVALID ||
700 			    nag->refreshq_thread_state !=
701 			    REFRESHQ_THREAD_RUNNING) {
702 				mutex_exit(&p->auth_lock);
703 
704 				if (p->auth_state == NFS_AUTH_INVALID)
705 					nfsauth_free_node(p);
706 
707 				strfree(netid);
708 
709 				continue;
710 			}
711 
712 			/*
713 			 * Make sure the state is valid.  Note that once we
714 			 * change the state to NFS_AUTH_REFRESHING, no other
715 			 * thread will be able to work on this entry.
716 			 */
717 			ASSERT(p->auth_state == NFS_AUTH_STALE);
718 
719 			p->auth_state = NFS_AUTH_REFRESHING;
720 			mutex_exit(&p->auth_lock);
721 
722 			DTRACE_PROBE2(nfsauth__debug__cache__refresh,
723 			    struct exportinfo *, exi,
724 			    struct auth_cache *, p);
725 
726 			/*
727 			 * The first caching of the access rights
728 			 * is done with the netid pulled out of the
729 			 * request from the client. All subsequent
730 			 * users of the cache may or may not have
731 			 * the same netid. It doesn't matter. So
732 			 * when we refresh, we simply use the netid
733 			 * of the request which triggered the
734 			 * refresh attempt.
735 			 */
736 			retrieval = nfsauth_retrieve(nag, exi, netid,
737 			    p->auth_flavor, &p->auth_clnt->authc_addr, &access,
738 			    p->auth_clnt_cred, &uid, &gid, &ngids, &gids);
739 
740 			/*
741 			 * This can only be set in one other place
742 			 * and the state has to be NFS_AUTH_FRESH.
743 			 */
744 			strfree(netid);
745 
746 			mutex_enter(&p->auth_lock);
747 			if (p->auth_state == NFS_AUTH_INVALID) {
748 				mutex_exit(&p->auth_lock);
749 				nfsauth_free_node(p);
750 				if (retrieval == TRUE)
751 					kmem_free(gids, ngids * sizeof (gid_t));
752 			} else {
753 				/*
754 				 * If we got an error, do not reset the
755 				 * time. This will cause the next access
756 				 * check for the client to reschedule this
757 				 * node.
758 				 */
759 				if (retrieval == TRUE) {
760 					p->auth_access = access;
761 
762 					p->auth_srv_uid = uid;
763 					p->auth_srv_gid = gid;
764 					kmem_free(p->auth_srv_gids,
765 					    p->auth_srv_ngids * sizeof (gid_t));
766 					p->auth_srv_ngids = ngids;
767 					p->auth_srv_gids = gids;
768 
769 					p->auth_freshness = gethrestime_sec();
770 				}
771 				p->auth_state = NFS_AUTH_FRESH;
772 
773 				cv_broadcast(&p->auth_cv);
774 				mutex_exit(&p->auth_lock);
775 			}
776 		}
777 
778 		list_destroy(&ren->ren_authlist);
779 		exi_rele(ren->ren_exi);
780 		kmem_free(ren, sizeof (refreshq_exi_node_t));
781 	}
782 
783 	nag->refreshq_thread_state = REFRESHQ_THREAD_HALTED;
784 	cv_broadcast(&nag->refreshq_cv);
785 	CALLB_CPR_EXIT(&cprinfo);
786 	DTRACE_PROBE(nfsauth__nfsauth__refresh__thread__exit);
787 	zthread_exit();
788 }
789 
790 int
nfsauth_cache_clnt_compar(const void * v1,const void * v2)791 nfsauth_cache_clnt_compar(const void *v1, const void *v2)
792 {
793 	int c;
794 
795 	const struct auth_cache_clnt *a1 = (const struct auth_cache_clnt *)v1;
796 	const struct auth_cache_clnt *a2 = (const struct auth_cache_clnt *)v2;
797 
798 	if (a1->authc_addr.len < a2->authc_addr.len)
799 		return (-1);
800 	if (a1->authc_addr.len > a2->authc_addr.len)
801 		return (1);
802 
803 	c = memcmp(a1->authc_addr.buf, a2->authc_addr.buf, a1->authc_addr.len);
804 	if (c < 0)
805 		return (-1);
806 	if (c > 0)
807 		return (1);
808 
809 	return (0);
810 }
811 
812 static int
nfsauth_cache_compar(const void * v1,const void * v2)813 nfsauth_cache_compar(const void *v1, const void *v2)
814 {
815 	int c;
816 
817 	const struct auth_cache *a1 = (const struct auth_cache *)v1;
818 	const struct auth_cache *a2 = (const struct auth_cache *)v2;
819 
820 	if (a1->auth_flavor < a2->auth_flavor)
821 		return (-1);
822 	if (a1->auth_flavor > a2->auth_flavor)
823 		return (1);
824 
825 	if (crgetuid(a1->auth_clnt_cred) < crgetuid(a2->auth_clnt_cred))
826 		return (-1);
827 	if (crgetuid(a1->auth_clnt_cred) > crgetuid(a2->auth_clnt_cred))
828 		return (1);
829 
830 	if (crgetgid(a1->auth_clnt_cred) < crgetgid(a2->auth_clnt_cred))
831 		return (-1);
832 	if (crgetgid(a1->auth_clnt_cred) > crgetgid(a2->auth_clnt_cred))
833 		return (1);
834 
835 	if (crgetngroups(a1->auth_clnt_cred) < crgetngroups(a2->auth_clnt_cred))
836 		return (-1);
837 	if (crgetngroups(a1->auth_clnt_cred) > crgetngroups(a2->auth_clnt_cred))
838 		return (1);
839 
840 	c = memcmp(crgetgroups(a1->auth_clnt_cred),
841 	    crgetgroups(a2->auth_clnt_cred), crgetngroups(a1->auth_clnt_cred));
842 	if (c < 0)
843 		return (-1);
844 	if (c > 0)
845 		return (1);
846 
847 	return (0);
848 }
849 
850 /*
851  * Get the access information from the cache or callup to the mountd
852  * to get and cache the access information in the kernel.
853  */
854 static int
nfsauth_cache_get(struct exportinfo * exi,struct svc_req * req,int flavor,cred_t * cr,uid_t * uid,gid_t * gid,uint_t * ngids,gid_t ** gids)855 nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor,
856     cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
857 {
858 	nfsauth_globals_t	*nag;
859 	struct netbuf		*taddrmask;
860 	struct netbuf		addr;	/* temporary copy of client's address */
861 	const struct netbuf	*claddr;
862 	avl_tree_t		*tree;
863 	struct auth_cache	ac;	/* used as a template for avl_find() */
864 	struct auth_cache_clnt	*c;
865 	struct auth_cache_clnt	acc;	/* used as a template for avl_find() */
866 	struct auth_cache	*p = NULL;
867 	int			access;
868 
869 	uid_t			tmpuid;
870 	gid_t			tmpgid;
871 	uint_t			tmpngids;
872 	gid_t			*tmpgids;
873 
874 	avl_index_t		where;	/* used for avl_find()/avl_insert() */
875 
876 	ASSERT(cr != NULL);
877 
878 	ASSERT3P(curzone->zone_id, ==, exi->exi_zoneid);
879 	nag = nfsauth_get_zg();
880 
881 	/*
882 	 * Now check whether this client already
883 	 * has an entry for this flavor in the cache
884 	 * for this export.
885 	 * Get the caller's address, mask off the
886 	 * parts of the address that do not identify
887 	 * the host (port number, etc), and then hash
888 	 * it to find the chain of cache entries.
889 	 */
890 
891 	claddr = svc_getrpccaller(req->rq_xprt);
892 	addr = *claddr;
893 	if (claddr->len != 0) {
894 		addr.buf = kmem_alloc(addr.maxlen, KM_SLEEP);
895 		bcopy(claddr->buf, addr.buf, claddr->len);
896 	} else {
897 		addr.buf = NULL;
898 	}
899 
900 	SVC_GETADDRMASK(req->rq_xprt, SVC_TATTR_ADDRMASK, (void **)&taddrmask);
901 	ASSERT(taddrmask != NULL);
902 	addrmask(&addr, taddrmask);
903 
904 	acc.authc_addr = addr;
905 
906 	tree = exi->exi_cache[hash(&addr)];
907 
908 	rw_enter(&exi->exi_cache_lock, RW_READER);
909 	c = (struct auth_cache_clnt *)avl_find(tree, &acc, NULL);
910 
911 	if (c == NULL) {
912 		struct auth_cache_clnt *nc;
913 
914 		rw_exit(&exi->exi_cache_lock);
915 
916 		nc = kmem_alloc(sizeof (*nc), KM_NOSLEEP_LAZY);
917 		if (nc == NULL)
918 			goto retrieve;
919 
920 		/*
921 		 * Initialize the new auth_cache_clnt
922 		 */
923 		nc->authc_addr = addr;
924 		nc->authc_addr.buf = kmem_alloc(addr.maxlen, KM_NOSLEEP_LAZY);
925 		if (addr.maxlen != 0 && nc->authc_addr.buf == NULL) {
926 			kmem_free(nc, sizeof (*nc));
927 			goto retrieve;
928 		}
929 		bcopy(addr.buf, nc->authc_addr.buf, addr.len);
930 		rw_init(&nc->authc_lock, NULL, RW_DEFAULT, NULL);
931 		avl_create(&nc->authc_tree, nfsauth_cache_compar,
932 		    sizeof (struct auth_cache),
933 		    offsetof(struct auth_cache, auth_link));
934 
935 		rw_enter(&exi->exi_cache_lock, RW_WRITER);
936 		c = (struct auth_cache_clnt *)avl_find(tree, &acc, &where);
937 		if (c == NULL) {
938 			avl_insert(tree, nc, where);
939 			rw_downgrade(&exi->exi_cache_lock);
940 			c = nc;
941 		} else {
942 			rw_downgrade(&exi->exi_cache_lock);
943 
944 			avl_destroy(&nc->authc_tree);
945 			rw_destroy(&nc->authc_lock);
946 			kmem_free(nc->authc_addr.buf, nc->authc_addr.maxlen);
947 			kmem_free(nc, sizeof (*nc));
948 		}
949 	}
950 
951 	ASSERT(c != NULL);
952 
953 	rw_enter(&c->authc_lock, RW_READER);
954 
955 	ac.auth_flavor = flavor;
956 	ac.auth_clnt_cred = cr;
957 
958 	p = (struct auth_cache *)avl_find(&c->authc_tree, &ac, NULL);
959 
960 	if (p == NULL) {
961 		struct auth_cache *np;
962 
963 		rw_exit(&c->authc_lock);
964 
965 		np = kmem_cache_alloc(exi_cache_handle, KM_NOSLEEP_LAZY);
966 		if (np == NULL) {
967 			rw_exit(&exi->exi_cache_lock);
968 			goto retrieve;
969 		}
970 
971 		/*
972 		 * Initialize the new auth_cache
973 		 */
974 		np->auth_clnt = c;
975 		np->auth_flavor = flavor;
976 		np->auth_clnt_cred = crdup(cr);
977 		np->auth_srv_ngids = 0;
978 		np->auth_srv_gids = NULL;
979 		np->auth_time = np->auth_freshness = gethrestime_sec();
980 		np->auth_state = NFS_AUTH_NEW;
981 		mutex_init(&np->auth_lock, NULL, MUTEX_DEFAULT, NULL);
982 		cv_init(&np->auth_cv, NULL, CV_DEFAULT, NULL);
983 
984 		rw_enter(&c->authc_lock, RW_WRITER);
985 		rw_exit(&exi->exi_cache_lock);
986 
987 		p = (struct auth_cache *)avl_find(&c->authc_tree, &ac, &where);
988 		if (p == NULL) {
989 			avl_insert(&c->authc_tree, np, where);
990 			rw_downgrade(&c->authc_lock);
991 			p = np;
992 		} else {
993 			rw_downgrade(&c->authc_lock);
994 
995 			cv_destroy(&np->auth_cv);
996 			mutex_destroy(&np->auth_lock);
997 			crfree(np->auth_clnt_cred);
998 			kmem_cache_free(exi_cache_handle, np);
999 		}
1000 	} else {
1001 		rw_exit(&exi->exi_cache_lock);
1002 	}
1003 
1004 	mutex_enter(&p->auth_lock);
1005 	rw_exit(&c->authc_lock);
1006 
1007 	/*
1008 	 * If the entry is in the WAITING state then some other thread is just
1009 	 * retrieving the required info.  The entry was either NEW, or the list
1010 	 * of client's supplemental groups is going to be changed (either by
1011 	 * this thread, or by some other thread).  We need to wait until the
1012 	 * nfsauth_retrieve() is done.
1013 	 */
1014 	while (p->auth_state == NFS_AUTH_WAITING)
1015 		cv_wait(&p->auth_cv, &p->auth_lock);
1016 
1017 	/*
1018 	 * Here the entry cannot be in WAITING or INVALID state.
1019 	 */
1020 	ASSERT(p->auth_state != NFS_AUTH_WAITING);
1021 	ASSERT(p->auth_state != NFS_AUTH_INVALID);
1022 
1023 	/*
1024 	 * If the cache entry is not valid yet, we need to retrieve the
1025 	 * info ourselves.
1026 	 */
1027 	if (p->auth_state == NFS_AUTH_NEW) {
1028 		bool_t res;
1029 		/*
1030 		 * NFS_AUTH_NEW is the default output auth_state value in a
1031 		 * case we failed somewhere below.
1032 		 */
1033 		auth_state_t state = NFS_AUTH_NEW;
1034 
1035 		p->auth_state = NFS_AUTH_WAITING;
1036 		mutex_exit(&p->auth_lock);
1037 		kmem_free(addr.buf, addr.maxlen);
1038 		addr = p->auth_clnt->authc_addr;
1039 
1040 		nfsauth_cache_miss++;
1041 
1042 		res = nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt),
1043 		    flavor, &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids,
1044 		    &tmpgids);
1045 
1046 		p->auth_access = access;
1047 		p->auth_time = p->auth_freshness = gethrestime_sec();
1048 
1049 		if (res == TRUE) {
1050 			if (uid != NULL)
1051 				*uid = tmpuid;
1052 			if (gid != NULL)
1053 				*gid = tmpgid;
1054 			if (ngids != NULL && gids != NULL) {
1055 				*ngids = tmpngids;
1056 				*gids = tmpgids;
1057 
1058 				/*
1059 				 * We need a copy of gids for the
1060 				 * auth_cache entry
1061 				 */
1062 				tmpgids = kmem_alloc(tmpngids * sizeof (gid_t),
1063 				    KM_NOSLEEP_LAZY);
1064 				if (tmpgids != NULL)
1065 					bcopy(*gids, tmpgids,
1066 					    tmpngids * sizeof (gid_t));
1067 			}
1068 
1069 			if (tmpgids != NULL || tmpngids == 0) {
1070 				p->auth_srv_uid = tmpuid;
1071 				p->auth_srv_gid = tmpgid;
1072 				p->auth_srv_ngids = tmpngids;
1073 				p->auth_srv_gids = tmpgids;
1074 
1075 				state = NFS_AUTH_FRESH;
1076 			}
1077 		}
1078 
1079 		/*
1080 		 * Set the auth_state and notify waiters.
1081 		 */
1082 		mutex_enter(&p->auth_lock);
1083 		p->auth_state = state;
1084 		cv_broadcast(&p->auth_cv);
1085 		mutex_exit(&p->auth_lock);
1086 	} else {
1087 		uint_t nach;
1088 		time_t refresh;
1089 
1090 		refresh = gethrestime_sec() - p->auth_freshness;
1091 
1092 		p->auth_time = gethrestime_sec();
1093 
1094 		if (uid != NULL)
1095 			*uid = p->auth_srv_uid;
1096 		if (gid != NULL)
1097 			*gid = p->auth_srv_gid;
1098 		if (ngids != NULL && gids != NULL) {
1099 			if ((*ngids = p->auth_srv_ngids) != 0) {
1100 				size_t sz = *ngids * sizeof (gid_t);
1101 				*gids = kmem_alloc(sz, KM_SLEEP);
1102 				bcopy(p->auth_srv_gids, *gids, sz);
1103 			} else {
1104 				*gids = NULL;
1105 			}
1106 		}
1107 
1108 		access = p->auth_access;
1109 
1110 		if ((refresh > NFSAUTH_CACHE_REFRESH) &&
1111 		    p->auth_state == NFS_AUTH_FRESH) {
1112 			refreshq_auth_node_t *ran;
1113 			uint_t nacr;
1114 
1115 			p->auth_state = NFS_AUTH_STALE;
1116 			mutex_exit(&p->auth_lock);
1117 
1118 			nacr = ++nfsauth_cache_refresh;
1119 			DTRACE_PROBE3(nfsauth__debug__cache__stale,
1120 			    struct exportinfo *, exi,
1121 			    struct auth_cache *, p,
1122 			    uint_t, nacr);
1123 
1124 			ran = kmem_alloc(sizeof (refreshq_auth_node_t),
1125 			    KM_SLEEP);
1126 			ran->ran_auth = p;
1127 			ran->ran_netid = strdup(svc_getnetid(req->rq_xprt));
1128 
1129 			mutex_enter(&nag->refreshq_lock);
1130 
1131 			if (nag->refreshq_thread_state ==
1132 			    REFRESHQ_THREAD_NEED_CREATE) {
1133 				/* Launch nfsauth refresh thread */
1134 				nag->refreshq_thread_state =
1135 				    REFRESHQ_THREAD_RUNNING;
1136 				(void) zthread_create(NULL, 0,
1137 				    nfsauth_refresh_thread, nag, 0,
1138 				    minclsyspri);
1139 			}
1140 
1141 			/*
1142 			 * We should not add a work queue item if the thread
1143 			 * is not accepting them.
1144 			 */
1145 			if (nag->refreshq_thread_state ==
1146 			    REFRESHQ_THREAD_RUNNING) {
1147 				refreshq_exi_node_t *ren;
1148 
1149 				/*
1150 				 * Is there an existing exi_list?
1151 				 */
1152 				for (ren = list_head(&nag->refreshq_queue);
1153 				    ren != NULL;
1154 				    ren = list_next(&nag->refreshq_queue,
1155 				    ren)) {
1156 					if (ren->ren_exi == exi) {
1157 						list_insert_tail(
1158 						    &ren->ren_authlist, ran);
1159 						break;
1160 					}
1161 				}
1162 
1163 				if (ren == NULL) {
1164 					ren = kmem_alloc(
1165 					    sizeof (refreshq_exi_node_t),
1166 					    KM_SLEEP);
1167 
1168 					exi_hold(exi);
1169 					ren->ren_exi = exi;
1170 
1171 					list_create(&ren->ren_authlist,
1172 					    sizeof (refreshq_auth_node_t),
1173 					    offsetof(refreshq_auth_node_t,
1174 					    ran_node));
1175 
1176 					list_insert_tail(&ren->ren_authlist,
1177 					    ran);
1178 					list_insert_tail(&nag->refreshq_queue,
1179 					    ren);
1180 				}
1181 
1182 				cv_broadcast(&nag->refreshq_cv);
1183 			} else {
1184 				strfree(ran->ran_netid);
1185 				kmem_free(ran, sizeof (refreshq_auth_node_t));
1186 			}
1187 
1188 			mutex_exit(&nag->refreshq_lock);
1189 		} else {
1190 			mutex_exit(&p->auth_lock);
1191 		}
1192 
1193 		nach = ++nfsauth_cache_hit;
1194 		DTRACE_PROBE2(nfsauth__debug__cache__hit,
1195 		    uint_t, nach,
1196 		    time_t, refresh);
1197 
1198 		kmem_free(addr.buf, addr.maxlen);
1199 	}
1200 
1201 	return (access);
1202 
1203 retrieve:
1204 
1205 	/*
1206 	 * Retrieve the required data without caching.
1207 	 */
1208 
1209 	ASSERT(p == NULL);
1210 
1211 	nfsauth_cache_miss++;
1212 
1213 	if (nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt), flavor,
1214 	    &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids)) {
1215 		if (uid != NULL)
1216 			*uid = tmpuid;
1217 		if (gid != NULL)
1218 			*gid = tmpgid;
1219 		if (ngids != NULL && gids != NULL) {
1220 			*ngids = tmpngids;
1221 			*gids = tmpgids;
1222 		} else {
1223 			kmem_free(tmpgids, tmpngids * sizeof (gid_t));
1224 		}
1225 	}
1226 
1227 	kmem_free(addr.buf, addr.maxlen);
1228 
1229 	return (access);
1230 }
1231 
1232 /*
1233  * Check if the requesting client has access to the filesystem with
1234  * a given nfs flavor number which is an explicitly shared flavor.
1235  */
1236 int
nfsauth4_secinfo_access(struct exportinfo * exi,struct svc_req * req,int flavor,int perm,cred_t * cr)1237 nfsauth4_secinfo_access(struct exportinfo *exi, struct svc_req *req,
1238     int flavor, int perm, cred_t *cr)
1239 {
1240 	int access;
1241 
1242 	if (! (perm & M_4SEC_EXPORTED)) {
1243 		return (NFSAUTH_DENIED);
1244 	}
1245 
1246 	/*
1247 	 * Optimize if there are no lists
1248 	 */
1249 	if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0) {
1250 		perm &= ~M_4SEC_EXPORTED;
1251 		if (perm == M_RO)
1252 			return (NFSAUTH_RO);
1253 		if (perm == M_RW)
1254 			return (NFSAUTH_RW);
1255 	}
1256 
1257 	access = nfsauth_cache_get(exi, req, flavor, cr, NULL, NULL, NULL,
1258 	    NULL);
1259 
1260 	return (access);
1261 }
1262 
1263 int
nfsauth_access(struct exportinfo * exi,struct svc_req * req,cred_t * cr,uid_t * uid,gid_t * gid,uint_t * ngids,gid_t ** gids)1264 nfsauth_access(struct exportinfo *exi, struct svc_req *req, cred_t *cr,
1265     uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
1266 {
1267 	int access, mapaccess;
1268 	struct secinfo *sp;
1269 	int i, flavor, perm;
1270 	int authnone_entry = -1;
1271 
1272 	/*
1273 	 * By default root is mapped to anonymous user.
1274 	 * This might get overriden later in nfsauth_cache_get().
1275 	 */
1276 	if (crgetuid(cr) == 0) {
1277 		if (uid != NULL)
1278 			*uid = exi->exi_export.ex_anon;
1279 		if (gid != NULL)
1280 			*gid = exi->exi_export.ex_anon;
1281 	} else {
1282 		if (uid != NULL)
1283 			*uid = crgetuid(cr);
1284 		if (gid != NULL)
1285 			*gid = crgetgid(cr);
1286 	}
1287 
1288 	if (ngids != NULL)
1289 		*ngids = 0;
1290 	if (gids != NULL)
1291 		*gids = NULL;
1292 
1293 	/*
1294 	 *  Get the nfs flavor number from xprt.
1295 	 */
1296 	flavor = (int)(uintptr_t)req->rq_xprt->xp_cookie;
1297 
1298 	/*
1299 	 * First check the access restrictions on the filesystem.  If
1300 	 * there are no lists associated with this flavor then there's no
1301 	 * need to make an expensive call to the nfsauth service or to
1302 	 * cache anything.
1303 	 */
1304 
1305 	sp = exi->exi_export.ex_secinfo;
1306 	for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1307 		if (flavor != sp[i].s_secinfo.sc_nfsnum) {
1308 			if (sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1309 				authnone_entry = i;
1310 			continue;
1311 		}
1312 		break;
1313 	}
1314 
1315 	mapaccess = 0;
1316 
1317 	if (i >= exi->exi_export.ex_seccnt) {
1318 		/*
1319 		 * Flavor not found, but use AUTH_NONE if it exists
1320 		 */
1321 		if (authnone_entry == -1)
1322 			return (NFSAUTH_DENIED);
1323 		flavor = AUTH_NONE;
1324 		mapaccess = NFSAUTH_MAPNONE;
1325 		i = authnone_entry;
1326 	}
1327 
1328 	/*
1329 	 * If the flavor is in the ex_secinfo list, but not an explicitly
1330 	 * shared flavor by the user, it is a result of the nfsv4 server
1331 	 * namespace setup. We will grant an RO permission similar for
1332 	 * a pseudo node except that this node is a shared one.
1333 	 *
1334 	 * e.g. flavor in (flavor) indicates that it is not explictly
1335 	 *	shared by the user:
1336 	 *
1337 	 *		/	(sys, krb5)
1338 	 *		|
1339 	 *		export  #share -o sec=sys (krb5)
1340 	 *		|
1341 	 *		secure  #share -o sec=krb5
1342 	 *
1343 	 *	In this case, when a krb5 request coming in to access
1344 	 *	/export, RO permission is granted.
1345 	 */
1346 	if (!(sp[i].s_flags & M_4SEC_EXPORTED))
1347 		return (mapaccess | NFSAUTH_RO);
1348 
1349 	/*
1350 	 * Optimize if there are no lists.
1351 	 * We cannot optimize for AUTH_SYS with NGRPS (16) supplemental groups.
1352 	 */
1353 	perm = sp[i].s_flags;
1354 	if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0 && (ngroups_max <= NGRPS ||
1355 	    flavor != AUTH_SYS || crgetngroups(cr) < NGRPS)) {
1356 		perm &= ~M_4SEC_EXPORTED;
1357 		if (perm == M_RO)
1358 			return (mapaccess | NFSAUTH_RO);
1359 		if (perm == M_RW)
1360 			return (mapaccess | NFSAUTH_RW);
1361 	}
1362 
1363 	access = nfsauth_cache_get(exi, req, flavor, cr, uid, gid, ngids, gids);
1364 
1365 	/*
1366 	 * For both NFSAUTH_DENIED and NFSAUTH_WRONGSEC we do not care about
1367 	 * the supplemental groups.
1368 	 */
1369 	if (access & NFSAUTH_DENIED || access & NFSAUTH_WRONGSEC) {
1370 		if (ngids != NULL && gids != NULL) {
1371 			kmem_free(*gids, *ngids * sizeof (gid_t));
1372 			*ngids = 0;
1373 			*gids = NULL;
1374 		}
1375 	}
1376 
1377 	/*
1378 	 * Client's security flavor doesn't match with "ro" or
1379 	 * "rw" list. Try again using AUTH_NONE if present.
1380 	 */
1381 	if ((access & NFSAUTH_WRONGSEC) && (flavor != AUTH_NONE)) {
1382 		/*
1383 		 * Have we already encountered AUTH_NONE ?
1384 		 */
1385 		if (authnone_entry != -1) {
1386 			mapaccess = NFSAUTH_MAPNONE;
1387 			access = nfsauth_cache_get(exi, req, AUTH_NONE, cr,
1388 			    NULL, NULL, NULL, NULL);
1389 		} else {
1390 			/*
1391 			 * Check for AUTH_NONE presence.
1392 			 */
1393 			for (; i < exi->exi_export.ex_seccnt; i++) {
1394 				if (sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1395 					mapaccess = NFSAUTH_MAPNONE;
1396 					access = nfsauth_cache_get(exi, req,
1397 					    AUTH_NONE, cr, NULL, NULL, NULL,
1398 					    NULL);
1399 					break;
1400 				}
1401 			}
1402 		}
1403 	}
1404 
1405 	if (access & NFSAUTH_DENIED)
1406 		access = NFSAUTH_DENIED;
1407 
1408 	return (access | mapaccess);
1409 }
1410 
1411 static void
nfsauth_free_clnt_node(struct auth_cache_clnt * p)1412 nfsauth_free_clnt_node(struct auth_cache_clnt *p)
1413 {
1414 	void *cookie = NULL;
1415 	struct auth_cache *node;
1416 
1417 	while ((node = avl_destroy_nodes(&p->authc_tree, &cookie)) != NULL)
1418 		nfsauth_free_node(node);
1419 	avl_destroy(&p->authc_tree);
1420 
1421 	kmem_free(p->authc_addr.buf, p->authc_addr.maxlen);
1422 	rw_destroy(&p->authc_lock);
1423 
1424 	kmem_free(p, sizeof (*p));
1425 }
1426 
1427 static void
nfsauth_free_node(struct auth_cache * p)1428 nfsauth_free_node(struct auth_cache *p)
1429 {
1430 	crfree(p->auth_clnt_cred);
1431 	kmem_free(p->auth_srv_gids, p->auth_srv_ngids * sizeof (gid_t));
1432 	mutex_destroy(&p->auth_lock);
1433 	cv_destroy(&p->auth_cv);
1434 	kmem_cache_free(exi_cache_handle, p);
1435 }
1436 
1437 /*
1438  * Free the nfsauth cache for a given export
1439  */
1440 void
nfsauth_cache_free(struct exportinfo * exi)1441 nfsauth_cache_free(struct exportinfo *exi)
1442 {
1443 	int i;
1444 
1445 	/*
1446 	 * The only way we got here was with an exi_rele, which means that no
1447 	 * auth cache entry is being refreshed.
1448 	 */
1449 
1450 	for (i = 0; i < AUTH_TABLESIZE; i++) {
1451 		avl_tree_t *tree = exi->exi_cache[i];
1452 		void *cookie = NULL;
1453 		struct auth_cache_clnt *node;
1454 
1455 		while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
1456 			nfsauth_free_clnt_node(node);
1457 	}
1458 }
1459 
1460 /*
1461  * Called by the kernel memory allocator when memory is low.
1462  * Free unused cache entries. If that's not enough, the VM system
1463  * will call again for some more.
1464  *
1465  * This needs to operate on all zones, so we take a reader lock
1466  * on the list of zones and walk the list.  This is OK here
1467  * becuase exi_cache_trim doesn't block or cause new objects
1468  * to be allocated (basically just frees lots of stuff).
1469  * Use care if nfssrv_globals_rwl is taken as reader in any
1470  * other cases because it will block nfs_server_zone_init
1471  * and nfs_server_zone_fini, which enter as writer.
1472  */
1473 /*ARGSUSED*/
1474 void
exi_cache_reclaim(void * cdrarg)1475 exi_cache_reclaim(void *cdrarg)
1476 {
1477 	nfs_globals_t *ng;
1478 
1479 	rw_enter(&nfssrv_globals_rwl, RW_READER);
1480 
1481 	ng = list_head(&nfssrv_globals_list);
1482 	while (ng != NULL) {
1483 		exi_cache_reclaim_zone(ng);
1484 		ng = list_next(&nfssrv_globals_list, ng);
1485 	}
1486 
1487 	rw_exit(&nfssrv_globals_rwl);
1488 }
1489 
1490 static void
exi_cache_reclaim_zone(nfs_globals_t * ng)1491 exi_cache_reclaim_zone(nfs_globals_t *ng)
1492 {
1493 	int i;
1494 	struct exportinfo *exi;
1495 	nfs_export_t *ne = ng->nfs_export;
1496 
1497 	rw_enter(&ne->exported_lock, RW_READER);
1498 
1499 	for (i = 0; i < EXPTABLESIZE; i++) {
1500 		for (exi = ne->exptable[i]; exi; exi = exi->fid_hash.next)
1501 			exi_cache_trim(exi);
1502 	}
1503 
1504 	rw_exit(&ne->exported_lock);
1505 
1506 	nfsauth_cache_reclaim++;
1507 }
1508 
1509 static void
exi_cache_trim(struct exportinfo * exi)1510 exi_cache_trim(struct exportinfo *exi)
1511 {
1512 	struct auth_cache_clnt *c;
1513 	struct auth_cache_clnt *nextc;
1514 	struct auth_cache *p;
1515 	struct auth_cache *next;
1516 	int i;
1517 	time_t stale_time;
1518 	avl_tree_t *tree;
1519 
1520 	for (i = 0; i < AUTH_TABLESIZE; i++) {
1521 		tree = exi->exi_cache[i];
1522 		stale_time = gethrestime_sec() - NFSAUTH_CACHE_TRIM;
1523 		rw_enter(&exi->exi_cache_lock, RW_READER);
1524 
1525 		/*
1526 		 * Free entries that have not been
1527 		 * used for NFSAUTH_CACHE_TRIM seconds.
1528 		 */
1529 		for (c = avl_first(tree); c != NULL; c = AVL_NEXT(tree, c)) {
1530 			/*
1531 			 * We are being called by the kmem subsystem to reclaim
1532 			 * memory so don't block if we can't get the lock.
1533 			 */
1534 			if (rw_tryenter(&c->authc_lock, RW_WRITER) == 0) {
1535 				exi_cache_auth_reclaim_failed++;
1536 				rw_exit(&exi->exi_cache_lock);
1537 				return;
1538 			}
1539 
1540 			for (p = avl_first(&c->authc_tree); p != NULL;
1541 			    p = next) {
1542 				next = AVL_NEXT(&c->authc_tree, p);
1543 
1544 				ASSERT(p->auth_state != NFS_AUTH_INVALID);
1545 
1546 				mutex_enter(&p->auth_lock);
1547 
1548 				/*
1549 				 * We won't trim recently used and/or WAITING
1550 				 * entries.
1551 				 */
1552 				if (p->auth_time > stale_time ||
1553 				    p->auth_state == NFS_AUTH_WAITING) {
1554 					mutex_exit(&p->auth_lock);
1555 					continue;
1556 				}
1557 
1558 				DTRACE_PROBE1(nfsauth__debug__trim__state,
1559 				    auth_state_t, p->auth_state);
1560 
1561 				/*
1562 				 * STALE and REFRESHING entries needs to be
1563 				 * marked INVALID only because they are
1564 				 * referenced by some other structures or
1565 				 * threads.  They will be freed later.
1566 				 */
1567 				if (p->auth_state == NFS_AUTH_STALE ||
1568 				    p->auth_state == NFS_AUTH_REFRESHING) {
1569 					p->auth_state = NFS_AUTH_INVALID;
1570 					mutex_exit(&p->auth_lock);
1571 
1572 					avl_remove(&c->authc_tree, p);
1573 				} else {
1574 					mutex_exit(&p->auth_lock);
1575 
1576 					avl_remove(&c->authc_tree, p);
1577 					nfsauth_free_node(p);
1578 				}
1579 			}
1580 			rw_exit(&c->authc_lock);
1581 		}
1582 
1583 		if (rw_tryupgrade(&exi->exi_cache_lock) == 0) {
1584 			rw_exit(&exi->exi_cache_lock);
1585 			exi_cache_clnt_reclaim_failed++;
1586 			continue;
1587 		}
1588 
1589 		for (c = avl_first(tree); c != NULL; c = nextc) {
1590 			nextc = AVL_NEXT(tree, c);
1591 
1592 			if (avl_is_empty(&c->authc_tree) == B_FALSE)
1593 				continue;
1594 
1595 			avl_remove(tree, c);
1596 
1597 			nfsauth_free_clnt_node(c);
1598 		}
1599 
1600 		rw_exit(&exi->exi_cache_lock);
1601 	}
1602 }
1603