xref: /titanic_41/usr/src/uts/common/fs/nfs/nfs_auth.c (revision 32a712da90cea6ff9a05f51e7844944ccfa28d5e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
24  */
25 
26 #include <sys/param.h>
27 #include <sys/errno.h>
28 #include <sys/vfs.h>
29 #include <sys/vnode.h>
30 #include <sys/cred.h>
31 #include <sys/cmn_err.h>
32 #include <sys/systm.h>
33 #include <sys/kmem.h>
34 #include <sys/pathname.h>
35 #include <sys/utsname.h>
36 #include <sys/debug.h>
37 #include <sys/door.h>
38 #include <sys/sdt.h>
39 #include <sys/thread.h>
40 
41 #include <rpc/types.h>
42 #include <rpc/auth.h>
43 #include <rpc/clnt.h>
44 
45 #include <nfs/nfs.h>
46 #include <nfs/export.h>
47 #include <nfs/nfs_clnt.h>
48 #include <nfs/auth.h>
49 
/*
 * Compare two netbuf addresses for equality.  The lengths are compared
 * first so that bcmp() never reads past the end of the shorter buffer
 * when the two addresses have different lengths.
 */
#define	EQADDR(a1, a2)  \
	((a1)->len == (a2)->len && \
	bcmp((char *)(a1)->buf, (char *)(a2)->buf, (a1)->len) == 0)
53 
54 static struct knetconfig auth_knconf;
55 static servinfo_t svp;
56 static clinfo_t ci;
57 
58 static struct kmem_cache *exi_cache_handle;
59 static void exi_cache_reclaim(void *);
60 static void exi_cache_trim(struct exportinfo *exi);
61 
62 extern pri_t minclsyspri;
63 
64 int nfsauth_cache_hit;
65 int nfsauth_cache_miss;
66 int nfsauth_cache_refresh;
67 int nfsauth_cache_reclaim;
68 
69 /*
70  * The lifetime of an auth cache entry:
71  * ------------------------------------
72  *
73  * An auth cache entry is created with both the auth_time
74  * and auth_freshness times set to the current time.
75  *
76  * Upon every client access which results in a hit, the
77  * auth_time will be updated.
78  *
79  * If a client access determines that the auth_freshness
80  * indicates that the entry is STALE, then it will be
81  * refreshed. Note that this will explicitly reset
82  * auth_time.
83  *
84  * When the REFRESH successfully occurs, then the
85  * auth_freshness is updated.
86  *
87  * There are two ways for an entry to leave the cache:
88  *
89  * 1) Purged by an action on the export (remove or changed)
90  * 2) Memory backpressure from the kernel (check against NFSAUTH_CACHE_TRIM)
91  *
92  * For 2) we check the timeout value against auth_time.
93  */
94 
95 /*
96  * Number of seconds until we mark for refresh an auth cache entry.
97  */
98 #define	NFSAUTH_CACHE_REFRESH 600
99 
100 /*
101  * Number of idle seconds until we yield to backpressure
102  * to trim a cache entry.
103  */
104 #define	NFSAUTH_CACHE_TRIM 3600
105 
/*
 * While we could encapsulate the exi_list inside the
 * exi structure, we can't do that for the auth_list.
 * So, to keep things looking clean, we keep them both
 * in these external lists.
 */
/* Per-export node on refreshq_queue; carries that export's stale entries. */
typedef struct refreshq_exi_node {
	struct exportinfo	*ren_exi;	/* export owning the entries */
	list_t			ren_authlist;	/* list of refreshq_auth_node_t */
	list_node_t		ren_node;	/* linkage on refreshq_queue */
} refreshq_exi_node_t;

/* Wrapper queuing a single auth_cache entry on a ren_authlist. */
typedef struct refreshq_auth_node {
	struct auth_cache	*ran_auth;	/* cache entry to refresh */
	list_node_t		ran_node;	/* linkage on ren_authlist */
} refreshq_auth_node_t;
122 
123 /*
124  * Used to manipulate things on the refreshq_queue.
125  * Note that the refresh thread will effectively
126  * pop a node off of the queue, at which point it
127  * will no longer need to hold the mutex.
128  */
129 static kmutex_t refreshq_lock;
130 static list_t refreshq_queue;
131 static kcondvar_t refreshq_cv;
132 
133 /*
134  * A list_t would be overkill. These are auth_cache
135  * entries which are no longer linked to an exi.
136  * It should be the case that all of their states
137  * are NFS_AUTH_INVALID.
138  *
 * I.e., an entry is put on this list only if its
 * state indicated that it had been placed on the
 * refreshq_queue.
142  *
143  * Note that while there is no link from the exi or
144  * back to the exi, the exi can not go away until
145  * these entries are harvested.
146  */
147 static struct auth_cache	*refreshq_dead_entries;
148 
149 /*
150  * If there is ever a problem with loading the
151  * module, then nfsauth_fini() needs to be called
152  * to remove state. In that event, since the
153  * refreshq thread has been started, they need to
154  * work together to get rid of state.
155  */
156 typedef enum nfsauth_refreshq_thread_state {
157 	REFRESHQ_THREAD_RUNNING,
158 	REFRESHQ_THREAD_FINI_REQ,
159 	REFRESHQ_THREAD_HALTED
160 } nfsauth_refreshq_thread_state_t;
161 
162 nfsauth_refreshq_thread_state_t
163 refreshq_thread_state = REFRESHQ_THREAD_HALTED;
164 
165 static void nfsauth_free_node(struct auth_cache *);
166 static void nfsauth_remove_dead_entry(struct auth_cache *);
167 static void nfsauth_refresh_thread(void);
168 
169 /*
170  * mountd is a server-side only daemon. This will need to be
171  * revisited if the NFS server is ever made zones-aware.
172  */
173 kmutex_t	mountd_lock;
174 door_handle_t   mountd_dh;
175 
176 void
177 mountd_args(uint_t did)
178 {
179 	mutex_enter(&mountd_lock);
180 	if (mountd_dh)
181 		door_ki_rele(mountd_dh);
182 	mountd_dh = door_ki_lookup(did);
183 	mutex_exit(&mountd_lock);
184 }
185 
/*
 * One-time initialization of the nfsauth subsystem: set up the locks,
 * the refresh queue and its condition variable, the auth_cache kmem
 * cache, and finally start the refresh thread.  nfsauth_fini() undoes
 * all of this.
 */
void
nfsauth_init(void)
{
	/*
	 * mountd can be restarted by smf(5). We need to make sure
	 * the updated door handle will safely make it to mountd_dh
	 */
	mutex_init(&mountd_lock, NULL, MUTEX_DEFAULT, NULL);

	mutex_init(&refreshq_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&refreshq_queue, sizeof (refreshq_exi_node_t),
	    offsetof(refreshq_exi_node_t, ren_node));
	refreshq_dead_entries = NULL;

	cv_init(&refreshq_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Allocate nfsauth cache handle
	 */
	exi_cache_handle = kmem_cache_create("exi_cache_handle",
	    sizeof (struct auth_cache), 0, NULL, NULL,
	    exi_cache_reclaim, NULL, NULL, 0);

	/*
	 * Mark the thread RUNNING before creating it so the thread sees
	 * a consistent state as soon as it starts.
	 */
	refreshq_thread_state = REFRESHQ_THREAD_RUNNING;
	(void) zthread_create(NULL, 0, nfsauth_refresh_thread,
	    NULL, 0, minclsyspri);
}
213 
/*
 * Finalization routine for nfsauth. It is important to call this routine
 * before destroying the exported_lock.
 *
 * Stops the refresh thread, drains the refresh queue and the dead-entry
 * list, then tears down the locks, CV, and kmem cache created by
 * nfsauth_init().
 */
void
nfsauth_fini(void)
{
	refreshq_exi_node_t	*ren;
	refreshq_auth_node_t	*ran;
	struct auth_cache	*p;
	struct auth_cache	*auth_next;

	/*
	 * Prevent the refreshq_thread from getting new
	 * work.
	 */
	mutex_enter(&refreshq_lock);
	if (refreshq_thread_state != REFRESHQ_THREAD_HALTED) {
		refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ;
		cv_broadcast(&refreshq_cv);

		/*
		 * Also, wait for nfsauth_refresh_thread() to exit.
		 */
		while (refreshq_thread_state != REFRESHQ_THREAD_HALTED) {
			cv_wait(&refreshq_cv, &refreshq_lock);
		}
	}

	/*
	 * Walk the exi_list and in turn, walk the
	 * auth_lists.  The refresh thread is halted, so
	 * nothing else touches the queue now.
	 */
	while ((ren = list_remove_head(&refreshq_queue))) {
		while ((ran = list_remove_head(&ren->ren_authlist))) {
			kmem_free(ran, sizeof (refreshq_auth_node_t));
		}

		list_destroy(&ren->ren_authlist);
		exi_rele(ren->ren_exi);
		kmem_free(ren, sizeof (refreshq_exi_node_t));
	}

	/*
	 * Okay, now that the lists are deleted, we
	 * need to see if there are any dead entries
	 * to harvest.
	 */
	for (p = refreshq_dead_entries; p != NULL; p = auth_next) {
		auth_next = p->auth_next;
		nfsauth_free_node(p);
	}

	mutex_exit(&refreshq_lock);

	list_destroy(&refreshq_queue);

	cv_destroy(&refreshq_cv);
	mutex_destroy(&refreshq_lock);

	mutex_destroy(&mountd_lock);

	/*
	 * Deallocate nfsauth cache handle
	 */
	kmem_cache_destroy(exi_cache_handle);
}
281 
282 /*
283  * Convert the address in a netbuf to
284  * a hash index for the auth_cache table.
285  */
286 static int
287 hash(struct netbuf *a)
288 {
289 	int i, h = 0;
290 
291 	for (i = 0; i < a->len; i++)
292 		h ^= a->buf[i];
293 
294 	return (h & (AUTH_TABLESIZE - 1));
295 }
296 
297 /*
298  * Mask out the components of an
299  * address that do not identify
300  * a host. For socket addresses the
301  * masking gets rid of the port number.
302  */
303 static void
304 addrmask(struct netbuf *addr, struct netbuf *mask)
305 {
306 	int i;
307 
308 	for (i = 0; i < addr->len; i++)
309 		addr->buf[i] &= mask->buf[i];
310 }
311 
312 /*
313  * nfsauth4_access is used for NFS V4 auth checking. Besides doing
314  * the common nfsauth_access(), it will check if the client can
315  * have a limited access to this vnode even if the security flavor
316  * used does not meet the policy.
317  */
318 int
319 nfsauth4_access(struct exportinfo *exi, vnode_t *vp, struct svc_req *req,
320     cred_t *cr, uid_t *uid, gid_t *gid)
321 {
322 	int access;
323 
324 	access = nfsauth_access(exi, req, cr, uid, gid);
325 
326 	/*
327 	 * There are cases that the server needs to allow the client
328 	 * to have a limited view.
329 	 *
330 	 * e.g.
331 	 * /export is shared as "sec=sys,rw=dfs-test-4,sec=krb5,rw"
332 	 * /export/home is shared as "sec=sys,rw"
333 	 *
334 	 * When the client mounts /export with sec=sys, the client
335 	 * would get a limited view with RO access on /export to see
336 	 * "home" only because the client is allowed to access
337 	 * /export/home with auth_sys.
338 	 */
339 	if (access & NFSAUTH_DENIED || access & NFSAUTH_WRONGSEC) {
340 		/*
341 		 * Allow ro permission with LIMITED view if there is a
342 		 * sub-dir exported under vp.
343 		 */
344 		if (has_visible(exi, vp))
345 			return (NFSAUTH_LIMITED);
346 	}
347 
348 	return (access);
349 }
350 
351 static void
352 sys_log(const char *msg)
353 {
354 	static time_t	tstamp = 0;
355 	time_t		now;
356 
357 	/*
358 	 * msg is shown (at most) once per minute
359 	 */
360 	now = gethrestime_sec();
361 	if ((tstamp + 60) < now) {
362 		tstamp = now;
363 		cmn_err(CE_WARN, msg);
364 	}
365 }
366 
/*
 * Callup to the mountd to get access information in the kernel.
 *
 * Encodes an authorization request for the given client address, netid,
 * flavor and export, and makes a door upcall to mountd.  On success,
 * TRUE is returned and *access, *srv_uid and *srv_gid are filled in
 * from mountd's reply.  On unrecoverable failure, FALSE is returned
 * with *access set to NFSAUTH_DROP (door never established; client will
 * retransmit) or NFSAUTH_DENIED (any other failure).
 */
static bool_t
nfsauth_retrieve(struct exportinfo *exi, char *req_netid, int flavor,
    struct netbuf *addr, int *access, uid_t clnt_uid, gid_t clnt_gid,
    uid_t *srv_uid, gid_t *srv_gid)
{
	varg_t			  varg = {0};
	nfsauth_res_t		  res = {0};
	XDR			  xdrs_a;
	XDR			  xdrs_r;
	size_t			  absz;		/* argument buffer size */
	caddr_t			  abuf;		/* argument buffer (kmem) */
	size_t			  rbsz = (size_t)(BYTES_PER_XDR_UNIT * 4);
	char			  result[BYTES_PER_XDR_UNIT * 4] = {0};
	caddr_t			  rbuf = (caddr_t)&result;
	int			  last = 0;	/* one-shot retry marker */
	door_arg_t		  da;
	door_info_t		  di;
	door_handle_t		  dh;
	uint_t			  ntries = 0;

	/*
	 * No entry in the cache for this client/flavor
	 * so we need to call the nfsauth service in the
	 * mount daemon.
	 */
retry:
	mutex_enter(&mountd_lock);
	dh = mountd_dh;
	if (dh)
		door_ki_hold(dh);
	mutex_exit(&mountd_lock);

	if (dh == NULL) {
		/*
		 * The rendezvous point has not been established yet!
		 * This could mean that either mountd(1m) has not yet
		 * been started or that _this_ routine nuked the door
		 * handle after receiving an EINTR for a REVOKED door.
		 *
		 * Returning NFSAUTH_DROP will cause the NFS client
		 * to retransmit the request, so let's try to be more
		 * resilient and attempt for ntries before we bail.
		 */
		if (++ntries % NFSAUTH_DR_TRYCNT) {
			delay(hz);
			goto retry;
		}

		sys_log("nfsauth: mountd has not established door");
		*access = NFSAUTH_DROP;
		return (FALSE);
	}

	ntries = 0;
	varg.vers = V_PROTO;
	varg.arg_u.arg.cmd = NFSAUTH_ACCESS;
	varg.arg_u.arg.areq.req_client.n_len = addr->len;
	varg.arg_u.arg.areq.req_client.n_bytes = addr->buf;
	varg.arg_u.arg.areq.req_netid = req_netid;
	varg.arg_u.arg.areq.req_path = exi->exi_export.ex_path;
	varg.arg_u.arg.areq.req_flavor = flavor;
	varg.arg_u.arg.areq.req_clnt_uid = clnt_uid;
	varg.arg_u.arg.areq.req_clnt_gid = clnt_gid;

	/*
	 * Setup the XDR stream for encoding the arguments. Notice that
	 * in addition to the args having variable fields (req_netid and
	 * req_path), the argument data structure is itself versioned,
	 * so we need to make sure we can size the arguments buffer
	 * appropriately to encode all the args. If we can't get sizing
	 * info _or_ properly encode the arguments, there's really no
	 * point in continuing, so we fail the request.
	 */
	DTRACE_PROBE1(nfsserv__func__nfsauth__varg, varg_t *, &varg);
	if ((absz = xdr_sizeof(xdr_varg, (void *)&varg)) == 0) {
		door_ki_rele(dh);
		*access = NFSAUTH_DENIED;
		return (FALSE);
	}

	abuf = (caddr_t)kmem_alloc(absz, KM_SLEEP);
	xdrmem_create(&xdrs_a, abuf, absz, XDR_ENCODE);
	if (!xdr_varg(&xdrs_a, &varg)) {
		door_ki_rele(dh);
		goto fail;
	}
	XDR_DESTROY(&xdrs_a);

	/*
	 * The result (nfsauth_res_t) is always four int's, so we don't
	 * have to dynamically size (or allocate) the results buffer.
	 * Now that we've got what we need, we prep the door arguments
	 * and place the call.
	 */
	da.data_ptr = (char *)abuf;
	da.data_size = absz;
	da.desc_ptr = NULL;
	da.desc_num = 0;
	da.rbuf = (char *)rbuf;
	da.rsize = rbsz;

	switch (door_ki_upcall_limited(dh, &da, NULL, SIZE_MAX, 0)) {
		case 0:				/* Success */
			if (da.data_ptr != da.rbuf && da.data_size == 0) {
				/*
				 * The door_return that contained the data
				 * failed!  We're here because of the 2nd
				 * door_return (w/o data) such that we can
				 * get control of the thread (and exit
				 * gracefully).
				 */
				DTRACE_PROBE1(nfsserv__func__nfsauth__door__nil,
				    door_arg_t *, &da);
				door_ki_rele(dh);
				goto fail;

			} else if (rbuf != da.rbuf) {
				/*
				 * The only time this should be true
				 * is iff userland wanted to hand us
				 * a bigger response than what we
				 * expect; that should not happen
				 * (nfsauth_res_t is only 4 int's),
				 * but we check nevertheless.
				 */
				rbuf = da.rbuf;
				rbsz = da.rsize;

			} else if (rbsz > da.data_size) {
				/*
				 * We were expecting four int's; but if
				 * userland fails in encoding the XDR
				 * stream, we detect that here, since
				 * the mountd forces down only one byte
				 * in such scenario.
				 */
				door_ki_rele(dh);
				goto fail;
			}
			door_ki_rele(dh);
			break;

		case EAGAIN:
			/*
			 * Server out of resources; back off for a bit
			 */
			door_ki_rele(dh);
			kmem_free(abuf, absz);
			delay(hz);
			goto retry;
			/* NOTREACHED */

		case EINTR:
			if (!door_ki_info(dh, &di)) {
				if (di.di_attributes & DOOR_REVOKED) {
					/*
					 * The server barfed and revoked
					 * the (existing) door on us; we
					 * want to wait to give smf(5) a
					 * chance to restart mountd(1m)
					 * and establish a new door handle.
					 */
					mutex_enter(&mountd_lock);
					if (dh == mountd_dh)
						mountd_dh = NULL;
					mutex_exit(&mountd_lock);
					door_ki_rele(dh);
					kmem_free(abuf, absz);
					delay(hz);
					goto retry;
				}
				/*
				 * If the door was _not_ revoked on us,
				 * then more than likely we took an INTR,
				 * so we need to fail the operation.
				 */
				door_ki_rele(dh);
				goto fail;
			}
			/*
			 * The only failure that can occur from getting
			 * the door info is EINVAL, so we let the code
			 * below handle it.
			 */
			/* FALLTHROUGH */

		case EBADF:
		case EINVAL:
		default:
			/*
			 * If we have a stale door handle, give smf a last
			 * chance to start it by sleeping for a little bit.
			 * If we're still hosed, we'll fail the call.
			 *
			 * Since we're going to reacquire the door handle
			 * upon the retry, we opt to sleep for a bit and
			 * _not_ to clear mountd_dh. If mountd restarted
			 * and was able to set mountd_dh, we should see
			 * the new instance; if not, we won't get caught
			 * up in the retry/DELAY loop.
			 */
			door_ki_rele(dh);
			if (!last) {
				delay(hz);
				last++;
				goto retry;
			}
			sys_log("nfsauth: stale mountd door handle");
			goto fail;
	}

	/*
	 * No door errors encountered; setup the XDR stream for decoding
	 * the results. If we fail to decode the results, we've got no
	 * other recourse than to fail the request.
	 */
	xdrmem_create(&xdrs_r, rbuf, rbsz, XDR_DECODE);
	if (!xdr_nfsauth_res(&xdrs_r, &res))
		goto fail;
	XDR_DESTROY(&xdrs_r);

	DTRACE_PROBE1(nfsserv__func__nfsauth__results, nfsauth_res_t *, &res);
	switch (res.stat) {
		case NFSAUTH_DR_OKAY:
			*access = res.ares.auth_perm;
			*srv_uid = res.ares.auth_srv_uid;
			*srv_gid = res.ares.auth_srv_gid;
			kmem_free(abuf, absz);
			break;

		case NFSAUTH_DR_EFAIL:
		case NFSAUTH_DR_DECERR:
		case NFSAUTH_DR_BADCMD:
		default:
fail:
			*access = NFSAUTH_DENIED;
			kmem_free(abuf, absz);
			return (FALSE);
			/* NOTREACHED */
	}

	return (TRUE);
}
613 
/*
 * Body of the thread which asynchronously refreshes STALE auth cache
 * entries queued by nfsauth_cache_get().  It pops refreshq_exi_node_t's
 * off refreshq_queue, re-retrieves the access information from mountd
 * for every queued entry, and updates the entries in place.  The thread
 * exits (and wakes nfsauth_fini()) once refreshq_thread_state leaves
 * REFRESHQ_THREAD_RUNNING.
 */
static void
nfsauth_refresh_thread(void)
{
	refreshq_exi_node_t	*ren;
	refreshq_auth_node_t	*ran;

	struct exportinfo	*exi;

	int			access;
	bool_t			retrieval;

	callb_cpr_t		cprinfo;

	CALLB_CPR_INIT(&cprinfo, &refreshq_lock, callb_generic_cpr,
	    "nfsauth_refresh");

	for (;;) {
		mutex_enter(&refreshq_lock);
		if (refreshq_thread_state != REFRESHQ_THREAD_RUNNING) {
			/* Keep the hold on the lock! */
			break;
		}

		ren = list_remove_head(&refreshq_queue);
		if (ren == NULL) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&refreshq_cv, &refreshq_lock);
			CALLB_CPR_SAFE_END(&cprinfo, &refreshq_lock);
			mutex_exit(&refreshq_lock);
			continue;
		}
		mutex_exit(&refreshq_lock);

		exi = ren->ren_exi;
		ASSERT(exi != NULL);

		/*
		 * Since the ren was removed from the refreshq_queue above,
		 * this is the only thread aware about the ren existence, so we
		 * have the exclusive ownership of it and we do not need to
		 * protect it by any lock.
		 */
		while ((ran = list_remove_head(&ren->ren_authlist))) {

			struct auth_cache *p = ran->ran_auth;

			ASSERT(p != NULL);
			kmem_free(ran, sizeof (refreshq_auth_node_t));

			/*
			 * We are shutting down. No need to refresh
			 * entries which are about to be nuked.
			 *
			 * So just throw them away until we are done
			 * with this exi node...
			 */
			if (refreshq_thread_state != REFRESHQ_THREAD_RUNNING)
				continue;

			mutex_enter(&p->auth_lock);

			/*
			 * Make sure the state is valid now that
			 * we have the lock. Note that once we
			 * change the state to NFS_AUTH_REFRESHING,
			 * no other thread will be able to work on
			 * this entry.
			 */
			if (p->auth_state != NFS_AUTH_STALE) {
				/*
				 * Once it goes INVALID, it can not
				 * change state.
				 */
				if (p->auth_state == NFS_AUTH_INVALID) {
					mutex_exit(&p->auth_lock);
					nfsauth_remove_dead_entry(p);
				} else
					mutex_exit(&p->auth_lock);

				continue;
			}

			p->auth_state = NFS_AUTH_REFRESHING;
			mutex_exit(&p->auth_lock);

			DTRACE_PROBE2(nfsauth__debug__cache__refresh,
			    struct exportinfo *, exi,
			    struct auth_cache *, p);

			/*
			 * The first caching of the access rights
			 * is done with the netid pulled out of the
			 * request from the client. All subsequent
			 * users of the cache may or may not have
			 * the same netid. It doesn't matter. So
			 * when we refresh, we simply use the netid
			 * of the request which triggered the
			 * refresh attempt.
			 */
			ASSERT(p->auth_netid != NULL);

			retrieval = nfsauth_retrieve(exi, p->auth_netid,
			    p->auth_flavor, &p->auth_addr, &access,
			    p->auth_clnt_uid, p->auth_clnt_gid,
			    &p->auth_srv_uid, &p->auth_srv_gid);

			/*
			 * auth_netid can only be set in one other place
			 * and only while the state is NFS_AUTH_FRESH, so
			 * in REFRESHING state we own it and may free it
			 * without holding auth_lock.
			 */
			kmem_free(p->auth_netid, strlen(p->auth_netid) + 1);
			p->auth_netid = NULL;

			mutex_enter(&p->auth_lock);
			if (p->auth_state == NFS_AUTH_INVALID) {
				mutex_exit(&p->auth_lock);
				nfsauth_remove_dead_entry(p);
			} else {
				/*
				 * If we got an error, do not reset the
				 * time. This will cause the next access
				 * check for the client to reschedule this
				 * node.
				 */
				if (retrieval == TRUE) {
					p->auth_access = access;
					p->auth_freshness = gethrestime_sec();
				}
				p->auth_state = NFS_AUTH_FRESH;
				mutex_exit(&p->auth_lock);
			}
		}

		list_destroy(&ren->ren_authlist);
		exi_rele(ren->ren_exi);
		kmem_free(ren, sizeof (refreshq_exi_node_t));
	}

	refreshq_thread_state = REFRESHQ_THREAD_HALTED;
	cv_broadcast(&refreshq_cv);
	CALLB_CPR_EXIT(&cprinfo);
	zthread_exit();
}
757 
/*
 * Get the access information from the cache or callup to the mountd
 * to get and cache the access information in the kernel.
 *
 * On a cache hit the cached access value is returned and, when the
 * entry has aged beyond NFSAUTH_CACHE_REFRESH seconds, it is queued
 * for the refresh thread.  On a miss, mountd is asked synchronously
 * and the result is inserted into the export's cache (if memory
 * permits).  When uid/gid are non-NULL they receive the server
 * uid/gid that mountd mapped this client's credentials to.
 */
static int
nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor,
    cred_t *cr, uid_t *uid, gid_t *gid)
{
	struct netbuf		*taddrmask;
	struct netbuf		addr;	/* masked copy of caller's address */
	struct netbuf		*claddr;
	struct auth_cache	**head;
	struct auth_cache	*p;
	int			access;
	time_t			refresh;

	refreshq_exi_node_t	*ren;
	refreshq_auth_node_t	*ran;

	uid_t			tmpuid;
	gid_t			tmpgid;

	ASSERT(cr != NULL);

	/*
	 * Now check whether this client already
	 * has an entry for this flavor in the cache
	 * for this export.
	 * Get the caller's address, mask off the
	 * parts of the address that do not identify
	 * the host (port number, etc), and then hash
	 * it to find the chain of cache entries.
	 */

	claddr = svc_getrpccaller(req->rq_xprt);
	addr = *claddr;
	addr.buf = kmem_alloc(addr.len, KM_SLEEP);
	bcopy(claddr->buf, addr.buf, claddr->len);
	SVC_GETADDRMASK(req->rq_xprt, SVC_TATTR_ADDRMASK, (void **)&taddrmask);
	ASSERT(taddrmask != NULL);
	if (taddrmask)
		addrmask(&addr, taddrmask);

	rw_enter(&exi->exi_cache_lock, RW_READER);
	head = &exi->exi_cache[hash(&addr)];
	for (p = *head; p; p = p->auth_next) {
		if (EQADDR(&addr, &p->auth_addr) && flavor == p->auth_flavor &&
		    crgetuid(cr) == p->auth_clnt_uid &&
		    crgetgid(cr) == p->auth_clnt_gid)
			break;
	}

	if (p != NULL) {
		nfsauth_cache_hit++;

		refresh = gethrestime_sec() - p->auth_freshness;
		DTRACE_PROBE2(nfsauth__debug__cache__hit,
		    int, nfsauth_cache_hit,
		    time_t, refresh);

		mutex_enter(&p->auth_lock);
		if ((refresh > NFSAUTH_CACHE_REFRESH) &&
		    p->auth_state == NFS_AUTH_FRESH) {
			p->auth_state = NFS_AUTH_STALE;
			mutex_exit(&p->auth_lock);

			/*
			 * NOTE(review): auth_netid is written here after
			 * auth_lock was dropped and with only the READER
			 * side of exi_cache_lock held; this relies on the
			 * STALE state keeping all other writers away —
			 * confirm no concurrent access is possible.
			 */
			ASSERT(p->auth_netid == NULL);
			p->auth_netid =
			    strdup(svc_getnetid(req->rq_xprt));

			nfsauth_cache_refresh++;

			DTRACE_PROBE3(nfsauth__debug__cache__stale,
			    struct exportinfo *, exi,
			    struct auth_cache *, p,
			    int, nfsauth_cache_refresh);

			ran = kmem_alloc(sizeof (refreshq_auth_node_t),
			    KM_SLEEP);
			ran->ran_auth = p;

			mutex_enter(&refreshq_lock);
			/*
			 * We should not add a work queue
			 * item if the thread is not
			 * accepting them.
			 */
			if (refreshq_thread_state == REFRESHQ_THREAD_RUNNING) {
				/*
				 * Is there an existing exi_list?
				 */
				for (ren = list_head(&refreshq_queue);
				    ren != NULL;
				    ren = list_next(&refreshq_queue, ren)) {
					if (ren->ren_exi == exi) {
						list_insert_tail(
						    &ren->ren_authlist, ran);
						break;
					}
				}

				if (ren == NULL) {
					ren = kmem_alloc(
					    sizeof (refreshq_exi_node_t),
					    KM_SLEEP);

					exi_hold(exi);
					ren->ren_exi = exi;

					list_create(&ren->ren_authlist,
					    sizeof (refreshq_auth_node_t),
					    offsetof(refreshq_auth_node_t,
					    ran_node));

					list_insert_tail(&ren->ren_authlist,
					    ran);
					list_insert_tail(&refreshq_queue, ren);
				}

				cv_broadcast(&refreshq_cv);
			} else {
				kmem_free(ran, sizeof (refreshq_auth_node_t));
			}

			mutex_exit(&refreshq_lock);
		} else {
			mutex_exit(&p->auth_lock);
		}

		/*
		 * NOTE(review): auth_access/auth_srv_uid/auth_srv_gid are
		 * read, and auth_time written, without auth_lock here; a
		 * concurrent refresh could update them underneath us.
		 * Presumably a slightly stale read is considered harmless
		 * — verify.
		 */
		access = p->auth_access;
		if (uid != NULL)
			*uid = p->auth_srv_uid;
		if (gid != NULL)
			*gid = p->auth_srv_gid;

		p->auth_time = gethrestime_sec();

		rw_exit(&exi->exi_cache_lock);
		kmem_free(addr.buf, addr.len);

		return (access);
	}

	rw_exit(&exi->exi_cache_lock);

	nfsauth_cache_miss++;

	if (!nfsauth_retrieve(exi, svc_getnetid(req->rq_xprt), flavor,
	    &addr, &access, crgetuid(cr), crgetgid(cr), &tmpuid, &tmpgid)) {
		kmem_free(addr.buf, addr.len);
		return (access);
	}

	if (uid != NULL)
		*uid = tmpuid;
	if (gid != NULL)
		*gid = tmpgid;

	/*
	 * Now cache the result on the cache chain
	 * for this export (if there's enough memory)
	 */
	p = kmem_cache_alloc(exi_cache_handle, KM_NOSLEEP);
	if (p != NULL) {
		/* The new entry takes ownership of addr.buf. */
		p->auth_addr = addr;
		p->auth_flavor = flavor;
		p->auth_clnt_uid = crgetuid(cr);
		p->auth_clnt_gid = crgetgid(cr);
		p->auth_srv_uid = tmpuid;
		p->auth_srv_gid = tmpgid;
		p->auth_access = access;
		p->auth_time = p->auth_freshness = gethrestime_sec();
		p->auth_state = NFS_AUTH_FRESH;
		p->auth_netid = NULL;
		mutex_init(&p->auth_lock, NULL, MUTEX_DEFAULT, NULL);

		rw_enter(&exi->exi_cache_lock, RW_WRITER);
		p->auth_next = *head;
		*head = p;
		rw_exit(&exi->exi_cache_lock);
	} else {
		kmem_free(addr.buf, addr.len);
	}

	return (access);
}
944 
945 /*
946  * Check if the requesting client has access to the filesystem with
947  * a given nfs flavor number which is an explicitly shared flavor.
948  */
949 int
950 nfsauth4_secinfo_access(struct exportinfo *exi, struct svc_req *req,
951 			int flavor, int perm, cred_t *cr)
952 {
953 	int access;
954 
955 	if (! (perm & M_4SEC_EXPORTED)) {
956 		return (NFSAUTH_DENIED);
957 	}
958 
959 	/*
960 	 * Optimize if there are no lists
961 	 */
962 	if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0) {
963 		perm &= ~M_4SEC_EXPORTED;
964 		if (perm == M_RO)
965 			return (NFSAUTH_RO);
966 		if (perm == M_RW)
967 			return (NFSAUTH_RW);
968 	}
969 
970 	access = nfsauth_cache_get(exi, req, flavor, cr, NULL, NULL);
971 
972 	return (access);
973 }
974 
/*
 * Check whether the requesting client may access the export using the
 * security flavor carried in the request transport.  Returns an
 * NFSAUTH_* code, possibly OR-ed with NFSAUTH_MAPNONE when the
 * request's flavor had to be mapped to AUTH_NONE.  When non-NULL,
 * *uid and *gid are set to the credentials the server should use for
 * this client (root maps to the export's anon credential by default;
 * nfsauth_cache_get() may override both).
 */
int
nfsauth_access(struct exportinfo *exi, struct svc_req *req, cred_t *cr,
    uid_t *uid, gid_t *gid)
{
	int access, mapaccess;
	struct secinfo *sp;
	int i, flavor, perm;
	int authnone_entry = -1;

	/*
	 *  Get the nfs flavor number from xprt.
	 */
	flavor = (int)(uintptr_t)req->rq_xprt->xp_cookie;

	/*
	 * First check the access restrictions on the filesystem.  If
	 * there are no lists associated with this flavor then there's no
	 * need to make an expensive call to the nfsauth service or to
	 * cache anything.
	 */

	sp = exi->exi_export.ex_secinfo;
	for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
		if (flavor != sp[i].s_secinfo.sc_nfsnum) {
			/* Remember where AUTH_NONE sits, for fallback. */
			if (sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
				authnone_entry = i;
			continue;
		}
		break;
	}

	mapaccess = 0;

	if (i >= exi->exi_export.ex_seccnt) {
		/*
		 * Flavor not found, but use AUTH_NONE if it exists
		 */
		if (authnone_entry == -1)
			return (NFSAUTH_DENIED);
		flavor = AUTH_NONE;
		mapaccess = NFSAUTH_MAPNONE;
		i = authnone_entry;
	}

	/*
	 * By default root is mapped to anonymous user.
	 * This might get overriden later in nfsauth_cache_get().
	 */
	if (crgetuid(cr) == 0) {
		if (uid)
			*uid = exi->exi_export.ex_anon;
		if (gid)
			*gid = exi->exi_export.ex_anon;
	} else {
		if (uid)
			*uid = crgetuid(cr);
		if (gid)
			*gid = crgetgid(cr);
	}

	/*
	 * If the flavor is in the ex_secinfo list, but not an explicitly
	 * shared flavor by the user, it is a result of the nfsv4 server
	 * namespace setup. We will grant an RO permission similar for
	 * a pseudo node except that this node is a shared one.
	 *
	 * e.g. flavor in (flavor) indicates that it is not explictly
	 *	shared by the user:
	 *
	 *		/	(sys, krb5)
	 *		|
	 *		export  #share -o sec=sys (krb5)
	 *		|
	 *		secure  #share -o sec=krb5
	 *
	 *	In this case, when a krb5 request coming in to access
	 *	/export, RO permission is granted.
	 */
	if (!(sp[i].s_flags & M_4SEC_EXPORTED))
		return (mapaccess | NFSAUTH_RO);

	/*
	 * Optimize if there are no lists
	 */
	perm = sp[i].s_flags;
	if ((perm & (M_ROOT | M_NONE | M_MAP)) == 0) {
		perm &= ~M_4SEC_EXPORTED;
		if (perm == M_RO)
			return (mapaccess | NFSAUTH_RO);
		if (perm == M_RW)
			return (mapaccess | NFSAUTH_RW);
	}

	access = nfsauth_cache_get(exi, req, flavor, cr, uid, gid);

	/*
	 * Client's security flavor doesn't match with "ro" or
	 * "rw" list. Try again using AUTH_NONE if present.
	 */
	if ((access & NFSAUTH_WRONGSEC) && (flavor != AUTH_NONE)) {
		/*
		 * Have we already encountered AUTH_NONE ?
		 */
		if (authnone_entry != -1) {
			mapaccess = NFSAUTH_MAPNONE;
			access = nfsauth_cache_get(exi, req, AUTH_NONE, cr,
			    NULL, NULL);
		} else {
			/*
			 * Check for AUTH_NONE presence.
			 */
			for (; i < exi->exi_export.ex_seccnt; i++) {
				if (sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
					mapaccess = NFSAUTH_MAPNONE;
					access = nfsauth_cache_get(exi, req,
					    AUTH_NONE, cr, NULL, NULL);
					break;
				}
			}
		}
	}

	/* Strip any other bits when the access is denied. */
	if (access & NFSAUTH_DENIED)
		access = NFSAUTH_DENIED;

	return (access | mapaccess);
}
1102 
1103 static void
1104 nfsauth_free_node(struct auth_cache *p)
1105 {
1106 	if (p->auth_netid != NULL)
1107 		kmem_free(p->auth_netid, strlen(p->auth_netid) + 1);
1108 	kmem_free(p->auth_addr.buf, p->auth_addr.len);
1109 	mutex_destroy(&p->auth_lock);
1110 	kmem_cache_free(exi_cache_handle, (void *)p);
1111 }
1112 
1113 /*
1114  * Remove the dead entry from the refreshq_dead_entries
1115  * list.
1116  */
1117 static void
1118 nfsauth_remove_dead_entry(struct auth_cache *dead)
1119 {
1120 	struct auth_cache	*p;
1121 	struct auth_cache	*prev;
1122 	struct auth_cache	*next;
1123 
1124 	mutex_enter(&refreshq_lock);
1125 	prev = NULL;
1126 	for (p = refreshq_dead_entries; p != NULL; p = next) {
1127 		next = p->auth_next;
1128 
1129 		if (p == dead) {
1130 			if (prev == NULL)
1131 				refreshq_dead_entries = next;
1132 			else
1133 				prev->auth_next = next;
1134 
1135 			nfsauth_free_node(dead);
1136 			break;
1137 		}
1138 
1139 		prev = p;
1140 	}
1141 	mutex_exit(&refreshq_lock);
1142 }
1143 
1144 /*
1145  * Free the nfsauth cache for a given export
1146  */
1147 void
1148 nfsauth_cache_free(struct exportinfo *exi)
1149 {
1150 	int i;
1151 	struct auth_cache *p, *next;
1152 
1153 	for (i = 0; i < AUTH_TABLESIZE; i++) {
1154 		for (p = exi->exi_cache[i]; p; p = next) {
1155 			next = p->auth_next;
1156 
1157 			/*
1158 			 * The only way we got here
1159 			 * was with an exi_rele, which
1160 			 * means that no auth cache entry
1161 			 * is being refreshed.
1162 			 */
1163 			nfsauth_free_node(p);
1164 		}
1165 	}
1166 }
1167 
1168 /*
1169  * Called by the kernel memory allocator when
1170  * memory is low. Free unused cache entries.
1171  * If that's not enough, the VM system will
1172  * call again for some more.
1173  */
1174 /*ARGSUSED*/
1175 void
1176 exi_cache_reclaim(void *cdrarg)
1177 {
1178 	int i;
1179 	struct exportinfo *exi;
1180 
1181 	rw_enter(&exported_lock, RW_READER);
1182 
1183 	for (i = 0; i < EXPTABLESIZE; i++) {
1184 		for (exi = exptable[i]; exi; exi = exi->fid_hash.next) {
1185 			exi_cache_trim(exi);
1186 		}
1187 	}
1188 	nfsauth_cache_reclaim++;
1189 
1190 	rw_exit(&exported_lock);
1191 }
1192 
/*
 * Trim one export's auth cache: free entries which have been idle
 * (auth_time) for more than NFSAUTH_CACHE_TRIM seconds.  Entries that
 * are not FRESH (i.e. queued for, or undergoing, refresh) cannot be
 * freed here; they are marked INVALID and parked on
 * refreshq_dead_entries for the refresh thread to harvest.
 */
void
exi_cache_trim(struct exportinfo *exi)
{
	struct auth_cache *p;
	struct auth_cache *prev, *next;
	int i;
	time_t stale_time;

	stale_time = gethrestime_sec() - NFSAUTH_CACHE_TRIM;

	rw_enter(&exi->exi_cache_lock, RW_WRITER);

	for (i = 0; i < AUTH_TABLESIZE; i++) {

		/*
		 * Free entries that have not been
		 * used for NFSAUTH_CACHE_TRIM seconds.
		 */
		prev = NULL;
		for (p = exi->exi_cache[i]; p; p = next) {
			next = p->auth_next;
			if (p->auth_time > stale_time) {
				prev = p;
				continue;
			}

			mutex_enter(&p->auth_lock);
			DTRACE_PROBE1(nfsauth__debug__trim__state,
			    auth_state_t, p->auth_state);

			if (p->auth_state != NFS_AUTH_FRESH) {
				p->auth_state = NFS_AUTH_INVALID;
				mutex_exit(&p->auth_lock);

				mutex_enter(&refreshq_lock);
				p->auth_next = refreshq_dead_entries;
				refreshq_dead_entries = p;
				mutex_exit(&refreshq_lock);
			} else {
				mutex_exit(&p->auth_lock);
				nfsauth_free_node(p);
			}

			/* Unlink the entry from its hash chain. */
			if (prev == NULL)
				exi->exi_cache[i] = next;
			else
				prev->auth_next = next;
		}
	}

	rw_exit(&exi->exi_cache_lock);
}
1245