xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c (revision 3e2c06821003697f97716f7c084864c5bf606aa3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/systm.h>
27 #include <rpc/auth.h>
28 #include <rpc/clnt.h>
29 #include <nfs/nfs4_kprot.h>
30 #include <nfs/nfs4.h>
31 #include <nfs/lm.h>
32 #include <sys/cmn_err.h>
33 #include <sys/disp.h>
34 #include <sys/sdt.h>
35 
36 #include <sys/pathname.h>
37 
38 #include <sys/strsubr.h>
39 #include <sys/ddi.h>
40 
41 #include <sys/vnode.h>
42 #include <sys/sdt.h>
43 #include <inet/common.h>
44 #include <inet/ip.h>
45 #include <inet/ip6.h>
46 
/*
 * NOTE(review): MAX_READ_DELEGATIONS is not referenced in the part of
 * this file covered by this review; confirm its consumer before changing.
 */
#define	MAX_READ_DELEGATIONS 5

/*
 * rfs4_deleg_policy is the server-wide delegation policy; it starts out
 * as SRV_NEVER_DELEGATE.  rfs4_set_deleg_policy() stores to it with
 * rfs4_deleg_policy_lock held as writer, while rfs4_hold_deleg_policy()
 * and rfs4_rele_deleg_policy() take and drop that lock as reader.
 */
krwlock_t rfs4_deleg_policy_lock;
srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE;
static int rfs4_deleg_wlp = 5;	/* NOTE(review): use not visible here */
kmutex_t rfs4_deleg_lock;	/* NOTE(review): use not visible here */
static int rfs4_deleg_disabled;	/* NOTE(review): use not visible here */
/*
 * Number of attempts rfs4_cbinfo_hold() makes to set up a newly supplied
 * callback path (it calls delay(hz) between attempts) before giving up.
 */
static int rfs4_max_setup_cb_tries = 5;

#ifdef DEBUG

static int rfs4_test_cbgetattr_fail = 0;
int rfs4_cb_null;	/* incremented by rfs4_do_cb_null() per good CB_NULL */
int rfs4_cb_debug;
int rfs4_deleg_debug;

#endif
64 
/*
 * Forward declarations for file-local helpers that are used before
 * their definitions appear.
 */
static void rfs4_recall_file(rfs4_file_t *,
    void (*recall)(rfs4_deleg_state_t *, bool_t),
    bool_t, rfs4_client_t *);
static	void		rfs4_revoke_file(rfs4_file_t *);
/* Callback client-handle cache management (see rfs4_cbinfo_t cb_chc[]) */
static	void		rfs4_cb_chflush(rfs4_cbinfo_t *);
static	CLIENT		*rfs4_cb_getch(rfs4_cbinfo_t *);
static	void		rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t);
static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *,
    open_delegation_type4, int *);
74 
/*
 * Convert a universal address (host part followed by ".p1.p2", where
 * p1 and p2 are the high and low bytes of the port in decimal) to a
 * transport specific address using inet_pton.
 *
 *	af	address family handed to inet_pton() (AF_INET, otherwise
 *		the address is treated as a struct in6_addr)
 *	ua	NUL-terminated universal address.  The '.' that separates
 *		host and port is overwritten with '\0' while inet_pton()
 *		runs and is always put back before returning, because the
 *		caller later frees ua using strlen(ua) + 1.
 *	ap	out: binary host address
 *	pp	out: port, converted with htons()
 *
 * Returns 0 on success.  Returns EINVAL when ua does not contain two
 * trailing '.'-separated port fields, when inet_pton() does not accept
 * the host part, or when a port field contains a non-digit or a value
 * above 255.  On failure *pp is left untouched; *ap may already have
 * been written by inet_pton().
 */
static int
uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp)
{
	int dots = 0, i, j, len, split = -1;
	int host_ok;
	unsigned int octet = 0;
	in_port_t port = 0;

	len = strlen(ua);

	/* Scan backwards for the second '.' from the end: host | p1.p2 */
	for (i = len - 1; i >= 0; i--) {
		if (ua[i] == '.' && ++dots == 2) {
			split = i;
			break;
		}
	}
	if (split < 0)
		return (EINVAL);

	/* Terminate the host part just long enough to convert it. */
	ua[split] = '\0';
	host_ok = (inet_pton(af, ua, ap) == 1);
	ua[split] = '.';
	if (!host_ok)
		return (EINVAL);

	/*
	 * Parse "p1.p2".  Each field is one byte of the port, so anything
	 * above 255 is malformed; accumulating in a wider type lets us
	 * detect that instead of silently wrapping.
	 */
	for (j = split + 1; j < len; j++) {
		if (ua[j] == '.') {
			port = (in_port_t)(octet << 8);
			octet = 0;
		} else if (ua[j] >= '0' && ua[j] <= '9') {
			octet = octet * 10 + (unsigned int)(ua[j] - '0');
			if (octet > 255)
				return (EINVAL);
		} else {
			return (EINVAL);
		}
	}
	port += (in_port_t)octet;

	/*
	 * reset to network order
	 * NOTE(review): this relies on the inet_pton() in use here leaving
	 * the address in host order -- confirm against its implementation.
	 */
	if (af == AF_INET) {
		*(uint32_t *)ap = htonl(*(uint32_t *)ap);
	} else {
		uint16_t *sap = ap;
		int ix;

		for (ix = 0; ix <
		    sizeof (struct in6_addr) / sizeof (uint16_t); ix++)
			sap[ix] = htons(sap[ix]);
	}
	*pp = htons(port);

	return (0);
}
149 
150 /*
151  * Update the delegation policy with the
152  * value of "new_policy"
153  */
154 void
155 rfs4_set_deleg_policy(srv_deleg_policy_t new_policy)
156 {
157 	rw_enter(&rfs4_deleg_policy_lock, RW_WRITER);
158 	rfs4_deleg_policy = new_policy;
159 	rw_exit(&rfs4_deleg_policy_lock);
160 }
161 
162 void
163 rfs4_hold_deleg_policy(void)
164 {
165 	rw_enter(&rfs4_deleg_policy_lock, RW_READER);
166 }
167 
168 void
169 rfs4_rele_deleg_policy(void)
170 {
171 	rw_exit(&rfs4_deleg_policy_lock);
172 }
173 
174 
175 /*
176  * This free function is to be used when the client struct is being
177  * released and nothing at all is needed of the callback info any
178  * longer.
179  */
180 void
181 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp)
182 {
183 	char *addr = cbp->cb_callback.cb_location.r_addr;
184 	char *netid = cbp->cb_callback.cb_location.r_netid;
185 
186 	/* Free old address if any */
187 
188 	if (addr)
189 		kmem_free(addr, strlen(addr) + 1);
190 	if (netid)
191 		kmem_free(netid, strlen(netid) + 1);
192 
193 	addr = cbp->cb_newer.cb_callback.cb_location.r_addr;
194 	netid = cbp->cb_newer.cb_callback.cb_location.r_netid;
195 
196 	if (addr)
197 		kmem_free(addr, strlen(addr) + 1);
198 	if (netid)
199 		kmem_free(netid, strlen(netid) + 1);
200 
201 	if (cbp->cb_chc_free) {
202 		rfs4_cb_chflush(cbp);
203 	}
204 }
205 
/*
 * The server uses this to check the callback path supplied by the
 * client.  The callback connection is marked "in progress" while this
 * work is going on and then eventually marked either OK or BAD.
 * This work can be done as part of a separate thread and at the end
 * of this the thread will exit or it may be done such that the caller
 * will continue with other work.
 *
 * The caller must hold a reference on "cp"; every return path below
 * drops it with rfs4_client_rele().  Only one thread at a time runs the
 * body: cb_nullcaller marks the active one and later arrivals return
 * immediately.
 */
static void
rfs4_do_cb_null(rfs4_client_t *cp)
{
	struct timeval tv;
	CLIENT *ch;
	rfs4_cbstate_t newstate;
	rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;

	mutex_enter(cbp->cb_lock);
	/* If another thread is doing CB_NULL RPC then return */
	if (cbp->cb_nullcaller == TRUE) {
		mutex_exit(cbp->cb_lock);
		rfs4_client_rele(cp);
		return;
	}

	/* Mark the cbinfo as having a thread in the NULL callback */
	cbp->cb_nullcaller = TRUE;

	/*
	 * Are there other threads still using the cbinfo client
	 * handles?  If so, this thread must wait before going and
	 * mucking around with the callback information.
	 * rfs4_cbinfo_rele() broadcasts cb_cv_nullcaller when the
	 * reference count drops to zero.
	 */
	while (cbp->cb_refcnt != 0)
		cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock);

	/*
	 * This thread itself may find that new callback info has
	 * arrived and is set up to handle this case and redrive the
	 * call to the client's callback server.
	 *
	 * cb_lock is held every time control reaches "retry".
	 */
retry:
	if (cbp->cb_newer.cb_new == TRUE &&
	    cbp->cb_newer.cb_confirmed == TRUE) {
		char *addr = cbp->cb_callback.cb_location.r_addr;
		char *netid = cbp->cb_callback.cb_location.r_netid;

		/*
		 * Free the old stuff if it exists; may be the first
		 * time through this path
		 */
		if (addr)
			kmem_free(addr, strlen(addr) + 1);
		if (netid)
			kmem_free(netid, strlen(netid) + 1);

		/* Move over the addr/netid (ownership transfers) */
		cbp->cb_callback.cb_location.r_addr =
		    cbp->cb_newer.cb_callback.cb_location.r_addr;
		cbp->cb_newer.cb_callback.cb_location.r_addr = NULL;
		cbp->cb_callback.cb_location.r_netid =
		    cbp->cb_newer.cb_callback.cb_location.r_netid;
		cbp->cb_newer.cb_callback.cb_location.r_netid = NULL;

		/* Get the program number */
		cbp->cb_callback.cb_program =
		    cbp->cb_newer.cb_callback.cb_program;
		cbp->cb_newer.cb_callback.cb_program = 0;

		/* Don't forget the protocol's "cb_ident" field */
		cbp->cb_ident = cbp->cb_newer.cb_ident;
		cbp->cb_newer.cb_ident = 0;

		/* no longer new */
		cbp->cb_newer.cb_new = FALSE;
		cbp->cb_newer.cb_confirmed = FALSE;

		/* get rid of the old client handles that may exist */
		rfs4_cb_chflush(cbp);

		/* CB_NONE lets the check below fall through to the probe */
		cbp->cb_state = CB_NONE;
		cbp->cb_timefailed = 0; /* reset the clock */
		cbp->cb_notified_of_cb_path_down = TRUE;
	}

	/*
	 * Anything other than CB_NONE means there is no untested path
	 * to probe (this includes the CB_BAD set below when no client
	 * handle could be obtained), so wake the waiters and finish.
	 */
	if (cbp->cb_state != CB_NONE) {
		cv_broadcast(cbp->cb_cv);	/* let the others know */
		cbp->cb_nullcaller = FALSE;
		mutex_exit(cbp->cb_lock);
		rfs4_client_rele(cp);
		return;
	}

	/* mark rfs4_client_t as CALLBACK NULL in progress */
	cbp->cb_state = CB_INPROG;
	mutex_exit(cbp->cb_lock);

	/* get/generate a client handle; rfs4_cb_getch() takes cb_lock */
	if ((ch = rfs4_cb_getch(cbp)) == NULL) {
		mutex_enter(cbp->cb_lock);
		cbp->cb_state = CB_BAD;
		cbp->cb_timefailed = gethrestime_sec(); /* observability */
		goto retry;
	}


	/* the CB_NULL RPC is issued without cb_lock held */
	tv.tv_sec = 30;
	tv.tv_usec = 0;
	if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) {
		newstate = CB_BAD;
	} else {
		newstate = CB_OK;
#ifdef	DEBUG
		rfs4_cb_null++;
#endif
	}

	/* Check to see if the client has specified new callback info */
	mutex_enter(cbp->cb_lock);
	rfs4_cb_freech(cbp, ch, TRUE);	/* TRUE: cb_lock already held */
	if (cbp->cb_newer.cb_new == TRUE &&
	    cbp->cb_newer.cb_confirmed == TRUE) {
		goto retry;	/* give the CB_NULL another chance */
	}

	cbp->cb_state = newstate;
	if (cbp->cb_state == CB_BAD)
		cbp->cb_timefailed = gethrestime_sec(); /* observability */

	cv_broadcast(cbp->cb_cv);	/* start up the other threads */
	cbp->cb_nullcaller = FALSE;
	mutex_exit(cbp->cb_lock);

	rfs4_client_rele(cp);
}
340 
341 /*
342  * Given a client struct, inspect the callback info to see if the
343  * callback path is up and available.
344  *
345  * If new callback path is available and no one has set it up then
346  * try to set it up. If setup is not successful after 5 tries (5 secs)
347  * then gives up and returns NULL.
348  *
349  * If callback path is being initialized, then wait for the CB_NULL RPC
350  * call to occur.
351  */
352 static rfs4_cbinfo_t *
353 rfs4_cbinfo_hold(rfs4_client_t *cp)
354 {
355 	rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
356 	int retries = 0;
357 
358 	mutex_enter(cbp->cb_lock);
359 
360 	while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) {
361 		/*
362 		 * Looks like a new callback path may be available and
363 		 * noone has set it up.
364 		 */
365 		mutex_exit(cbp->cb_lock);
366 		rfs4_dbe_hold(cp->rc_dbe);
367 		rfs4_do_cb_null(cp); /* caller will release client hold */
368 
369 		mutex_enter(cbp->cb_lock);
370 		/*
371 		 * If callback path is no longer new, or it's being setup
372 		 * then stop and wait for it to be done.
373 		 */
374 		if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE)
375 			break;
376 		mutex_exit(cbp->cb_lock);
377 
378 		if (++retries >= rfs4_max_setup_cb_tries)
379 			return (NULL);
380 		delay(hz);
381 		mutex_enter(cbp->cb_lock);
382 	}
383 
384 	/* Is there a thread working on doing the CB_NULL RPC? */
385 	if (cbp->cb_nullcaller == TRUE)
386 		cv_wait(cbp->cb_cv, cbp->cb_lock);  /* if so, wait on it */
387 
388 	/* If the callback path is not okay (up and running), just quit */
389 	if (cbp->cb_state != CB_OK) {
390 		mutex_exit(cbp->cb_lock);
391 		return (NULL);
392 	}
393 
394 	/* Let someone know we are using the current callback info */
395 	cbp->cb_refcnt++;
396 	mutex_exit(cbp->cb_lock);
397 	return (cbp);
398 }
399 
400 /*
401  * The caller is done with the callback info.  It may be that the
402  * caller's RPC failed and the NFSv4 client has actually provided new
403  * callback information.  If so, let the caller know so they can
404  * advantage of this and maybe retry the RPC that originally failed.
405  */
406 static int
407 rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate)
408 {
409 	int cb_new = FALSE;
410 
411 	mutex_enter(cbp->cb_lock);
412 
413 	/* The caller gets a chance to mark the callback info as bad */
414 	if (newstate != CB_NOCHANGE)
415 		cbp->cb_state = newstate;
416 	if (newstate == CB_FAILED) {
417 		cbp->cb_timefailed = gethrestime_sec(); /* observability */
418 		cbp->cb_notified_of_cb_path_down = FALSE;
419 	}
420 
421 	cbp->cb_refcnt--;	/* no longer using the information */
422 
423 	/*
424 	 * A thread may be waiting on this one to finish and if so,
425 	 * let it know that it is okay to do the CB_NULL to the
426 	 * client's callback server.
427 	 */
428 	if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller)
429 		cv_broadcast(cbp->cb_cv_nullcaller);
430 
431 	/*
432 	 * If this is the last thread to use the callback info and
433 	 * there is new callback information to try and no thread is
434 	 * there ready to do the CB_NULL, then return true to teh
435 	 * caller so they can do the CB_NULL
436 	 */
437 	if (cbp->cb_refcnt == 0 &&
438 	    cbp->cb_nullcaller == FALSE &&
439 	    cbp->cb_newer.cb_new == TRUE &&
440 	    cbp->cb_newer.cb_confirmed == TRUE)
441 		cb_new = TRUE;
442 
443 	mutex_exit(cbp->cb_lock);
444 
445 	return (cb_new);
446 }
447 
448 /*
449  * Given the information in the callback info struct, create a client
450  * handle that can be used by the server for its callback path.
451  */
452 static CLIENT *
453 rfs4_cbch_init(rfs4_cbinfo_t *cbp)
454 {
455 	struct knetconfig knc;
456 	vnode_t *vp;
457 	struct sockaddr_in addr4;
458 	struct sockaddr_in6 addr6;
459 	void *addr, *taddr;
460 	in_port_t *pp;
461 	int af;
462 	char *devnam;
463 	struct netbuf nb;
464 	int size;
465 	CLIENT *ch = NULL;
466 	int useresvport = 0;
467 
468 	mutex_enter(cbp->cb_lock);
469 
470 	if (cbp->cb_callback.cb_location.r_netid == NULL ||
471 	    cbp->cb_callback.cb_location.r_addr == NULL) {
472 		goto cb_init_out;
473 	}
474 
475 	if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) {
476 		knc.knc_semantics = NC_TPI_COTS;
477 		knc.knc_protofmly = "inet";
478 		knc.knc_proto = "tcp";
479 		devnam = "/dev/tcp";
480 		af = AF_INET;
481 	} else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp")
482 	    == 0) {
483 		knc.knc_semantics = NC_TPI_CLTS;
484 		knc.knc_protofmly = "inet";
485 		knc.knc_proto = "udp";
486 		devnam = "/dev/udp";
487 		af = AF_INET;
488 	} else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6")
489 	    == 0) {
490 		knc.knc_semantics = NC_TPI_COTS;
491 		knc.knc_protofmly = "inet6";
492 		knc.knc_proto = "tcp";
493 		devnam = "/dev/tcp6";
494 		af = AF_INET6;
495 	} else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6")
496 	    == 0) {
497 		knc.knc_semantics = NC_TPI_CLTS;
498 		knc.knc_protofmly = "inet6";
499 		knc.knc_proto = "udp";
500 		devnam = "/dev/udp6";
501 		af = AF_INET6;
502 	} else {
503 		goto cb_init_out;
504 	}
505 
506 	if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) {
507 
508 		goto cb_init_out;
509 	}
510 
511 	if (vp->v_type != VCHR) {
512 		VN_RELE(vp);
513 		goto cb_init_out;
514 	}
515 
516 	knc.knc_rdev = vp->v_rdev;
517 
518 	VN_RELE(vp);
519 
520 	if (af == AF_INET) {
521 		size = sizeof (addr4);
522 		bzero(&addr4, size);
523 		addr4.sin_family = (sa_family_t)af;
524 		addr = &addr4.sin_addr;
525 		pp = &addr4.sin_port;
526 		taddr = &addr4;
527 	} else /* AF_INET6 */ {
528 		size = sizeof (addr6);
529 		bzero(&addr6, size);
530 		addr6.sin6_family = (sa_family_t)af;
531 		addr = &addr6.sin6_addr;
532 		pp = &addr6.sin6_port;
533 		taddr = &addr6;
534 	}
535 
536 	if (uaddr2sockaddr(af,
537 	    cbp->cb_callback.cb_location.r_addr, addr, pp)) {
538 
539 		goto cb_init_out;
540 	}
541 
542 
543 	nb.maxlen = nb.len = size;
544 	nb.buf = (char *)taddr;
545 
546 	if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program,
547 	    NFS_CB, 0, 0, curthread->t_cred, &ch)) {
548 
549 		ch = NULL;
550 	}
551 
552 	/* turn off reserved port usage */
553 	(void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport);
554 
555 cb_init_out:
556 	mutex_exit(cbp->cb_lock);
557 	return (ch);
558 }
559 
560 /*
561  * Iterate over the client handle cache and
562  * destroy it.
563  */
564 static void
565 rfs4_cb_chflush(rfs4_cbinfo_t *cbp)
566 {
567 	CLIENT *ch;
568 
569 	while (cbp->cb_chc_free) {
570 		cbp->cb_chc_free--;
571 		ch = cbp->cb_chc[cbp->cb_chc_free];
572 		cbp->cb_chc[cbp->cb_chc_free] = NULL;
573 		if (ch) {
574 			if (ch->cl_auth)
575 				auth_destroy(ch->cl_auth);
576 			clnt_destroy(ch);
577 		}
578 	}
579 }
580 
581 /*
582  * Return a client handle, either from a the small
583  * rfs4_client_t cache or one that we just created.
584  */
585 static CLIENT *
586 rfs4_cb_getch(rfs4_cbinfo_t *cbp)
587 {
588 	CLIENT *cbch = NULL;
589 	uint32_t zilch = 0;
590 
591 	mutex_enter(cbp->cb_lock);
592 
593 	if (cbp->cb_chc_free) {
594 		cbp->cb_chc_free--;
595 		cbch = cbp->cb_chc[ cbp->cb_chc_free ];
596 		mutex_exit(cbp->cb_lock);
597 		(void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch);
598 		return (cbch);
599 	}
600 
601 	mutex_exit(cbp->cb_lock);
602 
603 	/* none free so make it now */
604 	cbch = rfs4_cbch_init(cbp);
605 
606 	return (cbch);
607 }
608 
609 /*
610  * Return the client handle to the small cache or
611  * destroy it.
612  */
613 static void
614 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld)
615 {
616 	if (lockheld == FALSE)
617 		mutex_enter(cbp->cb_lock);
618 
619 	if (cbp->cb_chc_free < RFS4_CBCH_MAX) {
620 		cbp->cb_chc[ cbp->cb_chc_free++ ] = ch;
621 		if (lockheld == FALSE)
622 			mutex_exit(cbp->cb_lock);
623 		return;
624 	}
625 	if (lockheld == FALSE)
626 		mutex_exit(cbp->cb_lock);
627 
628 	/*
629 	 * cache maxed out of free entries, obliterate
630 	 * this client handle, destroy it, throw it away.
631 	 */
632 	if (ch->cl_auth)
633 		auth_destroy(ch->cl_auth);
634 	clnt_destroy(ch);
635 }
636 
637 /*
638  * With the supplied callback information - initialize the client
639  * callback data.  If there is a callback in progress, save the
640  * callback info so that a thread can pick it up in the future.
641  */
642 void
643 rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident)
644 {
645 	char *addr = NULL;
646 	char *netid = NULL;
647 	rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
648 	size_t len;
649 
650 	/* Set the call back for the client */
651 	if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' &&
652 	    cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') {
653 		len = strlen(cb->cb_location.r_addr) + 1;
654 		addr = kmem_alloc(len, KM_SLEEP);
655 		bcopy(cb->cb_location.r_addr, addr, len);
656 		len = strlen(cb->cb_location.r_netid) + 1;
657 		netid = kmem_alloc(len, KM_SLEEP);
658 		bcopy(cb->cb_location.r_netid, netid, len);
659 	}
660 	/* ready to save the new information but first free old, if exists */
661 	mutex_enter(cbp->cb_lock);
662 
663 	cbp->cb_newer.cb_callback.cb_program = cb->cb_program;
664 
665 	if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL)
666 		kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr,
667 		    strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1);
668 	cbp->cb_newer.cb_callback.cb_location.r_addr = addr;
669 
670 	if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL)
671 		kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid,
672 		    strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1);
673 	cbp->cb_newer.cb_callback.cb_location.r_netid = netid;
674 
675 	cbp->cb_newer.cb_ident = cb_ident;
676 
677 	if (addr && *addr && netid && *netid) {
678 		cbp->cb_newer.cb_new = TRUE;
679 		cbp->cb_newer.cb_confirmed = FALSE;
680 	} else {
681 		cbp->cb_newer.cb_new = FALSE;
682 		cbp->cb_newer.cb_confirmed = FALSE;
683 	}
684 
685 	mutex_exit(cbp->cb_lock);
686 }
687 
688 /*
689  * The server uses this when processing SETCLIENTID_CONFIRM.  Callback
690  * information may have been provided on SETCLIENTID and this call
691  * marks that information as confirmed and then starts a thread to
692  * test the callback path.
693  */
694 void
695 rfs4_deleg_cb_check(rfs4_client_t *cp)
696 {
697 	if (cp->rc_cbinfo.cb_newer.cb_new == FALSE)
698 		return;
699 
700 	cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE;
701 
702 	rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */
703 
704 	(void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN,
705 	    minclsyspri);
706 }
707 
708 static void
709 rfs4args_cb_recall_free(nfs_cb_argop4 *argop)
710 {
711 	CB_RECALL4args	*rec_argp;
712 
713 	rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
714 	if (rec_argp->fh.nfs_fh4_val)
715 		kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len);
716 }
717 
718 /* ARGSUSED */
719 static void
720 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop)
721 {
722 	CB_GETATTR4args *argp;
723 
724 	argp = &argop->nfs_cb_argop4_u.opcbgetattr;
725 	if (argp->fh.nfs_fh4_val)
726 		kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len);
727 }
728 
729 static void
730 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp)
731 {
732 	int i, arglen;
733 	nfs_cb_argop4 *argop;
734 
735 	/*
736 	 * First free any special args alloc'd for specific ops.
737 	 */
738 	arglen = args->array_len;
739 	argop = args->array;
740 	for (i = 0; i < arglen; i++, argop++) {
741 
742 		switch (argop->argop) {
743 		case OP_CB_RECALL:
744 			rfs4args_cb_recall_free(argop);
745 			break;
746 
747 		case OP_CB_GETATTR:
748 			rfs4args_cb_getattr_free(argop);
749 			break;
750 
751 		default:
752 			return;
753 		}
754 	}
755 
756 	if (args->tag.utf8string_len > 0)
757 		UTF8STRING_FREE(args->tag)
758 
759 	kmem_free(args->array, arglen * sizeof (nfs_cb_argop4));
760 	if (resp)
761 		(void) xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp);
762 }
763 
764 /*
765  * General callback routine for the server to the client.
766  */
767 static enum clnt_stat
768 rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args,
769     CB_COMPOUND4res *res, struct timeval timeout)
770 {
771 	rfs4_cbinfo_t *cbp;
772 	CLIENT *ch;
773 	/* start with this in case cb_getch() fails */
774 	enum clnt_stat	stat = RPC_FAILED;
775 
776 	res->tag.utf8string_val = NULL;
777 	res->array = NULL;
778 
779 retry:
780 	cbp = rfs4_cbinfo_hold(cp);
781 	if (cbp == NULL)
782 		return (stat);
783 
784 	/* get a client handle */
785 	if ((ch = rfs4_cb_getch(cbp)) != NULL) {
786 		/*
787 		 * reset the cb_ident since it may have changed in
788 		 * rfs4_cbinfo_hold()
789 		 */
790 		args->callback_ident = cbp->cb_ident;
791 
792 		stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv,
793 		    (caddr_t)args, xdr_CB_COMPOUND4res,
794 		    (caddr_t)res, timeout);
795 
796 		/* free client handle */
797 		rfs4_cb_freech(cbp, ch, FALSE);
798 	}
799 
800 	/*
801 	 * If the rele says that there may be new callback info then
802 	 * retry this sequence and it may succeed as a result of the
803 	 * new callback path
804 	 */
805 	if (rfs4_cbinfo_rele(cbp,
806 	    (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE)
807 		goto retry;
808 
809 	return (stat);
810 }
811 
812 /*
813  * Used by the NFSv4 server to get attributes for a file while
814  * handling the case where a file has been write delegated.  For the
815  * time being, VOP_GETATTR() is called and CB_GETATTR processing is
816  * not undertaken.  This call site is maintained in case the server is
817  * updated in the future to handle write delegation space guarantees.
818  */
819 nfsstat4
820 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
821 {
822 
823 	int error;
824 
825 	error = VOP_GETATTR(vp, vap, flag, cr, NULL);
826 	return (puterrno4(error));
827 }
828 
829 /*
830  * This is used everywhere in the v2/v3 server to allow the
831  * integration of all NFS versions and the support of delegation.  For
832  * now, just call the VOP_GETATTR().  If the NFSv4 server is enhanced
833  * in the future to provide space guarantees for write delegations
834  * then this call site should be expanded to interact with the client.
835  */
836 int
837 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
838 {
839 	return (VOP_GETATTR(vp, vap, flag, cr, NULL));
840 }
841 
842 /*
843  * Place the actual cb_recall otw call to client.
844  */
845 static void
846 rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc)
847 {
848 	CB_COMPOUND4args	cb4_args;
849 	CB_COMPOUND4res		cb4_res;
850 	CB_RECALL4args		*rec_argp;
851 	CB_RECALL4res		*rec_resp;
852 	nfs_cb_argop4		*argop;
853 	int			numops;
854 	int			argoplist_size;
855 	struct timeval		timeout;
856 	nfs_fh4			*fhp;
857 	enum clnt_stat		call_stat;
858 
859 	/*
860 	 * set up the compound args
861 	 */
862 	numops = 1;	/* CB_RECALL only */
863 
864 	argoplist_size = numops * sizeof (nfs_cb_argop4);
865 	argop = kmem_zalloc(argoplist_size, KM_SLEEP);
866 	argop->argop = OP_CB_RECALL;
867 	rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
868 
869 	(void) str_to_utf8("cb_recall", &cb4_args.tag);
870 	cb4_args.minorversion = CB4_MINORVERSION;
871 	/* cb4_args.callback_ident is set in rfs4_do_callback() */
872 	cb4_args.array_len = numops;
873 	cb4_args.array = argop;
874 
875 	/*
876 	 * fill in the args struct
877 	 */
878 	bcopy(&dsp->rds_delegid.stateid, &rec_argp->stateid, sizeof (stateid4));
879 	rec_argp->truncate = trunc;
880 
881 	fhp = &dsp->rds_finfo->rf_filehandle;
882 	rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) *
883 	    fhp->nfs_fh4_len, KM_SLEEP);
884 	nfs_fh4_copy(fhp, &rec_argp->fh);
885 
886 	/* Keep track of when we did this for observability */
887 	dsp->rds_time_recalled = gethrestime_sec();
888 
889 	/*
890 	 * Set up the timeout for the callback and make the actual call.
891 	 * Timeout will be 80% of the lease period for this server.
892 	 */
893 	timeout.tv_sec = (rfs4_lease_time * 80) / 100;
894 	timeout.tv_usec = 0;
895 
896 	DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->rds_client,
897 	    rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp);
898 
899 	call_stat = rfs4_do_callback(dsp->rds_client, &cb4_args, &cb4_res,
900 	    timeout);
901 
902 	rec_resp = (cb4_res.array_len == 0) ? NULL :
903 	    &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall;
904 
905 	DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->rds_client,
906 	    rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp);
907 
908 	if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) {
909 		rfs4_return_deleg(dsp, TRUE);
910 	}
911 
912 	rfs4freeargres(&cb4_args, &cb4_res);
913 }
914 
/*
 * Per-delegation argument block for a do_recall() thread.  It is
 * allocated by do_recall_file() and freed by do_recall().
 */
struct recall_arg {
	/* delegation state to recall; held for the thread by the creator */
	rfs4_deleg_state_t *dsp;
	/* routine that performs the recall; skipped when NULL */
	void (*recall)(rfs4_deleg_state_t *, bool_t trunc);
	/* passed through unchanged as recall()'s second argument */
	bool_t trunc;
};
920 
921 static void
922 do_recall(struct recall_arg *arg)
923 {
924 	rfs4_deleg_state_t *dsp = arg->dsp;
925 	rfs4_file_t *fp = dsp->rds_finfo;
926 	callb_cpr_t cpr_info;
927 	kmutex_t cpr_lock;
928 
929 	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
930 	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall");
931 
932 	/*
933 	 * It is possible that before this thread starts
934 	 * the client has send us a return_delegation, and
935 	 * if that is the case we do not need to send the
936 	 * recall callback.
937 	 */
938 	if (dsp->rds_dtype != OPEN_DELEGATE_NONE) {
939 		DTRACE_PROBE3(nfss__i__recall,
940 		    struct recall_arg *, arg,
941 		    struct rfs4_deleg_state_t *, dsp,
942 		    struct rfs4_file_t *, fp);
943 
944 		if (arg->recall)
945 			(void) (*arg->recall)(dsp, arg->trunc);
946 	}
947 
948 	mutex_enter(fp->rf_dinfo.rd_recall_lock);
949 	/*
950 	 * Recall count may go negative if the parent thread that is
951 	 * creating the individual callback threads does not modify
952 	 * the recall_count field before the callback thread actually
953 	 * gets a response from the CB_RECALL
954 	 */
955 	fp->rf_dinfo.rd_recall_count--;
956 	if (fp->rf_dinfo.rd_recall_count == 0)
957 		cv_signal(fp->rf_dinfo.rd_recall_cv);
958 	mutex_exit(fp->rf_dinfo.rd_recall_lock);
959 
960 	mutex_enter(&cpr_lock);
961 	CALLB_CPR_EXIT(&cpr_info);
962 	mutex_destroy(&cpr_lock);
963 
964 	rfs4_deleg_state_rele(dsp); /* release the hold for this thread */
965 
966 	kmem_free(arg, sizeof (struct recall_arg));
967 }
968 
/*
 * Argument block for the do_recall_file() thread.  It is allocated by
 * rfs4_recall_file() and freed by do_recall_file().
 */
struct master_recall_args {
    /* file whose delegations are recalled; held for the thread */
    rfs4_file_t *fp;
    /* recall routine copied into each per-delegation recall_arg */
    void (*recall)(rfs4_deleg_state_t *, bool_t);
    /* copied into each per-delegation recall_arg */
    bool_t trunc;
};
974 
975 static void
976 do_recall_file(struct master_recall_args *map)
977 {
978 	rfs4_file_t *fp = map->fp;
979 	rfs4_deleg_state_t *dsp;
980 	struct recall_arg *arg;
981 	callb_cpr_t cpr_info;
982 	kmutex_t cpr_lock;
983 	int32_t recall_count;
984 
985 	rfs4_dbe_lock(fp->rf_dbe);
986 
987 	/* Recall already in progress ? */
988 	mutex_enter(fp->rf_dinfo.rd_recall_lock);
989 	if (fp->rf_dinfo.rd_recall_count != 0) {
990 		mutex_exit(fp->rf_dinfo.rd_recall_lock);
991 		rfs4_dbe_rele_nolock(fp->rf_dbe);
992 		rfs4_dbe_unlock(fp->rf_dbe);
993 		kmem_free(map, sizeof (struct master_recall_args));
994 		return;
995 	}
996 
997 	mutex_exit(fp->rf_dinfo.rd_recall_lock);
998 
999 	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
1000 	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,	"v4RecallFile");
1001 
1002 	recall_count = 0;
1003 	for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
1004 	    dsp = list_next(&fp->rf_delegstatelist, dsp)) {
1005 
1006 		rfs4_dbe_lock(dsp->rds_dbe);
1007 		/*
1008 		 * if this delegation state
1009 		 * is being reaped skip it
1010 		 */
1011 		if (rfs4_dbe_is_invalid(dsp->rds_dbe)) {
1012 			rfs4_dbe_unlock(dsp->rds_dbe);
1013 			continue;
1014 		}
1015 
1016 		/* hold for receiving thread */
1017 		rfs4_dbe_hold(dsp->rds_dbe);
1018 		rfs4_dbe_unlock(dsp->rds_dbe);
1019 
1020 		arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP);
1021 		arg->recall = map->recall;
1022 		arg->trunc = map->trunc;
1023 		arg->dsp = dsp;
1024 
1025 		recall_count++;
1026 
1027 		(void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN,
1028 		    minclsyspri);
1029 	}
1030 
1031 	rfs4_dbe_unlock(fp->rf_dbe);
1032 
1033 	mutex_enter(fp->rf_dinfo.rd_recall_lock);
1034 	/*
1035 	 * Recall count may go negative if the parent thread that is
1036 	 * creating the individual callback threads does not modify
1037 	 * the recall_count field before the callback thread actually
1038 	 * gets a response from the CB_RECALL
1039 	 */
1040 	fp->rf_dinfo.rd_recall_count += recall_count;
1041 	while (fp->rf_dinfo.rd_recall_count)
1042 		cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock);
1043 
1044 	mutex_exit(fp->rf_dinfo.rd_recall_lock);
1045 
1046 	DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp);
1047 	rfs4_file_rele(fp);
1048 	kmem_free(map, sizeof (struct master_recall_args));
1049 	mutex_enter(&cpr_lock);
1050 	CALLB_CPR_EXIT(&cpr_info);
1051 	mutex_destroy(&cpr_lock);
1052 }
1053 
1054 static void
1055 rfs4_recall_file(rfs4_file_t *fp,
1056     void (*recall)(rfs4_deleg_state_t *, bool_t trunc),
1057     bool_t trunc, rfs4_client_t *cp)
1058 {
1059 	struct master_recall_args *args;
1060 
1061 	rfs4_dbe_lock(fp->rf_dbe);
1062 	if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1063 		rfs4_dbe_unlock(fp->rf_dbe);
1064 		return;
1065 	}
1066 	rfs4_dbe_hold(fp->rf_dbe);	/* hold for new thread */
1067 
1068 	/*
1069 	 * Mark the time we started the recall processing.
1070 	 * If it has been previously recalled, do not reset the
1071 	 * timer since this is used for the revocation decision.
1072 	 */
1073 	if (fp->rf_dinfo.rd_time_recalled == 0)
1074 		fp->rf_dinfo.rd_time_recalled = gethrestime_sec();
1075 	fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */
1076 	/* Client causing recall not always available */
1077 	if (cp)
1078 		fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid;
1079 
1080 	rfs4_dbe_unlock(fp->rf_dbe);
1081 
1082 	args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP);
1083 	args->fp = fp;
1084 	args->recall = recall;
1085 	args->trunc = trunc;
1086 
1087 	(void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN,
1088 	    minclsyspri);
1089 }
1090 
1091 void
1092 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp)
1093 {
1094 	time_t elapsed1, elapsed2;
1095 
1096 	if (fp->rf_dinfo.rd_time_recalled != 0) {
1097 		elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled;
1098 		elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite;
1099 		/* First check to see if a revocation should occur */
1100 		if (elapsed1 > rfs4_lease_time &&
1101 		    elapsed2 > rfs4_lease_time) {
1102 			rfs4_revoke_file(fp);
1103 			return;
1104 		}
1105 		/*
1106 		 * Next check to see if a recall should be done again
1107 		 * so quickly.
1108 		 */
1109 		if (elapsed1 <= ((rfs4_lease_time * 20) / 100))
1110 			return;
1111 	}
1112 	rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp);
1113 }
1114 
1115 /*
1116  * rfs4_check_recall is called from rfs4_do_open to determine if the current
1117  * open conflicts with the delegation.
1118  * Return true if we need recall otherwise false.
1119  * Assumes entry locks for sp and sp->rs_finfo are held.
1120  */
1121 bool_t
1122 rfs4_check_recall(rfs4_state_t *sp, uint32_t access)
1123 {
1124 	open_delegation_type4 dtype = sp->rs_finfo->rf_dinfo.rd_dtype;
1125 
1126 	switch (dtype) {
1127 	case OPEN_DELEGATE_NONE:
1128 		/* Not currently delegated so there is nothing to do */
1129 		return (FALSE);
1130 	case OPEN_DELEGATE_READ:
1131 		/*
1132 		 * If the access is only asking for READ then there is
1133 		 * no conflict and nothing to do.  If it is asking
1134 		 * for write, then there will be conflict and the read
1135 		 * delegation should be recalled.
1136 		 */
1137 		if (access == OPEN4_SHARE_ACCESS_READ)
1138 			return (FALSE);
1139 		else
1140 			return (TRUE);
1141 	case OPEN_DELEGATE_WRITE:
1142 		/* Check to see if this client has the delegation */
1143 		return (rfs4_is_deleg(sp));
1144 	}
1145 
1146 	return (FALSE);
1147 }
1148 
1149 /*
1150  * Return the "best" allowable delegation available given the current
1151  * delegation type and the desired access and deny modes on the file.
1152  * At the point that this routine is called we know that the access and
1153  * deny modes are consistent with the file modes.
1154  */
1155 static open_delegation_type4
1156 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp)
1157 {
1158 	open_delegation_type4 dtype = fp->rf_dinfo.rd_dtype;
1159 	uint32_t access = sp->rs_share_access;
1160 	uint32_t deny = sp->rs_share_deny;
1161 	int readcnt = 0;
1162 	int writecnt = 0;
1163 
1164 	switch (dtype) {
1165 	case OPEN_DELEGATE_NONE:
1166 		/*
1167 		 * Determine if more than just this OPEN have the file
1168 		 * open and if so, no delegation may be provided to
1169 		 * the client.
1170 		 */
1171 		if (access & OPEN4_SHARE_ACCESS_WRITE)
1172 			writecnt++;
1173 		if (access & OPEN4_SHARE_ACCESS_READ)
1174 			readcnt++;
1175 
1176 		if (fp->rf_access_read > readcnt ||
1177 		    fp->rf_access_write > writecnt)
1178 			return (OPEN_DELEGATE_NONE);
1179 
1180 		/*
1181 		 * If the client is going to write, or if the client
1182 		 * has exclusive access, return a write delegation.
1183 		 */
1184 		if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1185 		    (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE)))
1186 			return (OPEN_DELEGATE_WRITE);
1187 		/*
1188 		 * If we don't want to write or we've haven't denied read
1189 		 * access to others, return a read delegation.
1190 		 */
1191 		if ((access & ~OPEN4_SHARE_ACCESS_WRITE) ||
1192 		    (deny & ~OPEN4_SHARE_DENY_READ))
1193 			return (OPEN_DELEGATE_READ);
1194 
1195 		/* Shouldn't get here */
1196 		return (OPEN_DELEGATE_NONE);
1197 
1198 	case OPEN_DELEGATE_READ:
1199 		/*
1200 		 * If the file is delegated for read but we wan't to
1201 		 * write or deny others to read then we can't delegate
1202 		 * the file. We shouldn't get here since the delegation should
1203 		 * have been recalled already.
1204 		 */
1205 		if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1206 		    (deny & OPEN4_SHARE_DENY_READ))
1207 			return (OPEN_DELEGATE_NONE);
1208 		return (OPEN_DELEGATE_READ);
1209 
1210 	case OPEN_DELEGATE_WRITE:
1211 		return (OPEN_DELEGATE_WRITE);
1212 	}
1213 
1214 	/* Shouldn't get here */
1215 	return (OPEN_DELEGATE_NONE);
1216 }
1217 
1218 /*
1219  * Given the desired delegation type and the "history" of the file
1220  * determine the actual delegation type to return.
1221  */
1222 static open_delegation_type4
1223 rfs4_delegation_policy(open_delegation_type4 dtype,
1224     rfs4_dinfo_t *dinfo, clientid4 cid)
1225 {
1226 	time_t elapsed;
1227 
1228 	if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE)
1229 		return (OPEN_DELEGATE_NONE);
1230 
1231 	/*
1232 	 * Has this file/delegation ever been recalled?  If not then
1233 	 * no further checks for a delegation race need to be done.
1234 	 * However if a recall has occurred, then check to see if a
1235 	 * client has caused its own delegation recall to occur.  If
1236 	 * not, then has a delegation for this file been returned
1237 	 * recently?  If so, then do not assign a new delegation to
1238 	 * avoid a "delegation race" between the original client and
1239 	 * the new/conflicting client.
1240 	 */
1241 	if (dinfo->rd_ever_recalled == TRUE) {
1242 		if (dinfo->rd_conflicted_client != cid) {
1243 			elapsed = gethrestime_sec() - dinfo->rd_time_returned;
1244 			if (elapsed < rfs4_lease_time)
1245 				return (OPEN_DELEGATE_NONE);
1246 		}
1247 	}
1248 
1249 	/* Limit the number of read grants */
1250 	if (dtype == OPEN_DELEGATE_READ &&
1251 	    dinfo->rd_rdgrants > MAX_READ_DELEGATIONS)
1252 		return (OPEN_DELEGATE_NONE);
1253 
1254 	/*
1255 	 * Should consider limiting total number of read/write
1256 	 * delegations the server will permit.
1257 	 */
1258 
1259 	return (dtype);
1260 }
1261 
1262 /*
1263  * Try and grant a delegation for an open give the state. The routine
1264  * returns the delegation type granted. This could be OPEN_DELEGATE_NONE.
1265  *
1266  * The state and associate file entry must be locked
1267  */
1268 rfs4_deleg_state_t *
1269 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall)
1270 {
1271 	rfs4_file_t *fp = sp->rs_finfo;
1272 	open_delegation_type4 dtype;
1273 	int no_delegation;
1274 
1275 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1276 	ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1277 
1278 	/* Is the server even providing delegations? */
1279 	if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE)
1280 		return (NULL);
1281 
1282 	/* Check to see if delegations have been temporarily disabled */
1283 	mutex_enter(&rfs4_deleg_lock);
1284 	no_delegation = rfs4_deleg_disabled;
1285 	mutex_exit(&rfs4_deleg_lock);
1286 
1287 	if (no_delegation)
1288 		return (NULL);
1289 
1290 	/* Don't grant a delegation if a deletion is impending. */
1291 	if (fp->rf_dinfo.rd_hold_grant > 0) {
1292 		return (NULL);
1293 	}
1294 
1295 	/*
1296 	 * Don't grant a delegation if there are any lock manager
1297 	 * (NFSv2/v3) locks for the file.  This is a bit of a hack (e.g.,
1298 	 * if there are only read locks we should be able to grant a
1299 	 * read-only delegation), but it's good enough for now.
1300 	 *
1301 	 * MT safety: the lock manager checks for conflicting delegations
1302 	 * before processing a lock request.  That check will block until
1303 	 * we are done here.  So if the lock manager acquires a lock after
1304 	 * we decide to grant the delegation, the delegation will get
1305 	 * immediately recalled (if there's a conflict), so we're safe.
1306 	 */
1307 	if (lm_vp_active(fp->rf_vp)) {
1308 		return (NULL);
1309 	}
1310 
1311 	/*
1312 	 * Based on the type of delegation request passed in, take the
1313 	 * appropriate action (DELEG_NONE is handled above)
1314 	 */
1315 	switch (dreq) {
1316 
1317 	case DELEG_READ:
1318 	case DELEG_WRITE:
1319 		/*
1320 		 * The server "must" grant the delegation in this case.
1321 		 * Client is using open previous
1322 		 */
1323 		dtype = (open_delegation_type4)dreq;
1324 		*recall = 1;
1325 		break;
1326 	case DELEG_ANY:
1327 		/*
1328 		 * If a valid callback path does not exist, no delegation may
1329 		 * be granted.
1330 		 */
1331 		if (sp->rs_owner->ro_client->rc_cbinfo.cb_state != CB_OK)
1332 			return (NULL);
1333 
1334 		/*
1335 		 * If the original operation which caused time_rm_delayed
1336 		 * to be set hasn't been retried and completed for one
1337 		 * full lease period, clear it and allow delegations to
1338 		 * get granted again.
1339 		 */
1340 		if (fp->rf_dinfo.rd_time_rm_delayed > 0 &&
1341 		    gethrestime_sec() >
1342 		    fp->rf_dinfo.rd_time_rm_delayed + rfs4_lease_time)
1343 			fp->rf_dinfo.rd_time_rm_delayed = 0;
1344 
1345 		/*
1346 		 * If we are waiting for a delegation to be returned then
1347 		 * don't delegate this file. We do this for correctness as
1348 		 * well as if the file is being recalled we would likely
1349 		 * recall this file again.
1350 		 */
1351 
1352 		if (fp->rf_dinfo.rd_time_recalled != 0 ||
1353 		    fp->rf_dinfo.rd_time_rm_delayed != 0)
1354 			return (NULL);
1355 
1356 		/* Get the "best" delegation candidate */
1357 		dtype = rfs4_check_delegation(sp, fp);
1358 
1359 		if (dtype == OPEN_DELEGATE_NONE)
1360 			return (NULL);
1361 
1362 		/*
1363 		 * Based on policy and the history of the file get the
1364 		 * actual delegation.
1365 		 */
1366 		dtype = rfs4_delegation_policy(dtype, &fp->rf_dinfo,
1367 		    sp->rs_owner->ro_client->rc_clientid);
1368 
1369 		if (dtype == OPEN_DELEGATE_NONE)
1370 			return (NULL);
1371 		break;
1372 	default:
1373 		return (NULL);
1374 	}
1375 
1376 	/* set the delegation for the state */
1377 	return (rfs4_deleg_state(sp, dtype, recall));
1378 }
1379 
1380 void
1381 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp,
1382     nfsace4 *ace,  int recall)
1383 {
1384 	open_write_delegation4 *wp;
1385 	open_read_delegation4 *rp;
1386 	nfs_space_limit4 *spl;
1387 	nfsace4 nace;
1388 
1389 	/*
1390 	 * We need to allocate a new copy of the who string.
1391 	 * this string will be freed by the rfs4_op_open dis_resfree
1392 	 * routine. We need to do this allocation since replays will
1393 	 * be allocated and rfs4_compound can't tell the difference from
1394 	 * a replay and an inital open. N.B. if an ace is passed in, it
1395 	 * the caller's responsibility to free it.
1396 	 */
1397 
1398 	if (ace == NULL) {
1399 		/*
1400 		 * Default is to deny all access, the client will have
1401 		 * to contact the server.  XXX Do we want to actually
1402 		 * set a deny for every one, or do we simply want to
1403 		 * construct an entity that will match no one?
1404 		 */
1405 		nace.type = ACE4_ACCESS_DENIED_ACE_TYPE;
1406 		nace.flag = 0;
1407 		nace.access_mask = ACE4_VALID_MASK_BITS;
1408 		(void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who);
1409 	} else {
1410 		nace.type = ace->type;
1411 		nace.flag = ace->flag;
1412 		nace.access_mask = ace->access_mask;
1413 		(void) utf8_copy(&ace->who, &nace.who);
1414 	}
1415 
1416 	dp->delegation_type = dsp->rds_dtype;
1417 
1418 	switch (dsp->rds_dtype) {
1419 	case OPEN_DELEGATE_NONE:
1420 		break;
1421 	case OPEN_DELEGATE_READ:
1422 		rp = &dp->open_delegation4_u.read;
1423 		rp->stateid = dsp->rds_delegid.stateid;
1424 		rp->recall = (bool_t)recall;
1425 		rp->permissions = nace;
1426 		break;
1427 	case OPEN_DELEGATE_WRITE:
1428 		wp = &dp->open_delegation4_u.write;
1429 		wp->stateid = dsp->rds_delegid.stateid;
1430 		wp->recall = (bool_t)recall;
1431 		spl = &wp->space_limit;
1432 		spl->limitby = NFS_LIMIT_SIZE;
1433 		spl->nfs_space_limit4_u.filesize = 0;
1434 		wp->permissions = nace;
1435 		break;
1436 	}
1437 }
1438 
1439 /*
1440  * Check if the file is delegated via the provided file struct.
1441  * Return TRUE if it is delegated.  This is intended for use by
1442  * the v4 server.  The v2/v3 server code should use rfs4_check_delegated().
1443  *
1444  * Note that if the file is found to have a delegation, it is
1445  * recalled, unless the clientid of the caller matches the clientid of the
1446  * delegation. If the caller has specified, there is a slight delay
1447  * inserted in the hopes that the delegation will be returned quickly.
1448  */
1449 bool_t
1450 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp,
1451     bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp)
1452 {
1453 	rfs4_deleg_state_t *dsp;
1454 
1455 	/* Is delegation enabled? */
1456 	if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
1457 		return (FALSE);
1458 
1459 	/* do we have a delegation on this file? */
1460 	rfs4_dbe_lock(fp->rf_dbe);
1461 	if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1462 		if (is_rm)
1463 			fp->rf_dinfo.rd_hold_grant++;
1464 		rfs4_dbe_unlock(fp->rf_dbe);
1465 		return (FALSE);
1466 	}
1467 	/*
1468 	 * do we have a write delegation on this file or are we
1469 	 * requesting write access to a file with any type of existing
1470 	 * delegation?
1471 	 */
1472 	if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
1473 		if (cp != NULL) {
1474 			dsp = list_head(&fp->rf_delegstatelist);
1475 			if (dsp == NULL) {
1476 				rfs4_dbe_unlock(fp->rf_dbe);
1477 				return (FALSE);
1478 			}
1479 			/*
1480 			 * Does the requestor already own the delegation?
1481 			 */
1482 			if (dsp->rds_client->rc_clientid == *(cp)) {
1483 				rfs4_dbe_unlock(fp->rf_dbe);
1484 				return (FALSE);
1485 			}
1486 		}
1487 
1488 		rfs4_dbe_unlock(fp->rf_dbe);
1489 		rfs4_recall_deleg(fp, trunc, NULL);
1490 
1491 		if (!do_delay) {
1492 			rfs4_dbe_lock(fp->rf_dbe);
1493 			fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1494 			rfs4_dbe_unlock(fp->rf_dbe);
1495 			return (TRUE);
1496 		}
1497 
1498 		delay(NFS4_DELEGATION_CONFLICT_DELAY);
1499 
1500 		rfs4_dbe_lock(fp->rf_dbe);
1501 		if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) {
1502 			fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1503 			rfs4_dbe_unlock(fp->rf_dbe);
1504 			return (TRUE);
1505 		}
1506 	}
1507 	if (is_rm)
1508 		fp->rf_dinfo.rd_hold_grant++;
1509 	rfs4_dbe_unlock(fp->rf_dbe);
1510 	return (FALSE);
1511 }
1512 
1513 /*
1514  * Check if the file is delegated in the case of a v2 or v3 access.
1515  * Return TRUE if it is delegated which in turn means that v2 should
1516  * drop the request and in the case of v3 JUKEBOX should be returned.
1517  */
1518 bool_t
1519 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc)
1520 {
1521 	rfs4_file_t *fp;
1522 	bool_t create = FALSE;
1523 	bool_t rc = FALSE;
1524 
1525 	rfs4_hold_deleg_policy();
1526 
1527 	/* Is delegation enabled? */
1528 	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) {
1529 		fp = rfs4_findfile(vp, NULL, &create);
1530 		if (fp != NULL) {
1531 			if (rfs4_check_delegated_byfp(mode, fp, trunc,
1532 			    TRUE, FALSE, NULL)) {
1533 				rc = TRUE;
1534 			}
1535 			rfs4_file_rele(fp);
1536 		}
1537 	}
1538 	rfs4_rele_deleg_policy();
1539 	return (rc);
1540 }
1541 
1542 /*
1543  * Release a hold on the hold_grant counter which
1544  * prevents delegation from being granted while a remove
1545  * or a rename is in progress.
1546  */
1547 void
1548 rfs4_clear_dont_grant(rfs4_file_t *fp)
1549 {
1550 	if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
1551 		return;
1552 	rfs4_dbe_lock(fp->rf_dbe);
1553 	ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
1554 	fp->rf_dinfo.rd_hold_grant--;
1555 	fp->rf_dinfo.rd_time_rm_delayed = 0;
1556 	rfs4_dbe_unlock(fp->rf_dbe);
1557 }
1558 
1559 /*
1560  * State support for delegation.
1561  * Set the state delegation type for this state;
1562  * This routine is called from open via rfs4_grant_delegation and the entry
1563  * locks on sp and sp->rs_finfo are assumed.
1564  */
1565 static rfs4_deleg_state_t *
1566 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall)
1567 {
1568 	rfs4_file_t *fp = sp->rs_finfo;
1569 	bool_t create = TRUE;
1570 	rfs4_deleg_state_t *dsp;
1571 	vnode_t *vp;
1572 	int open_prev = *recall;
1573 	int ret;
1574 	int fflags = 0;
1575 
1576 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1577 	ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1578 
1579 	/* Shouldn't happen */
1580 	if (fp->rf_dinfo.rd_recall_count != 0 ||
1581 	    (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1582 	    dtype != OPEN_DELEGATE_READ)) {
1583 		return (NULL);
1584 	}
1585 
1586 	/* Unlock to avoid deadlock */
1587 	rfs4_dbe_unlock(fp->rf_dbe);
1588 	rfs4_dbe_unlock(sp->rs_dbe);
1589 
1590 	dsp = rfs4_finddeleg(sp, &create);
1591 
1592 	rfs4_dbe_lock(sp->rs_dbe);
1593 	rfs4_dbe_lock(fp->rf_dbe);
1594 
1595 	if (dsp == NULL)
1596 		return (NULL);
1597 
1598 	/*
1599 	 * It is possible that since we dropped the lock
1600 	 * in order to call finddeleg, the rfs4_file_t
1601 	 * was marked such that we should not grant a
1602 	 * delegation, if so bail out.
1603 	 */
1604 	if (fp->rf_dinfo.rd_hold_grant > 0) {
1605 		rfs4_deleg_state_rele(dsp);
1606 		return (NULL);
1607 	}
1608 
1609 	if (create == FALSE) {
1610 		if (sp->rs_owner->ro_client == dsp->rds_client &&
1611 		    dsp->rds_dtype == dtype) {
1612 			return (dsp);
1613 		} else {
1614 			rfs4_deleg_state_rele(dsp);
1615 			return (NULL);
1616 		}
1617 	}
1618 
1619 	/*
1620 	 * Check that this file has not been delegated to another
1621 	 * client
1622 	 */
1623 	if (fp->rf_dinfo.rd_recall_count != 0 ||
1624 	    fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE ||
1625 	    (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1626 	    dtype != OPEN_DELEGATE_READ)) {
1627 		rfs4_deleg_state_rele(dsp);
1628 		return (NULL);
1629 	}
1630 
1631 	vp = fp->rf_vp;
1632 	/* vnevent_support returns 0 if file system supports vnevents */
1633 	if (vnevent_support(vp, NULL)) {
1634 		rfs4_deleg_state_rele(dsp);
1635 		return (NULL);
1636 	}
1637 
1638 	/* Calculate the fflags for this OPEN. */
1639 	if (sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)
1640 		fflags |= FREAD;
1641 	if (sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)
1642 		fflags |= FWRITE;
1643 
1644 	*recall = 0;
1645 	/*
1646 	 * Before granting a delegation we need to know if anyone else has
1647 	 * opened the file in a conflicting mode.  However, first we need to
1648 	 * know how we opened the file to check the counts properly.
1649 	 */
1650 	if (dtype == OPEN_DELEGATE_READ) {
1651 		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1652 		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1653 		    vn_is_mapped(vp, V_WRITE)) {
1654 			if (open_prev) {
1655 				*recall = 1;
1656 			} else {
1657 				rfs4_deleg_state_rele(dsp);
1658 				return (NULL);
1659 			}
1660 		}
1661 		ret = fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ,
1662 		    rfs4_mon_hold, rfs4_mon_rele);
1663 		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1664 		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1665 		    vn_is_mapped(vp, V_WRITE)) {
1666 			if (open_prev) {
1667 				*recall = 1;
1668 			} else {
1669 				(void) fem_uninstall(vp, deleg_rdops,
1670 				    (void *)fp);
1671 				rfs4_deleg_state_rele(dsp);
1672 				return (NULL);
1673 			}
1674 		}
1675 		/*
1676 		 * Because a client can hold onto a delegation after the
1677 		 * file has been closed, we need to keep track of the
1678 		 * access to this file.  Otherwise the CIFS server would
1679 		 * not know about the client accessing the file and could
1680 		 * inappropriately grant an OPLOCK.
1681 		 * fem_install() returns EBUSY when asked to install a
1682 		 * OPUNIQ monitor more than once.  Therefore, check the
1683 		 * return code because we only want this done once.
1684 		 */
1685 		if (ret == 0)
1686 			vn_open_upgrade(vp, FREAD);
1687 	} else { /* WRITE */
1688 		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1689 		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1690 		    ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1691 		    (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1692 		    vn_is_mapped(vp, V_RDORWR)) {
1693 			if (open_prev) {
1694 				*recall = 1;
1695 			} else {
1696 				rfs4_deleg_state_rele(dsp);
1697 				return (NULL);
1698 			}
1699 		}
1700 		ret = fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ,
1701 		    rfs4_mon_hold, rfs4_mon_rele);
1702 		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1703 		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1704 		    ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1705 		    (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1706 		    vn_is_mapped(vp, V_RDORWR)) {
1707 			if (open_prev) {
1708 				*recall = 1;
1709 			} else {
1710 				(void) fem_uninstall(vp, deleg_wrops,
1711 				    (void *)fp);
1712 				rfs4_deleg_state_rele(dsp);
1713 				return (NULL);
1714 			}
1715 		}
1716 		/*
1717 		 * Because a client can hold onto a delegation after the
1718 		 * file has been closed, we need to keep track of the
1719 		 * access to this file.  Otherwise the CIFS server would
1720 		 * not know about the client accessing the file and could
1721 		 * inappropriately grant an OPLOCK.
1722 		 * fem_install() returns EBUSY when asked to install a
1723 		 * OPUNIQ monitor more than once.  Therefore, check the
1724 		 * return code because we only want this done once.
1725 		 */
1726 		if (ret == 0)
1727 			vn_open_upgrade(vp, FREAD|FWRITE);
1728 	}
1729 	/* Place on delegation list for file */
1730 	ASSERT(!list_link_active(&dsp->rds_node));
1731 	list_insert_tail(&fp->rf_delegstatelist, dsp);
1732 
1733 	dsp->rds_dtype = fp->rf_dinfo.rd_dtype = dtype;
1734 
1735 	/* Update delegation stats for this file */
1736 	fp->rf_dinfo.rd_time_lastgrant = gethrestime_sec();
1737 
1738 	/* reset since this is a new delegation */
1739 	fp->rf_dinfo.rd_conflicted_client = 0;
1740 	fp->rf_dinfo.rd_ever_recalled = FALSE;
1741 
1742 	if (dtype == OPEN_DELEGATE_READ)
1743 		fp->rf_dinfo.rd_rdgrants++;
1744 	else
1745 		fp->rf_dinfo.rd_wrgrants++;
1746 
1747 	return (dsp);
1748 }
1749 
1750 /*
1751  * State routine for the server when a delegation is returned.
1752  */
1753 void
1754 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked)
1755 {
1756 	rfs4_file_t *fp = dsp->rds_finfo;
1757 	open_delegation_type4 dtypewas;
1758 
1759 	rfs4_dbe_lock(fp->rf_dbe);
1760 
1761 	/* nothing to do if no longer on list */
1762 	if (!list_link_active(&dsp->rds_node)) {
1763 		rfs4_dbe_unlock(fp->rf_dbe);
1764 		return;
1765 	}
1766 
1767 	/* Remove state from recall list */
1768 	list_remove(&fp->rf_delegstatelist, dsp);
1769 
1770 	if (list_is_empty(&fp->rf_delegstatelist)) {
1771 		dtypewas = fp->rf_dinfo.rd_dtype;
1772 		fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
1773 		rfs4_dbe_cv_broadcast(fp->rf_dbe);
1774 
1775 		/* if file system was unshared, the vp will be NULL */
1776 		if (fp->rf_vp != NULL) {
1777 			/*
1778 			 * Once a delegation is no longer held by any client,
1779 			 * the monitor is uninstalled.  At this point, the
1780 			 * client must send OPEN otw, so we don't need the
1781 			 * reference on the vnode anymore.  The open
1782 			 * downgrade removes the reference put on earlier.
1783 			 */
1784 			if (dtypewas == OPEN_DELEGATE_READ) {
1785 				(void) fem_uninstall(fp->rf_vp, deleg_rdops,
1786 				    (void *)fp);
1787 				vn_open_downgrade(fp->rf_vp, FREAD);
1788 			} else if (dtypewas == OPEN_DELEGATE_WRITE) {
1789 				(void) fem_uninstall(fp->rf_vp, deleg_wrops,
1790 				    (void *)fp);
1791 				vn_open_downgrade(fp->rf_vp, FREAD|FWRITE);
1792 			}
1793 		}
1794 	}
1795 
1796 	switch (dsp->rds_dtype) {
1797 	case OPEN_DELEGATE_READ:
1798 		fp->rf_dinfo.rd_rdgrants--;
1799 		break;
1800 	case OPEN_DELEGATE_WRITE:
1801 		fp->rf_dinfo.rd_wrgrants--;
1802 		break;
1803 	default:
1804 		break;
1805 	}
1806 
1807 	/* used in the policy decision */
1808 	fp->rf_dinfo.rd_time_returned = gethrestime_sec();
1809 
1810 	/*
1811 	 * reset the time_recalled field so future delegations are not
1812 	 * accidentally revoked
1813 	 */
1814 	if ((fp->rf_dinfo.rd_rdgrants + fp->rf_dinfo.rd_wrgrants) == 0)
1815 		fp->rf_dinfo.rd_time_recalled = 0;
1816 
1817 	rfs4_dbe_unlock(fp->rf_dbe);
1818 
1819 	rfs4_dbe_lock(dsp->rds_dbe);
1820 
1821 	dsp->rds_dtype = OPEN_DELEGATE_NONE;
1822 
1823 	if (revoked == TRUE)
1824 		dsp->rds_time_revoked = gethrestime_sec();
1825 
1826 	rfs4_dbe_invalidate(dsp->rds_dbe);
1827 
1828 	rfs4_dbe_unlock(dsp->rds_dbe);
1829 
1830 	if (revoked == TRUE) {
1831 		rfs4_dbe_lock(dsp->rds_client->rc_dbe);
1832 		dsp->rds_client->rc_deleg_revoked++;	/* observability */
1833 		rfs4_dbe_unlock(dsp->rds_client->rc_dbe);
1834 	}
1835 }
1836 
1837 static void
1838 rfs4_revoke_file(rfs4_file_t *fp)
1839 {
1840 	rfs4_deleg_state_t *dsp;
1841 
1842 	/*
1843 	 * The lock for rfs4_file_t must be held when traversing the
1844 	 * delegation list but that lock needs to be released to call
1845 	 * rfs4_return_deleg()
1846 	 */
1847 	rfs4_dbe_lock(fp->rf_dbe);
1848 	while (dsp = list_head(&fp->rf_delegstatelist)) {
1849 		rfs4_dbe_hold(dsp->rds_dbe);
1850 		rfs4_dbe_unlock(fp->rf_dbe);
1851 		rfs4_return_deleg(dsp, TRUE);
1852 		rfs4_deleg_state_rele(dsp);
1853 		rfs4_dbe_lock(fp->rf_dbe);
1854 	}
1855 	rfs4_dbe_unlock(fp->rf_dbe);
1856 }
1857 
1858 /*
1859  * A delegation is assumed to be present on the file associated with
1860  * "sp".  Check to see if the delegation matches is associated with
1861  * the same client as referenced by "sp".  If it is not, TRUE is
1862  * returned.  If the delegation DOES match the client (or no
1863  * delegation is present), return FALSE.
1864  * Assume the state entry and file entry are locked.
1865  */
1866 bool_t
1867 rfs4_is_deleg(rfs4_state_t *sp)
1868 {
1869 	rfs4_deleg_state_t *dsp;
1870 	rfs4_file_t *fp = sp->rs_finfo;
1871 	rfs4_client_t *cp = sp->rs_owner->ro_client;
1872 
1873 	ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1874 	for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
1875 	    dsp = list_next(&fp->rf_delegstatelist, dsp)) {
1876 		if (cp != dsp->rds_client) {
1877 			return (TRUE);
1878 		}
1879 	}
1880 	return (FALSE);
1881 }
1882 
1883 void
1884 rfs4_disable_delegation(void)
1885 {
1886 	mutex_enter(&rfs4_deleg_lock);
1887 	rfs4_deleg_disabled++;
1888 	mutex_exit(&rfs4_deleg_lock);
1889 }
1890 
1891 void
1892 rfs4_enable_delegation(void)
1893 {
1894 	mutex_enter(&rfs4_deleg_lock);
1895 	ASSERT(rfs4_deleg_disabled > 0);
1896 	rfs4_deleg_disabled--;
1897 	mutex_exit(&rfs4_deleg_lock);
1898 }
1899 
1900 void
1901 rfs4_mon_hold(void *arg)
1902 {
1903 	rfs4_file_t *fp = arg;
1904 
1905 	rfs4_dbe_hold(fp->rf_dbe);
1906 }
1907 
1908 void
1909 rfs4_mon_rele(void *arg)
1910 {
1911 	rfs4_file_t *fp = arg;
1912 
1913 	rfs4_dbe_rele_nolock(fp->rf_dbe);
1914 }
1915