xref: /titanic_41/usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c (revision 3dabdd6ebae8bdf3ae7bc5787556261a9a12a2b6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
26  */
27 
28 #include <sys/systm.h>
29 #include <rpc/auth.h>
30 #include <rpc/clnt.h>
31 #include <nfs/nfs4_kprot.h>
32 #include <nfs/nfs4.h>
33 #include <nfs/lm.h>
34 #include <sys/cmn_err.h>
35 #include <sys/disp.h>
36 #include <sys/sdt.h>
37 
38 #include <sys/pathname.h>
39 
40 #include <sys/strsubr.h>
41 #include <sys/ddi.h>
42 
43 #include <sys/vnode.h>
44 #include <sys/sdt.h>
45 #include <inet/common.h>
46 #include <inet/ip.h>
47 #include <inet/ip6.h>
48 
49 #define	MAX_READ_DELEGATIONS 5
50 
51 krwlock_t rfs4_deleg_policy_lock;
52 srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE;
53 static int rfs4_deleg_wlp = 5;
54 kmutex_t rfs4_deleg_lock;
55 static int rfs4_deleg_disabled;
56 static int rfs4_max_setup_cb_tries = 5;
57 
58 #ifdef DEBUG
59 
60 static int rfs4_test_cbgetattr_fail = 0;
61 int rfs4_cb_null;
62 int rfs4_cb_debug;
63 int rfs4_deleg_debug;
64 
65 #endif
66 
67 static void rfs4_recall_file(rfs4_file_t *,
68     void (*recall)(rfs4_deleg_state_t *, bool_t),
69     bool_t, rfs4_client_t *);
70 static	void		rfs4_revoke_file(rfs4_file_t *);
71 static	void		rfs4_cb_chflush(rfs4_cbinfo_t *);
72 static	CLIENT		*rfs4_cb_getch(rfs4_cbinfo_t *);
73 static	void		rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t);
74 static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *,
75     open_delegation_type4, int *);
76 
/*
 * Convert a universal address (a host address followed by ".p1.p2",
 * where p1 and p2 are the decimal high and low bytes of the port) into
 * a transport specific address using inet_pton().
 *
 *	af	address family handed to inet_pton()
 *	ua	NUL-terminated universal address; the '.' that ends the
 *		host part is temporarily overwritten with '\0' and is
 *		always restored before returning
 *	ap	receives the binary host address written by inet_pton()
 *	pp	receives the port in network byte order (on success only)
 *
 * Returns 0 on success.  Returns EINVAL when the string holds fewer
 * than two '.' characters, when inet_pton() rejects the host part, or
 * when the port part contains a non-digit character or a byte value
 * larger than 255.
 */
static int
uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp)
{
	int dots = 0, i, j, len;
	unsigned int octet;
	in_port_t port = 0;
	int error = EINVAL;

	len = strlen(ua);

	/* Walk backwards to the second '.' from the end: it ends the host. */
	for (i = len - 1; i >= 0; i--) {
		if (ua[i] == '.')
			dots++;
		if (dots == 2)
			break;
	}
	if (i < 0)
		return (EINVAL);

	/* Terminate the host part in place; "out" puts the '.' back. */
	ua[i] = '\0';
	if (inet_pton(af, ua, ap) != 1)
		goto out;

	octet = 0;
	for (j = i + 1; j < len; j++) {
		if (ua[j] == '.') {
			/* end of the high byte of the port */
			port = octet << 8;
			octet = 0;
		} else if (ua[j] >= '0' && ua[j] <= '9') {
			octet = octet * 10 + (ua[j] - '0');
			/* each port component must fit in a single byte */
			if (octet > 255)
				goto out;
		} else {
			goto out;
		}
	}
	port += octet;

	*pp = htons(port);
	error = 0;
out:
	ua[i] = '.';
	return (error);
}
135 
136 /*
137  * Update the delegation policy with the
138  * value of "new_policy"
139  */
140 void
rfs4_set_deleg_policy(srv_deleg_policy_t new_policy)141 rfs4_set_deleg_policy(srv_deleg_policy_t new_policy)
142 {
143 	rw_enter(&rfs4_deleg_policy_lock, RW_WRITER);
144 	rfs4_deleg_policy = new_policy;
145 	rw_exit(&rfs4_deleg_policy_lock);
146 }
147 
148 void
rfs4_hold_deleg_policy(void)149 rfs4_hold_deleg_policy(void)
150 {
151 	rw_enter(&rfs4_deleg_policy_lock, RW_READER);
152 }
153 
154 void
rfs4_rele_deleg_policy(void)155 rfs4_rele_deleg_policy(void)
156 {
157 	rw_exit(&rfs4_deleg_policy_lock);
158 }
159 
160 
161 /*
162  * This free function is to be used when the client struct is being
163  * released and nothing at all is needed of the callback info any
164  * longer.
165  */
166 void
rfs4_cbinfo_free(rfs4_cbinfo_t * cbp)167 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp)
168 {
169 	char *addr = cbp->cb_callback.cb_location.r_addr;
170 	char *netid = cbp->cb_callback.cb_location.r_netid;
171 
172 	/* Free old address if any */
173 
174 	if (addr)
175 		kmem_free(addr, strlen(addr) + 1);
176 	if (netid)
177 		kmem_free(netid, strlen(netid) + 1);
178 
179 	addr = cbp->cb_newer.cb_callback.cb_location.r_addr;
180 	netid = cbp->cb_newer.cb_callback.cb_location.r_netid;
181 
182 	if (addr)
183 		kmem_free(addr, strlen(addr) + 1);
184 	if (netid)
185 		kmem_free(netid, strlen(netid) + 1);
186 
187 	if (cbp->cb_chc_free) {
188 		rfs4_cb_chflush(cbp);
189 	}
190 }
191 
192 /*
193  * The server uses this to check the callback path supplied by the
194  * client.  The callback connection is marked "in progress" while this
195  * work is going on and then eventually marked either OK or FAILED.
196  * This work can be done as part of a separate thread and at the end
197  * of this the thread will exit or it may be done such that the caller
198  * will continue with other work.
199  */
200 static void
rfs4_do_cb_null(rfs4_client_t * cp)201 rfs4_do_cb_null(rfs4_client_t *cp)
202 {
203 	struct timeval tv;
204 	CLIENT *ch;
205 	rfs4_cbstate_t newstate;
206 	rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
207 
208 	mutex_enter(cbp->cb_lock);
209 	/* If another thread is doing CB_NULL RPC then return */
210 	if (cbp->cb_nullcaller == TRUE) {
211 		mutex_exit(cbp->cb_lock);
212 		rfs4_client_rele(cp);
213 		return;
214 	}
215 
216 	/* Mark the cbinfo as having a thread in the NULL callback */
217 	cbp->cb_nullcaller = TRUE;
218 
219 	/*
220 	 * Are there other threads still using the cbinfo client
221 	 * handles?  If so, this thread must wait before going and
222 	 * mucking aroiund with the callback information
223 	 */
224 	while (cbp->cb_refcnt != 0)
225 		cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock);
226 
227 	/*
228 	 * This thread itself may find that new callback info has
229 	 * arrived and is set up to handle this case and redrive the
230 	 * call to the client's callback server.
231 	 */
232 retry:
233 	if (cbp->cb_newer.cb_new == TRUE &&
234 	    cbp->cb_newer.cb_confirmed == TRUE) {
235 		char *addr = cbp->cb_callback.cb_location.r_addr;
236 		char *netid = cbp->cb_callback.cb_location.r_netid;
237 
238 		/*
239 		 * Free the old stuff if it exists; may be the first
240 		 * time through this path
241 		 */
242 		if (addr)
243 			kmem_free(addr, strlen(addr) + 1);
244 		if (netid)
245 			kmem_free(netid, strlen(netid) + 1);
246 
247 		/* Move over the addr/netid */
248 		cbp->cb_callback.cb_location.r_addr =
249 		    cbp->cb_newer.cb_callback.cb_location.r_addr;
250 		cbp->cb_newer.cb_callback.cb_location.r_addr = NULL;
251 		cbp->cb_callback.cb_location.r_netid =
252 		    cbp->cb_newer.cb_callback.cb_location.r_netid;
253 		cbp->cb_newer.cb_callback.cb_location.r_netid = NULL;
254 
255 		/* Get the program number */
256 		cbp->cb_callback.cb_program =
257 		    cbp->cb_newer.cb_callback.cb_program;
258 		cbp->cb_newer.cb_callback.cb_program = 0;
259 
260 		/* Don't forget the protocol's "cb_ident" field */
261 		cbp->cb_ident = cbp->cb_newer.cb_ident;
262 		cbp->cb_newer.cb_ident = 0;
263 
264 		/* no longer new */
265 		cbp->cb_newer.cb_new = FALSE;
266 		cbp->cb_newer.cb_confirmed = FALSE;
267 
268 		/* get rid of the old client handles that may exist */
269 		rfs4_cb_chflush(cbp);
270 
271 		cbp->cb_state = CB_NONE;
272 		cbp->cb_timefailed = 0; /* reset the clock */
273 		cbp->cb_notified_of_cb_path_down = TRUE;
274 	}
275 
276 	if (cbp->cb_state != CB_NONE) {
277 		cv_broadcast(cbp->cb_cv);	/* let the others know */
278 		cbp->cb_nullcaller = FALSE;
279 		mutex_exit(cbp->cb_lock);
280 		rfs4_client_rele(cp);
281 		return;
282 	}
283 
284 	/* mark rfs4_client_t as CALLBACK NULL in progress */
285 	cbp->cb_state = CB_INPROG;
286 	mutex_exit(cbp->cb_lock);
287 
288 	/* get/generate a client handle */
289 	if ((ch = rfs4_cb_getch(cbp)) == NULL) {
290 		mutex_enter(cbp->cb_lock);
291 		cbp->cb_state = CB_BAD;
292 		cbp->cb_timefailed = gethrestime_sec(); /* observability */
293 		goto retry;
294 	}
295 
296 
297 	tv.tv_sec = 30;
298 	tv.tv_usec = 0;
299 	if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) {
300 		newstate = CB_BAD;
301 	} else {
302 		newstate = CB_OK;
303 #ifdef	DEBUG
304 		rfs4_cb_null++;
305 #endif
306 	}
307 
308 	/* Check to see if the client has specified new callback info */
309 	mutex_enter(cbp->cb_lock);
310 	rfs4_cb_freech(cbp, ch, TRUE);
311 	if (cbp->cb_newer.cb_new == TRUE &&
312 	    cbp->cb_newer.cb_confirmed == TRUE) {
313 		goto retry;	/* give the CB_NULL another chance */
314 	}
315 
316 	cbp->cb_state = newstate;
317 	if (cbp->cb_state == CB_BAD)
318 		cbp->cb_timefailed = gethrestime_sec(); /* observability */
319 
320 	cv_broadcast(cbp->cb_cv);	/* start up the other threads */
321 	cbp->cb_nullcaller = FALSE;
322 	mutex_exit(cbp->cb_lock);
323 
324 	rfs4_client_rele(cp);
325 }
326 
327 /*
328  * Given a client struct, inspect the callback info to see if the
329  * callback path is up and available.
330  *
331  * If new callback path is available and no one has set it up then
332  * try to set it up. If setup is not successful after 5 tries (5 secs)
333  * then gives up and returns NULL.
334  *
335  * If callback path is being initialized, then wait for the CB_NULL RPC
336  * call to occur.
337  */
338 static rfs4_cbinfo_t *
rfs4_cbinfo_hold(rfs4_client_t * cp)339 rfs4_cbinfo_hold(rfs4_client_t *cp)
340 {
341 	rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
342 	int retries = 0;
343 
344 	mutex_enter(cbp->cb_lock);
345 
346 	while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) {
347 		/*
348 		 * Looks like a new callback path may be available and
349 		 * noone has set it up.
350 		 */
351 		mutex_exit(cbp->cb_lock);
352 		rfs4_dbe_hold(cp->rc_dbe);
353 		rfs4_do_cb_null(cp); /* caller will release client hold */
354 
355 		mutex_enter(cbp->cb_lock);
356 		/*
357 		 * If callback path is no longer new, or it's being setup
358 		 * then stop and wait for it to be done.
359 		 */
360 		if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE)
361 			break;
362 		mutex_exit(cbp->cb_lock);
363 
364 		if (++retries >= rfs4_max_setup_cb_tries)
365 			return (NULL);
366 		delay(hz);
367 		mutex_enter(cbp->cb_lock);
368 	}
369 
370 	/* Is there a thread working on doing the CB_NULL RPC? */
371 	if (cbp->cb_nullcaller == TRUE)
372 		cv_wait(cbp->cb_cv, cbp->cb_lock);  /* if so, wait on it */
373 
374 	/* If the callback path is not okay (up and running), just quit */
375 	if (cbp->cb_state != CB_OK) {
376 		mutex_exit(cbp->cb_lock);
377 		return (NULL);
378 	}
379 
380 	/* Let someone know we are using the current callback info */
381 	cbp->cb_refcnt++;
382 	mutex_exit(cbp->cb_lock);
383 	return (cbp);
384 }
385 
386 /*
387  * The caller is done with the callback info.  It may be that the
388  * caller's RPC failed and the NFSv4 client has actually provided new
389  * callback information.  If so, let the caller know so they can
390  * advantage of this and maybe retry the RPC that originally failed.
391  */
392 static int
rfs4_cbinfo_rele(rfs4_cbinfo_t * cbp,rfs4_cbstate_t newstate)393 rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate)
394 {
395 	int cb_new = FALSE;
396 
397 	mutex_enter(cbp->cb_lock);
398 
399 	/* The caller gets a chance to mark the callback info as bad */
400 	if (newstate != CB_NOCHANGE)
401 		cbp->cb_state = newstate;
402 	if (newstate == CB_FAILED) {
403 		cbp->cb_timefailed = gethrestime_sec(); /* observability */
404 		cbp->cb_notified_of_cb_path_down = FALSE;
405 	}
406 
407 	cbp->cb_refcnt--;	/* no longer using the information */
408 
409 	/*
410 	 * A thread may be waiting on this one to finish and if so,
411 	 * let it know that it is okay to do the CB_NULL to the
412 	 * client's callback server.
413 	 */
414 	if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller)
415 		cv_broadcast(cbp->cb_cv_nullcaller);
416 
417 	/*
418 	 * If this is the last thread to use the callback info and
419 	 * there is new callback information to try and no thread is
420 	 * there ready to do the CB_NULL, then return true to teh
421 	 * caller so they can do the CB_NULL
422 	 */
423 	if (cbp->cb_refcnt == 0 &&
424 	    cbp->cb_nullcaller == FALSE &&
425 	    cbp->cb_newer.cb_new == TRUE &&
426 	    cbp->cb_newer.cb_confirmed == TRUE)
427 		cb_new = TRUE;
428 
429 	mutex_exit(cbp->cb_lock);
430 
431 	return (cb_new);
432 }
433 
434 /*
435  * Given the information in the callback info struct, create a client
436  * handle that can be used by the server for its callback path.
437  */
438 static CLIENT *
rfs4_cbch_init(rfs4_cbinfo_t * cbp)439 rfs4_cbch_init(rfs4_cbinfo_t *cbp)
440 {
441 	struct knetconfig knc;
442 	vnode_t *vp;
443 	struct sockaddr_in addr4;
444 	struct sockaddr_in6 addr6;
445 	void *addr, *taddr;
446 	in_port_t *pp;
447 	int af;
448 	char *devnam;
449 	struct netbuf nb;
450 	int size;
451 	CLIENT *ch = NULL;
452 	int useresvport = 0;
453 
454 	mutex_enter(cbp->cb_lock);
455 
456 	if (cbp->cb_callback.cb_location.r_netid == NULL ||
457 	    cbp->cb_callback.cb_location.r_addr == NULL) {
458 		goto cb_init_out;
459 	}
460 
461 	if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) {
462 		knc.knc_semantics = NC_TPI_COTS;
463 		knc.knc_protofmly = "inet";
464 		knc.knc_proto = "tcp";
465 		devnam = "/dev/tcp";
466 		af = AF_INET;
467 	} else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp")
468 	    == 0) {
469 		knc.knc_semantics = NC_TPI_CLTS;
470 		knc.knc_protofmly = "inet";
471 		knc.knc_proto = "udp";
472 		devnam = "/dev/udp";
473 		af = AF_INET;
474 	} else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6")
475 	    == 0) {
476 		knc.knc_semantics = NC_TPI_COTS;
477 		knc.knc_protofmly = "inet6";
478 		knc.knc_proto = "tcp";
479 		devnam = "/dev/tcp6";
480 		af = AF_INET6;
481 	} else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6")
482 	    == 0) {
483 		knc.knc_semantics = NC_TPI_CLTS;
484 		knc.knc_protofmly = "inet6";
485 		knc.knc_proto = "udp";
486 		devnam = "/dev/udp6";
487 		af = AF_INET6;
488 	} else {
489 		goto cb_init_out;
490 	}
491 
492 	if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) {
493 
494 		goto cb_init_out;
495 	}
496 
497 	if (vp->v_type != VCHR) {
498 		VN_RELE(vp);
499 		goto cb_init_out;
500 	}
501 
502 	knc.knc_rdev = vp->v_rdev;
503 
504 	VN_RELE(vp);
505 
506 	if (af == AF_INET) {
507 		size = sizeof (addr4);
508 		bzero(&addr4, size);
509 		addr4.sin_family = (sa_family_t)af;
510 		addr = &addr4.sin_addr;
511 		pp = &addr4.sin_port;
512 		taddr = &addr4;
513 	} else /* AF_INET6 */ {
514 		size = sizeof (addr6);
515 		bzero(&addr6, size);
516 		addr6.sin6_family = (sa_family_t)af;
517 		addr = &addr6.sin6_addr;
518 		pp = &addr6.sin6_port;
519 		taddr = &addr6;
520 	}
521 
522 	if (uaddr2sockaddr(af,
523 	    cbp->cb_callback.cb_location.r_addr, addr, pp)) {
524 
525 		goto cb_init_out;
526 	}
527 
528 
529 	nb.maxlen = nb.len = size;
530 	nb.buf = (char *)taddr;
531 
532 	if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program,
533 	    NFS_CB, 0, 0, curthread->t_cred, &ch)) {
534 
535 		ch = NULL;
536 	}
537 
538 	/* turn off reserved port usage */
539 	(void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport);
540 
541 cb_init_out:
542 	mutex_exit(cbp->cb_lock);
543 	return (ch);
544 }
545 
546 /*
547  * Iterate over the client handle cache and
548  * destroy it.
549  */
550 static void
rfs4_cb_chflush(rfs4_cbinfo_t * cbp)551 rfs4_cb_chflush(rfs4_cbinfo_t *cbp)
552 {
553 	CLIENT *ch;
554 
555 	while (cbp->cb_chc_free) {
556 		cbp->cb_chc_free--;
557 		ch = cbp->cb_chc[cbp->cb_chc_free];
558 		cbp->cb_chc[cbp->cb_chc_free] = NULL;
559 		if (ch) {
560 			if (ch->cl_auth)
561 				auth_destroy(ch->cl_auth);
562 			clnt_destroy(ch);
563 		}
564 	}
565 }
566 
567 /*
568  * Return a client handle, either from a the small
569  * rfs4_client_t cache or one that we just created.
570  */
571 static CLIENT *
rfs4_cb_getch(rfs4_cbinfo_t * cbp)572 rfs4_cb_getch(rfs4_cbinfo_t *cbp)
573 {
574 	CLIENT *cbch = NULL;
575 	uint32_t zilch = 0;
576 
577 	mutex_enter(cbp->cb_lock);
578 
579 	if (cbp->cb_chc_free) {
580 		cbp->cb_chc_free--;
581 		cbch = cbp->cb_chc[ cbp->cb_chc_free ];
582 		mutex_exit(cbp->cb_lock);
583 		(void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch);
584 		return (cbch);
585 	}
586 
587 	mutex_exit(cbp->cb_lock);
588 
589 	/* none free so make it now */
590 	cbch = rfs4_cbch_init(cbp);
591 
592 	return (cbch);
593 }
594 
595 /*
596  * Return the client handle to the small cache or
597  * destroy it.
598  */
599 static void
rfs4_cb_freech(rfs4_cbinfo_t * cbp,CLIENT * ch,bool_t lockheld)600 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld)
601 {
602 	if (lockheld == FALSE)
603 		mutex_enter(cbp->cb_lock);
604 
605 	if (cbp->cb_chc_free < RFS4_CBCH_MAX) {
606 		cbp->cb_chc[ cbp->cb_chc_free++ ] = ch;
607 		if (lockheld == FALSE)
608 			mutex_exit(cbp->cb_lock);
609 		return;
610 	}
611 	if (lockheld == FALSE)
612 		mutex_exit(cbp->cb_lock);
613 
614 	/*
615 	 * cache maxed out of free entries, obliterate
616 	 * this client handle, destroy it, throw it away.
617 	 */
618 	if (ch->cl_auth)
619 		auth_destroy(ch->cl_auth);
620 	clnt_destroy(ch);
621 }
622 
623 /*
624  * With the supplied callback information - initialize the client
625  * callback data.  If there is a callback in progress, save the
626  * callback info so that a thread can pick it up in the future.
627  */
628 void
rfs4_client_setcb(rfs4_client_t * cp,cb_client4 * cb,uint32_t cb_ident)629 rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident)
630 {
631 	char *addr = NULL;
632 	char *netid = NULL;
633 	rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
634 	size_t len;
635 
636 	/* Set the call back for the client */
637 	if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' &&
638 	    cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') {
639 		len = strlen(cb->cb_location.r_addr) + 1;
640 		addr = kmem_alloc(len, KM_SLEEP);
641 		bcopy(cb->cb_location.r_addr, addr, len);
642 		len = strlen(cb->cb_location.r_netid) + 1;
643 		netid = kmem_alloc(len, KM_SLEEP);
644 		bcopy(cb->cb_location.r_netid, netid, len);
645 	}
646 	/* ready to save the new information but first free old, if exists */
647 	mutex_enter(cbp->cb_lock);
648 
649 	cbp->cb_newer.cb_callback.cb_program = cb->cb_program;
650 
651 	if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL)
652 		kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr,
653 		    strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1);
654 	cbp->cb_newer.cb_callback.cb_location.r_addr = addr;
655 
656 	if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL)
657 		kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid,
658 		    strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1);
659 	cbp->cb_newer.cb_callback.cb_location.r_netid = netid;
660 
661 	cbp->cb_newer.cb_ident = cb_ident;
662 
663 	if (addr && *addr && netid && *netid) {
664 		cbp->cb_newer.cb_new = TRUE;
665 		cbp->cb_newer.cb_confirmed = FALSE;
666 	} else {
667 		cbp->cb_newer.cb_new = FALSE;
668 		cbp->cb_newer.cb_confirmed = FALSE;
669 	}
670 
671 	mutex_exit(cbp->cb_lock);
672 }
673 
674 /*
675  * The server uses this when processing SETCLIENTID_CONFIRM.  Callback
676  * information may have been provided on SETCLIENTID and this call
677  * marks that information as confirmed and then starts a thread to
678  * test the callback path.
679  */
680 void
rfs4_deleg_cb_check(rfs4_client_t * cp)681 rfs4_deleg_cb_check(rfs4_client_t *cp)
682 {
683 	if (cp->rc_cbinfo.cb_newer.cb_new == FALSE)
684 		return;
685 
686 	cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE;
687 
688 	rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */
689 
690 	(void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN,
691 	    minclsyspri);
692 }
693 
694 static void
rfs4args_cb_recall_free(nfs_cb_argop4 * argop)695 rfs4args_cb_recall_free(nfs_cb_argop4 *argop)
696 {
697 	CB_RECALL4args	*rec_argp;
698 
699 	rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
700 	if (rec_argp->fh.nfs_fh4_val)
701 		kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len);
702 }
703 
704 /* ARGSUSED */
705 static void
rfs4args_cb_getattr_free(nfs_cb_argop4 * argop)706 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop)
707 {
708 	CB_GETATTR4args *argp;
709 
710 	argp = &argop->nfs_cb_argop4_u.opcbgetattr;
711 	if (argp->fh.nfs_fh4_val)
712 		kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len);
713 }
714 
715 static void
rfs4freeargres(CB_COMPOUND4args * args,CB_COMPOUND4res * resp)716 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp)
717 {
718 	int i, arglen;
719 	nfs_cb_argop4 *argop;
720 
721 	/*
722 	 * First free any special args alloc'd for specific ops.
723 	 */
724 	arglen = args->array_len;
725 	argop = args->array;
726 	for (i = 0; i < arglen; i++, argop++) {
727 
728 		switch (argop->argop) {
729 		case OP_CB_RECALL:
730 			rfs4args_cb_recall_free(argop);
731 			break;
732 
733 		case OP_CB_GETATTR:
734 			rfs4args_cb_getattr_free(argop);
735 			break;
736 
737 		default:
738 			return;
739 		}
740 	}
741 
742 	if (args->tag.utf8string_len > 0)
743 		UTF8STRING_FREE(args->tag)
744 
745 	kmem_free(args->array, arglen * sizeof (nfs_cb_argop4));
746 	if (resp)
747 		(void) xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp);
748 }
749 
750 /*
751  * General callback routine for the server to the client.
752  */
753 static enum clnt_stat
rfs4_do_callback(rfs4_client_t * cp,CB_COMPOUND4args * args,CB_COMPOUND4res * res,struct timeval timeout)754 rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args,
755     CB_COMPOUND4res *res, struct timeval timeout)
756 {
757 	rfs4_cbinfo_t *cbp;
758 	CLIENT *ch;
759 	/* start with this in case cb_getch() fails */
760 	enum clnt_stat	stat = RPC_FAILED;
761 
762 	res->tag.utf8string_val = NULL;
763 	res->array = NULL;
764 
765 retry:
766 	cbp = rfs4_cbinfo_hold(cp);
767 	if (cbp == NULL)
768 		return (stat);
769 
770 	/* get a client handle */
771 	if ((ch = rfs4_cb_getch(cbp)) != NULL) {
772 		/*
773 		 * reset the cb_ident since it may have changed in
774 		 * rfs4_cbinfo_hold()
775 		 */
776 		args->callback_ident = cbp->cb_ident;
777 
778 		stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv,
779 		    (caddr_t)args, xdr_CB_COMPOUND4res,
780 		    (caddr_t)res, timeout);
781 
782 		/* free client handle */
783 		rfs4_cb_freech(cbp, ch, FALSE);
784 	}
785 
786 	/*
787 	 * If the rele says that there may be new callback info then
788 	 * retry this sequence and it may succeed as a result of the
789 	 * new callback path
790 	 */
791 	if (rfs4_cbinfo_rele(cbp,
792 	    (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE)
793 		goto retry;
794 
795 	return (stat);
796 }
797 
798 /*
799  * Used by the NFSv4 server to get attributes for a file while
800  * handling the case where a file has been write delegated.  For the
801  * time being, VOP_GETATTR() is called and CB_GETATTR processing is
802  * not undertaken.  This call site is maintained in case the server is
803  * updated in the future to handle write delegation space guarantees.
804  */
805 nfsstat4
rfs4_vop_getattr(vnode_t * vp,vattr_t * vap,int flag,cred_t * cr)806 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
807 {
808 
809 	int error;
810 
811 	error = VOP_GETATTR(vp, vap, flag, cr, NULL);
812 	return (puterrno4(error));
813 }
814 
815 /*
816  * This is used everywhere in the v2/v3 server to allow the
817  * integration of all NFS versions and the support of delegation.  For
818  * now, just call the VOP_GETATTR().  If the NFSv4 server is enhanced
819  * in the future to provide space guarantees for write delegations
820  * then this call site should be expanded to interact with the client.
821  */
822 int
rfs4_delegated_getattr(vnode_t * vp,vattr_t * vap,int flag,cred_t * cr)823 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
824 {
825 	return (VOP_GETATTR(vp, vap, flag, cr, NULL));
826 }
827 
828 /*
829  * Place the actual cb_recall otw call to client.
830  */
831 static void
rfs4_do_cb_recall(rfs4_deleg_state_t * dsp,bool_t trunc)832 rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc)
833 {
834 	CB_COMPOUND4args	cb4_args;
835 	CB_COMPOUND4res		cb4_res;
836 	CB_RECALL4args		*rec_argp;
837 	CB_RECALL4res		*rec_resp;
838 	nfs_cb_argop4		*argop;
839 	int			numops;
840 	int			argoplist_size;
841 	struct timeval		timeout;
842 	nfs_fh4			*fhp;
843 	enum clnt_stat		call_stat;
844 
845 	/*
846 	 * set up the compound args
847 	 */
848 	numops = 1;	/* CB_RECALL only */
849 
850 	argoplist_size = numops * sizeof (nfs_cb_argop4);
851 	argop = kmem_zalloc(argoplist_size, KM_SLEEP);
852 	argop->argop = OP_CB_RECALL;
853 	rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
854 
855 	(void) str_to_utf8("cb_recall", &cb4_args.tag);
856 	cb4_args.minorversion = CB4_MINORVERSION;
857 	/* cb4_args.callback_ident is set in rfs4_do_callback() */
858 	cb4_args.array_len = numops;
859 	cb4_args.array = argop;
860 
861 	/*
862 	 * fill in the args struct
863 	 */
864 	bcopy(&dsp->rds_delegid.stateid, &rec_argp->stateid, sizeof (stateid4));
865 	rec_argp->truncate = trunc;
866 
867 	fhp = &dsp->rds_finfo->rf_filehandle;
868 	rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) *
869 	    fhp->nfs_fh4_len, KM_SLEEP);
870 	nfs_fh4_copy(fhp, &rec_argp->fh);
871 
872 	/* Keep track of when we did this for observability */
873 	dsp->rds_time_recalled = gethrestime_sec();
874 
875 	/*
876 	 * Set up the timeout for the callback and make the actual call.
877 	 * Timeout will be 80% of the lease period for this server.
878 	 */
879 	timeout.tv_sec = (rfs4_lease_time * 80) / 100;
880 	timeout.tv_usec = 0;
881 
882 	DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->rds_client,
883 	    rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp);
884 
885 	call_stat = rfs4_do_callback(dsp->rds_client, &cb4_args, &cb4_res,
886 	    timeout);
887 
888 	rec_resp = (cb4_res.array_len == 0) ? NULL :
889 	    &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall;
890 
891 	DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->rds_client,
892 	    rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp);
893 
894 	if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) {
895 		rfs4_return_deleg(dsp, TRUE);
896 	}
897 
898 	rfs4freeargres(&cb4_args, &cb4_res);
899 }
900 
901 struct recall_arg {
902 	rfs4_deleg_state_t *dsp;
903 	void (*recall)(rfs4_deleg_state_t *, bool_t trunc);
904 	bool_t trunc;
905 };
906 
907 static void
do_recall(struct recall_arg * arg)908 do_recall(struct recall_arg *arg)
909 {
910 	rfs4_deleg_state_t *dsp = arg->dsp;
911 	rfs4_file_t *fp = dsp->rds_finfo;
912 	callb_cpr_t cpr_info;
913 	kmutex_t cpr_lock;
914 
915 	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
916 	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall");
917 
918 	/*
919 	 * It is possible that before this thread starts
920 	 * the client has send us a return_delegation, and
921 	 * if that is the case we do not need to send the
922 	 * recall callback.
923 	 */
924 	if (dsp->rds_dtype != OPEN_DELEGATE_NONE) {
925 		DTRACE_PROBE3(nfss__i__recall,
926 		    struct recall_arg *, arg,
927 		    struct rfs4_deleg_state_t *, dsp,
928 		    struct rfs4_file_t *, fp);
929 
930 		if (arg->recall)
931 			(void) (*arg->recall)(dsp, arg->trunc);
932 	}
933 
934 	mutex_enter(fp->rf_dinfo.rd_recall_lock);
935 	/*
936 	 * Recall count may go negative if the parent thread that is
937 	 * creating the individual callback threads does not modify
938 	 * the recall_count field before the callback thread actually
939 	 * gets a response from the CB_RECALL
940 	 */
941 	fp->rf_dinfo.rd_recall_count--;
942 	if (fp->rf_dinfo.rd_recall_count == 0)
943 		cv_signal(fp->rf_dinfo.rd_recall_cv);
944 	mutex_exit(fp->rf_dinfo.rd_recall_lock);
945 
946 	mutex_enter(&cpr_lock);
947 	CALLB_CPR_EXIT(&cpr_info);
948 	mutex_destroy(&cpr_lock);
949 
950 	rfs4_deleg_state_rele(dsp); /* release the hold for this thread */
951 
952 	kmem_free(arg, sizeof (struct recall_arg));
953 }
954 
955 struct master_recall_args {
956     rfs4_file_t *fp;
957     void (*recall)(rfs4_deleg_state_t *, bool_t);
958     bool_t trunc;
959 };
960 
961 static void
do_recall_file(struct master_recall_args * map)962 do_recall_file(struct master_recall_args *map)
963 {
964 	rfs4_file_t *fp = map->fp;
965 	rfs4_deleg_state_t *dsp;
966 	struct recall_arg *arg;
967 	callb_cpr_t cpr_info;
968 	kmutex_t cpr_lock;
969 	int32_t recall_count;
970 
971 	rfs4_dbe_lock(fp->rf_dbe);
972 
973 	/* Recall already in progress ? */
974 	mutex_enter(fp->rf_dinfo.rd_recall_lock);
975 	if (fp->rf_dinfo.rd_recall_count != 0) {
976 		mutex_exit(fp->rf_dinfo.rd_recall_lock);
977 		rfs4_dbe_rele_nolock(fp->rf_dbe);
978 		rfs4_dbe_unlock(fp->rf_dbe);
979 		kmem_free(map, sizeof (struct master_recall_args));
980 		return;
981 	}
982 
983 	mutex_exit(fp->rf_dinfo.rd_recall_lock);
984 
985 	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
986 	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,	"v4RecallFile");
987 
988 	recall_count = 0;
989 	for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
990 	    dsp = list_next(&fp->rf_delegstatelist, dsp)) {
991 
992 		rfs4_dbe_lock(dsp->rds_dbe);
993 		/*
994 		 * if this delegation state
995 		 * is being reaped skip it
996 		 */
997 		if (rfs4_dbe_is_invalid(dsp->rds_dbe)) {
998 			rfs4_dbe_unlock(dsp->rds_dbe);
999 			continue;
1000 		}
1001 
1002 		/* hold for receiving thread */
1003 		rfs4_dbe_hold(dsp->rds_dbe);
1004 		rfs4_dbe_unlock(dsp->rds_dbe);
1005 
1006 		arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP);
1007 		arg->recall = map->recall;
1008 		arg->trunc = map->trunc;
1009 		arg->dsp = dsp;
1010 
1011 		recall_count++;
1012 
1013 		(void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN,
1014 		    minclsyspri);
1015 	}
1016 
1017 	rfs4_dbe_unlock(fp->rf_dbe);
1018 
1019 	mutex_enter(fp->rf_dinfo.rd_recall_lock);
1020 	/*
1021 	 * Recall count may go negative if the parent thread that is
1022 	 * creating the individual callback threads does not modify
1023 	 * the recall_count field before the callback thread actually
1024 	 * gets a response from the CB_RECALL
1025 	 */
1026 	fp->rf_dinfo.rd_recall_count += recall_count;
1027 	while (fp->rf_dinfo.rd_recall_count)
1028 		cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock);
1029 
1030 	mutex_exit(fp->rf_dinfo.rd_recall_lock);
1031 
1032 	DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp);
1033 	rfs4_file_rele(fp);
1034 	kmem_free(map, sizeof (struct master_recall_args));
1035 	mutex_enter(&cpr_lock);
1036 	CALLB_CPR_EXIT(&cpr_info);
1037 	mutex_destroy(&cpr_lock);
1038 }
1039 
1040 static void
rfs4_recall_file(rfs4_file_t * fp,void (* recall)(rfs4_deleg_state_t *,bool_t trunc),bool_t trunc,rfs4_client_t * cp)1041 rfs4_recall_file(rfs4_file_t *fp,
1042     void (*recall)(rfs4_deleg_state_t *, bool_t trunc),
1043     bool_t trunc, rfs4_client_t *cp)
1044 {
1045 	struct master_recall_args *args;
1046 
1047 	rfs4_dbe_lock(fp->rf_dbe);
1048 	if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1049 		rfs4_dbe_unlock(fp->rf_dbe);
1050 		return;
1051 	}
1052 	rfs4_dbe_hold(fp->rf_dbe);	/* hold for new thread */
1053 
1054 	/*
1055 	 * Mark the time we started the recall processing.
1056 	 * If it has been previously recalled, do not reset the
1057 	 * timer since this is used for the revocation decision.
1058 	 */
1059 	if (fp->rf_dinfo.rd_time_recalled == 0)
1060 		fp->rf_dinfo.rd_time_recalled = gethrestime_sec();
1061 	fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */
1062 	/* Client causing recall not always available */
1063 	if (cp)
1064 		fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid;
1065 
1066 	rfs4_dbe_unlock(fp->rf_dbe);
1067 
1068 	args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP);
1069 	args->fp = fp;
1070 	args->recall = recall;
1071 	args->trunc = trunc;
1072 
1073 	(void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN,
1074 	    minclsyspri);
1075 }
1076 
1077 void
rfs4_recall_deleg(rfs4_file_t * fp,bool_t trunc,rfs4_client_t * cp)1078 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp)
1079 {
1080 	time_t elapsed1, elapsed2;
1081 
1082 	if (fp->rf_dinfo.rd_time_recalled != 0) {
1083 		elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled;
1084 		elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite;
1085 		/* First check to see if a revocation should occur */
1086 		if (elapsed1 > rfs4_lease_time &&
1087 		    elapsed2 > rfs4_lease_time) {
1088 			rfs4_revoke_file(fp);
1089 			return;
1090 		}
1091 		/*
1092 		 * Next check to see if a recall should be done again
1093 		 * so quickly.
1094 		 */
1095 		if (elapsed1 <= ((rfs4_lease_time * 20) / 100))
1096 			return;
1097 	}
1098 	rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp);
1099 }
1100 
1101 /*
1102  * rfs4_check_recall is called from rfs4_do_open to determine if the current
1103  * open conflicts with the delegation.
1104  * Return true if we need recall otherwise false.
1105  * Assumes entry locks for sp and sp->rs_finfo are held.
1106  */
1107 bool_t
rfs4_check_recall(rfs4_state_t * sp,uint32_t access)1108 rfs4_check_recall(rfs4_state_t *sp, uint32_t access)
1109 {
1110 	open_delegation_type4 dtype = sp->rs_finfo->rf_dinfo.rd_dtype;
1111 
1112 	switch (dtype) {
1113 	case OPEN_DELEGATE_NONE:
1114 		/* Not currently delegated so there is nothing to do */
1115 		return (FALSE);
1116 	case OPEN_DELEGATE_READ:
1117 		/*
1118 		 * If the access is only asking for READ then there is
1119 		 * no conflict and nothing to do.  If it is asking
1120 		 * for write, then there will be conflict and the read
1121 		 * delegation should be recalled.
1122 		 */
1123 		if (access == OPEN4_SHARE_ACCESS_READ)
1124 			return (FALSE);
1125 		else
1126 			return (TRUE);
1127 	case OPEN_DELEGATE_WRITE:
1128 		/* Check to see if this client has the delegation */
1129 		return (rfs4_is_deleg(sp));
1130 	}
1131 
1132 	return (FALSE);
1133 }
1134 
1135 /*
1136  * Return the "best" allowable delegation available given the current
1137  * delegation type and the desired access and deny modes on the file.
1138  * At the point that this routine is called we know that the access and
1139  * deny modes are consistent with the file modes.
1140  */
1141 static open_delegation_type4
rfs4_check_delegation(rfs4_state_t * sp,rfs4_file_t * fp)1142 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp)
1143 {
1144 	open_delegation_type4 dtype = fp->rf_dinfo.rd_dtype;
1145 	uint32_t access = sp->rs_share_access;
1146 	uint32_t deny = sp->rs_share_deny;
1147 	int readcnt = 0;
1148 	int writecnt = 0;
1149 
1150 	switch (dtype) {
1151 	case OPEN_DELEGATE_NONE:
1152 		/*
1153 		 * Determine if more than just this OPEN have the file
1154 		 * open and if so, no delegation may be provided to
1155 		 * the client.
1156 		 */
1157 		if (access & OPEN4_SHARE_ACCESS_WRITE)
1158 			writecnt++;
1159 		if (access & OPEN4_SHARE_ACCESS_READ)
1160 			readcnt++;
1161 
1162 		if (fp->rf_access_read > readcnt ||
1163 		    fp->rf_access_write > writecnt)
1164 			return (OPEN_DELEGATE_NONE);
1165 
1166 		/*
1167 		 * If the client is going to write, or if the client
1168 		 * has exclusive access, return a write delegation.
1169 		 */
1170 		if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1171 		    (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE)))
1172 			return (OPEN_DELEGATE_WRITE);
1173 		/*
1174 		 * If we don't want to write or we've haven't denied read
1175 		 * access to others, return a read delegation.
1176 		 */
1177 		if ((access & ~OPEN4_SHARE_ACCESS_WRITE) ||
1178 		    (deny & ~OPEN4_SHARE_DENY_READ))
1179 			return (OPEN_DELEGATE_READ);
1180 
1181 		/* Shouldn't get here */
1182 		return (OPEN_DELEGATE_NONE);
1183 
1184 	case OPEN_DELEGATE_READ:
1185 		/*
1186 		 * If the file is delegated for read but we wan't to
1187 		 * write or deny others to read then we can't delegate
1188 		 * the file. We shouldn't get here since the delegation should
1189 		 * have been recalled already.
1190 		 */
1191 		if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1192 		    (deny & OPEN4_SHARE_DENY_READ))
1193 			return (OPEN_DELEGATE_NONE);
1194 		return (OPEN_DELEGATE_READ);
1195 
1196 	case OPEN_DELEGATE_WRITE:
1197 		return (OPEN_DELEGATE_WRITE);
1198 	}
1199 
1200 	/* Shouldn't get here */
1201 	return (OPEN_DELEGATE_NONE);
1202 }
1203 
1204 /*
1205  * Given the desired delegation type and the "history" of the file
1206  * determine the actual delegation type to return.
1207  */
1208 static open_delegation_type4
rfs4_delegation_policy(open_delegation_type4 dtype,rfs4_dinfo_t * dinfo,clientid4 cid)1209 rfs4_delegation_policy(open_delegation_type4 dtype,
1210     rfs4_dinfo_t *dinfo, clientid4 cid)
1211 {
1212 	time_t elapsed;
1213 
1214 	if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE)
1215 		return (OPEN_DELEGATE_NONE);
1216 
1217 	/*
1218 	 * Has this file/delegation ever been recalled?  If not then
1219 	 * no further checks for a delegation race need to be done.
1220 	 * However if a recall has occurred, then check to see if a
1221 	 * client has caused its own delegation recall to occur.  If
1222 	 * not, then has a delegation for this file been returned
1223 	 * recently?  If so, then do not assign a new delegation to
1224 	 * avoid a "delegation race" between the original client and
1225 	 * the new/conflicting client.
1226 	 */
1227 	if (dinfo->rd_ever_recalled == TRUE) {
1228 		if (dinfo->rd_conflicted_client != cid) {
1229 			elapsed = gethrestime_sec() - dinfo->rd_time_returned;
1230 			if (elapsed < rfs4_lease_time)
1231 				return (OPEN_DELEGATE_NONE);
1232 		}
1233 	}
1234 
1235 	/* Limit the number of read grants */
1236 	if (dtype == OPEN_DELEGATE_READ &&
1237 	    dinfo->rd_rdgrants > MAX_READ_DELEGATIONS)
1238 		return (OPEN_DELEGATE_NONE);
1239 
1240 	/*
1241 	 * Should consider limiting total number of read/write
1242 	 * delegations the server will permit.
1243 	 */
1244 
1245 	return (dtype);
1246 }
1247 
1248 /*
1249  * Try and grant a delegation for an open give the state. The routine
1250  * returns the delegation type granted. This could be OPEN_DELEGATE_NONE.
1251  *
1252  * The state and associate file entry must be locked
1253  */
1254 rfs4_deleg_state_t *
rfs4_grant_delegation(delegreq_t dreq,rfs4_state_t * sp,int * recall)1255 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall)
1256 {
1257 	rfs4_file_t *fp = sp->rs_finfo;
1258 	open_delegation_type4 dtype;
1259 	int no_delegation;
1260 
1261 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1262 	ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1263 
1264 	/* Is the server even providing delegations? */
1265 	if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE)
1266 		return (NULL);
1267 
1268 	/* Check to see if delegations have been temporarily disabled */
1269 	mutex_enter(&rfs4_deleg_lock);
1270 	no_delegation = rfs4_deleg_disabled;
1271 	mutex_exit(&rfs4_deleg_lock);
1272 
1273 	if (no_delegation)
1274 		return (NULL);
1275 
1276 	/* Don't grant a delegation if a deletion is impending. */
1277 	if (fp->rf_dinfo.rd_hold_grant > 0) {
1278 		return (NULL);
1279 	}
1280 
1281 	/*
1282 	 * Don't grant a delegation if there are any lock manager
1283 	 * (NFSv2/v3) locks for the file.  This is a bit of a hack (e.g.,
1284 	 * if there are only read locks we should be able to grant a
1285 	 * read-only delegation), but it's good enough for now.
1286 	 *
1287 	 * MT safety: the lock manager checks for conflicting delegations
1288 	 * before processing a lock request.  That check will block until
1289 	 * we are done here.  So if the lock manager acquires a lock after
1290 	 * we decide to grant the delegation, the delegation will get
1291 	 * immediately recalled (if there's a conflict), so we're safe.
1292 	 */
1293 	if (lm_vp_active(fp->rf_vp)) {
1294 		return (NULL);
1295 	}
1296 
1297 	/*
1298 	 * Based on the type of delegation request passed in, take the
1299 	 * appropriate action (DELEG_NONE is handled above)
1300 	 */
1301 	switch (dreq) {
1302 
1303 	case DELEG_READ:
1304 	case DELEG_WRITE:
1305 		/*
1306 		 * The server "must" grant the delegation in this case.
1307 		 * Client is using open previous
1308 		 */
1309 		dtype = (open_delegation_type4)dreq;
1310 		*recall = 1;
1311 		break;
1312 	case DELEG_ANY:
1313 		/*
1314 		 * If a valid callback path does not exist, no delegation may
1315 		 * be granted.
1316 		 */
1317 		if (sp->rs_owner->ro_client->rc_cbinfo.cb_state != CB_OK)
1318 			return (NULL);
1319 
1320 		/*
1321 		 * If the original operation which caused time_rm_delayed
1322 		 * to be set hasn't been retried and completed for one
1323 		 * full lease period, clear it and allow delegations to
1324 		 * get granted again.
1325 		 */
1326 		if (fp->rf_dinfo.rd_time_rm_delayed > 0 &&
1327 		    gethrestime_sec() >
1328 		    fp->rf_dinfo.rd_time_rm_delayed + rfs4_lease_time)
1329 			fp->rf_dinfo.rd_time_rm_delayed = 0;
1330 
1331 		/*
1332 		 * If we are waiting for a delegation to be returned then
1333 		 * don't delegate this file. We do this for correctness as
1334 		 * well as if the file is being recalled we would likely
1335 		 * recall this file again.
1336 		 */
1337 
1338 		if (fp->rf_dinfo.rd_time_recalled != 0 ||
1339 		    fp->rf_dinfo.rd_time_rm_delayed != 0)
1340 			return (NULL);
1341 
1342 		/* Get the "best" delegation candidate */
1343 		dtype = rfs4_check_delegation(sp, fp);
1344 
1345 		if (dtype == OPEN_DELEGATE_NONE)
1346 			return (NULL);
1347 
1348 		/*
1349 		 * Based on policy and the history of the file get the
1350 		 * actual delegation.
1351 		 */
1352 		dtype = rfs4_delegation_policy(dtype, &fp->rf_dinfo,
1353 		    sp->rs_owner->ro_client->rc_clientid);
1354 
1355 		if (dtype == OPEN_DELEGATE_NONE)
1356 			return (NULL);
1357 		break;
1358 	default:
1359 		return (NULL);
1360 	}
1361 
1362 	/* set the delegation for the state */
1363 	return (rfs4_deleg_state(sp, dtype, recall));
1364 }
1365 
1366 void
rfs4_set_deleg_response(rfs4_deleg_state_t * dsp,open_delegation4 * dp,nfsace4 * ace,int recall)1367 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp,
1368     nfsace4 *ace,  int recall)
1369 {
1370 	open_write_delegation4 *wp;
1371 	open_read_delegation4 *rp;
1372 	nfs_space_limit4 *spl;
1373 	nfsace4 nace;
1374 
1375 	/*
1376 	 * We need to allocate a new copy of the who string.
1377 	 * this string will be freed by the rfs4_op_open dis_resfree
1378 	 * routine. We need to do this allocation since replays will
1379 	 * be allocated and rfs4_compound can't tell the difference from
1380 	 * a replay and an inital open. N.B. if an ace is passed in, it
1381 	 * the caller's responsibility to free it.
1382 	 */
1383 
1384 	if (ace == NULL) {
1385 		/*
1386 		 * Default is to deny all access, the client will have
1387 		 * to contact the server.  XXX Do we want to actually
1388 		 * set a deny for every one, or do we simply want to
1389 		 * construct an entity that will match no one?
1390 		 */
1391 		nace.type = ACE4_ACCESS_DENIED_ACE_TYPE;
1392 		nace.flag = 0;
1393 		nace.access_mask = ACE4_VALID_MASK_BITS;
1394 		(void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who);
1395 	} else {
1396 		nace.type = ace->type;
1397 		nace.flag = ace->flag;
1398 		nace.access_mask = ace->access_mask;
1399 		(void) utf8_copy(&ace->who, &nace.who);
1400 	}
1401 
1402 	dp->delegation_type = dsp->rds_dtype;
1403 
1404 	switch (dsp->rds_dtype) {
1405 	case OPEN_DELEGATE_NONE:
1406 		break;
1407 	case OPEN_DELEGATE_READ:
1408 		rp = &dp->open_delegation4_u.read;
1409 		rp->stateid = dsp->rds_delegid.stateid;
1410 		rp->recall = (bool_t)recall;
1411 		rp->permissions = nace;
1412 		break;
1413 	case OPEN_DELEGATE_WRITE:
1414 		wp = &dp->open_delegation4_u.write;
1415 		wp->stateid = dsp->rds_delegid.stateid;
1416 		wp->recall = (bool_t)recall;
1417 		spl = &wp->space_limit;
1418 		spl->limitby = NFS_LIMIT_SIZE;
1419 		spl->nfs_space_limit4_u.filesize = 0;
1420 		wp->permissions = nace;
1421 		break;
1422 	}
1423 }
1424 
1425 /*
1426  * Check if the file is delegated via the provided file struct.
1427  * Return TRUE if it is delegated.  This is intended for use by
1428  * the v4 server.  The v2/v3 server code should use rfs4_check_delegated().
1429  *
1430  * Note that if the file is found to have a delegation, it is
1431  * recalled, unless the clientid of the caller matches the clientid of the
1432  * delegation. If the caller has specified, there is a slight delay
1433  * inserted in the hopes that the delegation will be returned quickly.
1434  */
1435 bool_t
rfs4_check_delegated_byfp(int mode,rfs4_file_t * fp,bool_t trunc,bool_t do_delay,bool_t is_rm,clientid4 * cp)1436 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp,
1437     bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp)
1438 {
1439 	rfs4_deleg_state_t *dsp;
1440 
1441 	/* Is delegation enabled? */
1442 	if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
1443 		return (FALSE);
1444 
1445 	/* do we have a delegation on this file? */
1446 	rfs4_dbe_lock(fp->rf_dbe);
1447 	if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1448 		if (is_rm)
1449 			fp->rf_dinfo.rd_hold_grant++;
1450 		rfs4_dbe_unlock(fp->rf_dbe);
1451 		return (FALSE);
1452 	}
1453 	/*
1454 	 * do we have a write delegation on this file or are we
1455 	 * requesting write access to a file with any type of existing
1456 	 * delegation?
1457 	 */
1458 	if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
1459 		if (cp != NULL) {
1460 			dsp = list_head(&fp->rf_delegstatelist);
1461 			if (dsp == NULL) {
1462 				rfs4_dbe_unlock(fp->rf_dbe);
1463 				return (FALSE);
1464 			}
1465 			/*
1466 			 * Does the requestor already own the delegation?
1467 			 */
1468 			if (dsp->rds_client->rc_clientid == *(cp)) {
1469 				rfs4_dbe_unlock(fp->rf_dbe);
1470 				return (FALSE);
1471 			}
1472 		}
1473 
1474 		rfs4_dbe_unlock(fp->rf_dbe);
1475 		rfs4_recall_deleg(fp, trunc, NULL);
1476 
1477 		if (!do_delay) {
1478 			rfs4_dbe_lock(fp->rf_dbe);
1479 			fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1480 			rfs4_dbe_unlock(fp->rf_dbe);
1481 			return (TRUE);
1482 		}
1483 
1484 		delay(NFS4_DELEGATION_CONFLICT_DELAY);
1485 
1486 		rfs4_dbe_lock(fp->rf_dbe);
1487 		if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) {
1488 			fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1489 			rfs4_dbe_unlock(fp->rf_dbe);
1490 			return (TRUE);
1491 		}
1492 	}
1493 	if (is_rm)
1494 		fp->rf_dinfo.rd_hold_grant++;
1495 	rfs4_dbe_unlock(fp->rf_dbe);
1496 	return (FALSE);
1497 }
1498 
1499 /*
1500  * Check if the file is delegated in the case of a v2 or v3 access.
1501  * Return TRUE if it is delegated which in turn means that v2 should
1502  * drop the request and in the case of v3 JUKEBOX should be returned.
1503  */
1504 bool_t
rfs4_check_delegated(int mode,vnode_t * vp,bool_t trunc)1505 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc)
1506 {
1507 	rfs4_file_t *fp;
1508 	bool_t create = FALSE;
1509 	bool_t rc = FALSE;
1510 
1511 	rfs4_hold_deleg_policy();
1512 
1513 	/* Is delegation enabled? */
1514 	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) {
1515 		fp = rfs4_findfile(vp, NULL, &create);
1516 		if (fp != NULL) {
1517 			if (rfs4_check_delegated_byfp(mode, fp, trunc,
1518 			    TRUE, FALSE, NULL)) {
1519 				rc = TRUE;
1520 			}
1521 			rfs4_file_rele(fp);
1522 		}
1523 	}
1524 	rfs4_rele_deleg_policy();
1525 	return (rc);
1526 }
1527 
1528 /*
1529  * Release a hold on the hold_grant counter which
1530  * prevents delegation from being granted while a remove
1531  * or a rename is in progress.
1532  */
1533 void
rfs4_clear_dont_grant(rfs4_file_t * fp)1534 rfs4_clear_dont_grant(rfs4_file_t *fp)
1535 {
1536 	if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
1537 		return;
1538 	rfs4_dbe_lock(fp->rf_dbe);
1539 	ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
1540 	fp->rf_dinfo.rd_hold_grant--;
1541 	fp->rf_dinfo.rd_time_rm_delayed = 0;
1542 	rfs4_dbe_unlock(fp->rf_dbe);
1543 }
1544 
1545 /*
1546  * State support for delegation.
1547  * Set the state delegation type for this state;
1548  * This routine is called from open via rfs4_grant_delegation and the entry
1549  * locks on sp and sp->rs_finfo are assumed.
1550  */
1551 static rfs4_deleg_state_t *
rfs4_deleg_state(rfs4_state_t * sp,open_delegation_type4 dtype,int * recall)1552 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall)
1553 {
1554 	rfs4_file_t *fp = sp->rs_finfo;
1555 	bool_t create = TRUE;
1556 	rfs4_deleg_state_t *dsp;
1557 	vnode_t *vp;
1558 	int open_prev = *recall;
1559 	int ret;
1560 	int fflags = 0;
1561 
1562 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1563 	ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1564 
1565 	/* Shouldn't happen */
1566 	if (fp->rf_dinfo.rd_recall_count != 0 ||
1567 	    (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1568 	    dtype != OPEN_DELEGATE_READ)) {
1569 		return (NULL);
1570 	}
1571 
1572 	/* Unlock to avoid deadlock */
1573 	rfs4_dbe_unlock(fp->rf_dbe);
1574 	rfs4_dbe_unlock(sp->rs_dbe);
1575 
1576 	dsp = rfs4_finddeleg(sp, &create);
1577 
1578 	rfs4_dbe_lock(sp->rs_dbe);
1579 	rfs4_dbe_lock(fp->rf_dbe);
1580 
1581 	if (dsp == NULL)
1582 		return (NULL);
1583 
1584 	/*
1585 	 * It is possible that since we dropped the lock
1586 	 * in order to call finddeleg, the rfs4_file_t
1587 	 * was marked such that we should not grant a
1588 	 * delegation, if so bail out.
1589 	 */
1590 	if (fp->rf_dinfo.rd_hold_grant > 0) {
1591 		rfs4_deleg_state_rele(dsp);
1592 		return (NULL);
1593 	}
1594 
1595 	if (create == FALSE) {
1596 		if (sp->rs_owner->ro_client == dsp->rds_client &&
1597 		    dsp->rds_dtype == dtype) {
1598 			return (dsp);
1599 		} else {
1600 			rfs4_deleg_state_rele(dsp);
1601 			return (NULL);
1602 		}
1603 	}
1604 
1605 	/*
1606 	 * Check that this file has not been delegated to another
1607 	 * client
1608 	 */
1609 	if (fp->rf_dinfo.rd_recall_count != 0 ||
1610 	    fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE ||
1611 	    (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1612 	    dtype != OPEN_DELEGATE_READ)) {
1613 		rfs4_deleg_state_rele(dsp);
1614 		return (NULL);
1615 	}
1616 
1617 	vp = fp->rf_vp;
1618 	/* vnevent_support returns 0 if file system supports vnevents */
1619 	if (vnevent_support(vp, NULL)) {
1620 		rfs4_deleg_state_rele(dsp);
1621 		return (NULL);
1622 	}
1623 
1624 	/* Calculate the fflags for this OPEN. */
1625 	if (sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)
1626 		fflags |= FREAD;
1627 	if (sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)
1628 		fflags |= FWRITE;
1629 
1630 	*recall = 0;
1631 	/*
1632 	 * Before granting a delegation we need to know if anyone else has
1633 	 * opened the file in a conflicting mode.  However, first we need to
1634 	 * know how we opened the file to check the counts properly.
1635 	 */
1636 	if (dtype == OPEN_DELEGATE_READ) {
1637 		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1638 		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1639 		    vn_is_mapped(vp, V_WRITE)) {
1640 			if (open_prev) {
1641 				*recall = 1;
1642 			} else {
1643 				rfs4_deleg_state_rele(dsp);
1644 				return (NULL);
1645 			}
1646 		}
1647 		ret = fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ,
1648 		    rfs4_mon_hold, rfs4_mon_rele);
1649 		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1650 		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1651 		    vn_is_mapped(vp, V_WRITE)) {
1652 			if (open_prev) {
1653 				*recall = 1;
1654 			} else {
1655 				(void) fem_uninstall(vp, deleg_rdops,
1656 				    (void *)fp);
1657 				rfs4_deleg_state_rele(dsp);
1658 				return (NULL);
1659 			}
1660 		}
1661 		/*
1662 		 * Because a client can hold onto a delegation after the
1663 		 * file has been closed, we need to keep track of the
1664 		 * access to this file.  Otherwise the CIFS server would
1665 		 * not know about the client accessing the file and could
1666 		 * inappropriately grant an OPLOCK.
1667 		 * fem_install() returns EBUSY when asked to install a
1668 		 * OPUNIQ monitor more than once.  Therefore, check the
1669 		 * return code because we only want this done once.
1670 		 */
1671 		if (ret == 0)
1672 			vn_open_upgrade(vp, FREAD);
1673 	} else { /* WRITE */
1674 		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1675 		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1676 		    ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1677 		    (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1678 		    vn_is_mapped(vp, V_RDORWR)) {
1679 			if (open_prev) {
1680 				*recall = 1;
1681 			} else {
1682 				rfs4_deleg_state_rele(dsp);
1683 				return (NULL);
1684 			}
1685 		}
1686 		ret = fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ,
1687 		    rfs4_mon_hold, rfs4_mon_rele);
1688 		if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1689 		    (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1690 		    ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1691 		    (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1692 		    vn_is_mapped(vp, V_RDORWR)) {
1693 			if (open_prev) {
1694 				*recall = 1;
1695 			} else {
1696 				(void) fem_uninstall(vp, deleg_wrops,
1697 				    (void *)fp);
1698 				rfs4_deleg_state_rele(dsp);
1699 				return (NULL);
1700 			}
1701 		}
1702 		/*
1703 		 * Because a client can hold onto a delegation after the
1704 		 * file has been closed, we need to keep track of the
1705 		 * access to this file.  Otherwise the CIFS server would
1706 		 * not know about the client accessing the file and could
1707 		 * inappropriately grant an OPLOCK.
1708 		 * fem_install() returns EBUSY when asked to install a
1709 		 * OPUNIQ monitor more than once.  Therefore, check the
1710 		 * return code because we only want this done once.
1711 		 */
1712 		if (ret == 0)
1713 			vn_open_upgrade(vp, FREAD|FWRITE);
1714 	}
1715 	/* Place on delegation list for file */
1716 	ASSERT(!list_link_active(&dsp->rds_node));
1717 	list_insert_tail(&fp->rf_delegstatelist, dsp);
1718 
1719 	dsp->rds_dtype = fp->rf_dinfo.rd_dtype = dtype;
1720 
1721 	/* Update delegation stats for this file */
1722 	fp->rf_dinfo.rd_time_lastgrant = gethrestime_sec();
1723 
1724 	/* reset since this is a new delegation */
1725 	fp->rf_dinfo.rd_conflicted_client = 0;
1726 	fp->rf_dinfo.rd_ever_recalled = FALSE;
1727 
1728 	if (dtype == OPEN_DELEGATE_READ)
1729 		fp->rf_dinfo.rd_rdgrants++;
1730 	else
1731 		fp->rf_dinfo.rd_wrgrants++;
1732 
1733 	return (dsp);
1734 }
1735 
1736 /*
1737  * State routine for the server when a delegation is returned.
1738  */
1739 void
rfs4_return_deleg(rfs4_deleg_state_t * dsp,bool_t revoked)1740 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked)
1741 {
1742 	rfs4_file_t *fp = dsp->rds_finfo;
1743 	open_delegation_type4 dtypewas;
1744 
1745 	rfs4_dbe_lock(fp->rf_dbe);
1746 
1747 	/* nothing to do if no longer on list */
1748 	if (!list_link_active(&dsp->rds_node)) {
1749 		rfs4_dbe_unlock(fp->rf_dbe);
1750 		return;
1751 	}
1752 
1753 	/* Remove state from recall list */
1754 	list_remove(&fp->rf_delegstatelist, dsp);
1755 
1756 	if (list_is_empty(&fp->rf_delegstatelist)) {
1757 		dtypewas = fp->rf_dinfo.rd_dtype;
1758 		fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
1759 		rfs4_dbe_cv_broadcast(fp->rf_dbe);
1760 
1761 		/* if file system was unshared, the vp will be NULL */
1762 		if (fp->rf_vp != NULL) {
1763 			/*
1764 			 * Once a delegation is no longer held by any client,
1765 			 * the monitor is uninstalled.  At this point, the
1766 			 * client must send OPEN otw, so we don't need the
1767 			 * reference on the vnode anymore.  The open
1768 			 * downgrade removes the reference put on earlier.
1769 			 */
1770 			if (dtypewas == OPEN_DELEGATE_READ) {
1771 				(void) fem_uninstall(fp->rf_vp, deleg_rdops,
1772 				    (void *)fp);
1773 				vn_open_downgrade(fp->rf_vp, FREAD);
1774 			} else if (dtypewas == OPEN_DELEGATE_WRITE) {
1775 				(void) fem_uninstall(fp->rf_vp, deleg_wrops,
1776 				    (void *)fp);
1777 				vn_open_downgrade(fp->rf_vp, FREAD|FWRITE);
1778 			}
1779 		}
1780 	}
1781 
1782 	switch (dsp->rds_dtype) {
1783 	case OPEN_DELEGATE_READ:
1784 		fp->rf_dinfo.rd_rdgrants--;
1785 		break;
1786 	case OPEN_DELEGATE_WRITE:
1787 		fp->rf_dinfo.rd_wrgrants--;
1788 		break;
1789 	default:
1790 		break;
1791 	}
1792 
1793 	/* used in the policy decision */
1794 	fp->rf_dinfo.rd_time_returned = gethrestime_sec();
1795 
1796 	/*
1797 	 * reset the time_recalled field so future delegations are not
1798 	 * accidentally revoked
1799 	 */
1800 	if ((fp->rf_dinfo.rd_rdgrants + fp->rf_dinfo.rd_wrgrants) == 0)
1801 		fp->rf_dinfo.rd_time_recalled = 0;
1802 
1803 	rfs4_dbe_unlock(fp->rf_dbe);
1804 
1805 	rfs4_dbe_lock(dsp->rds_dbe);
1806 
1807 	dsp->rds_dtype = OPEN_DELEGATE_NONE;
1808 
1809 	if (revoked == TRUE)
1810 		dsp->rds_time_revoked = gethrestime_sec();
1811 
1812 	rfs4_dbe_invalidate(dsp->rds_dbe);
1813 
1814 	rfs4_dbe_unlock(dsp->rds_dbe);
1815 
1816 	if (revoked == TRUE) {
1817 		rfs4_dbe_lock(dsp->rds_client->rc_dbe);
1818 		dsp->rds_client->rc_deleg_revoked++;	/* observability */
1819 		rfs4_dbe_unlock(dsp->rds_client->rc_dbe);
1820 	}
1821 }
1822 
1823 static void
rfs4_revoke_file(rfs4_file_t * fp)1824 rfs4_revoke_file(rfs4_file_t *fp)
1825 {
1826 	rfs4_deleg_state_t *dsp;
1827 
1828 	/*
1829 	 * The lock for rfs4_file_t must be held when traversing the
1830 	 * delegation list but that lock needs to be released to call
1831 	 * rfs4_return_deleg()
1832 	 */
1833 	rfs4_dbe_lock(fp->rf_dbe);
1834 	while (dsp = list_head(&fp->rf_delegstatelist)) {
1835 		rfs4_dbe_hold(dsp->rds_dbe);
1836 		rfs4_dbe_unlock(fp->rf_dbe);
1837 		rfs4_return_deleg(dsp, TRUE);
1838 		rfs4_deleg_state_rele(dsp);
1839 		rfs4_dbe_lock(fp->rf_dbe);
1840 	}
1841 	rfs4_dbe_unlock(fp->rf_dbe);
1842 }
1843 
1844 /*
1845  * A delegation is assumed to be present on the file associated with
1846  * "sp".  Check to see if the delegation matches is associated with
1847  * the same client as referenced by "sp".  If it is not, TRUE is
1848  * returned.  If the delegation DOES match the client (or no
1849  * delegation is present), return FALSE.
1850  * Assume the state entry and file entry are locked.
1851  */
1852 bool_t
rfs4_is_deleg(rfs4_state_t * sp)1853 rfs4_is_deleg(rfs4_state_t *sp)
1854 {
1855 	rfs4_deleg_state_t *dsp;
1856 	rfs4_file_t *fp = sp->rs_finfo;
1857 	rfs4_client_t *cp = sp->rs_owner->ro_client;
1858 
1859 	ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1860 	for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
1861 	    dsp = list_next(&fp->rf_delegstatelist, dsp)) {
1862 		if (cp != dsp->rds_client) {
1863 			return (TRUE);
1864 		}
1865 	}
1866 	return (FALSE);
1867 }
1868 
1869 void
rfs4_disable_delegation(void)1870 rfs4_disable_delegation(void)
1871 {
1872 	mutex_enter(&rfs4_deleg_lock);
1873 	rfs4_deleg_disabled++;
1874 	mutex_exit(&rfs4_deleg_lock);
1875 }
1876 
1877 void
rfs4_enable_delegation(void)1878 rfs4_enable_delegation(void)
1879 {
1880 	mutex_enter(&rfs4_deleg_lock);
1881 	ASSERT(rfs4_deleg_disabled > 0);
1882 	rfs4_deleg_disabled--;
1883 	mutex_exit(&rfs4_deleg_lock);
1884 }
1885 
1886 void
rfs4_mon_hold(void * arg)1887 rfs4_mon_hold(void *arg)
1888 {
1889 	rfs4_file_t *fp = arg;
1890 
1891 	rfs4_dbe_hold(fp->rf_dbe);
1892 }
1893 
1894 void
rfs4_mon_rele(void * arg)1895 rfs4_mon_rele(void *arg)
1896 {
1897 	rfs4_file_t *fp = arg;
1898 
1899 	rfs4_dbe_rele_nolock(fp->rf_dbe);
1900 }
1901