1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/systm.h> 29 #include <rpc/auth.h> 30 #include <rpc/clnt.h> 31 #include <nfs/nfs4_kprot.h> 32 #include <nfs/nfs4.h> 33 #include <nfs/lm.h> 34 #include <sys/cmn_err.h> 35 #include <sys/disp.h> 36 37 #include <sys/pathname.h> 38 39 #include <sys/strsubr.h> 40 #include <sys/ddi.h> 41 42 #include <sys/vnode.h> 43 44 #include <inet/common.h> 45 #include <inet/ip.h> 46 #include <inet/ip6.h> 47 48 #define MAX_READ_DELEGATIONS 5 49 50 krwlock_t rfs4_deleg_policy_lock; 51 srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE; 52 static int rfs4_deleg_wlp = 5; 53 kmutex_t rfs4_deleg_lock; 54 static int rfs4_deleg_disabled; 55 56 #ifdef DEBUG 57 58 static int rfs4_test_cbgetattr_fail = 0; 59 int rfs4_cb_null; 60 int rfs4_cb_debug; 61 int rfs4_deleg_debug; 62 63 #endif 64 65 static void rfs4_recall_file(rfs4_file_t *, 66 void (*recall)(rfs4_deleg_state_t *, bool_t), 67 bool_t, rfs4_client_t *); 68 static void rfs4_revoke_deleg(rfs4_deleg_state_t *); 69 static void 
rfs4_revoke_file(rfs4_file_t *);
static void rfs4_cb_chflush(rfs4_cbinfo_t *);
static CLIENT *rfs4_cb_getch(rfs4_cbinfo_t *);
static void rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t);
static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *,
		open_delegation_type4, int *);

/*
 * Convert a universal address (e.g. "h1.h2.h3.h4.p1.p2" for IPv4)
 * to a transport specific address using inet_pton.  The last two
 * dot-separated components encode the port (port = p1 * 256 + p2);
 * everything before them is the host address.  Returns 0 on success
 * or EINVAL on a malformed address.  The string is temporarily
 * modified in place but restored before returning.
 */
static int
uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp)
{
	int dots = 0, i, j, len, k;
	unsigned char c;
	in_port_t port = 0;

	len = strlen(ua);

	/*
	 * Scan backwards for the second '.' from the end; it separates
	 * the host portion from the two port octets.
	 */
	for (i = len-1; i >= 0; i--) {

		if (ua[i] == '.')
			dots++;

		if (dots == 2) {

			ua[i] = '\0';
			/*
			 * We use k to remember where to stick '.' back, since
			 * ua was kmem_alloc'd from the pool with len+1.
			 */
			k = i;
			if (inet_pton(af, ua, ap) == 1) {

				c = 0;

				/* Parse the trailing "p1.p2" port octets */
				for (j = i+1; j < len; j++) {
					if (ua[j] == '.') {
						port = c << 8;
						c = 0;
					} else if (ua[j] >= '0' &&
					    ua[j] <= '9') {
						c *= 10;
						c += ua[j] - '0';
					} else {
						ua[k] = '.';
						return (EINVAL);
					}
				}
				port += c;


				/* reset to network order */
				if (af == AF_INET) {
					*(uint32_t *)ap =
					    htonl(*(uint32_t *)ap);
					*pp = htons(port);
				} else {
					int ix;
					uint16_t *sap;

					for (sap = ap, ix = 0; ix <
					    sizeof (struct in6_addr) /
					    sizeof (uint16_t); ix++)
						sap[ix] = htons(sap[ix]);

					*pp = htons(port);
				}

				/* Restore the original string */
				ua[k] = '.';
				return (0);
			} else {
				ua[k] = '.';
				return (EINVAL);
			}
		}
	}

	return (EINVAL);
}

/*
 * Update the delegation policy with the
 * value of "new_policy"
 */
void
rfs4_set_deleg_policy(srv_deleg_policy_t new_policy)
{
	rw_enter(&rfs4_deleg_policy_lock, RW_WRITER);
	rfs4_deleg_policy = new_policy;
	rw_exit(&rfs4_deleg_policy_lock);
}

/* Take a read hold on the delegation policy so it cannot change */
void
rfs4_hold_deleg_policy(void)
{
	rw_enter(&rfs4_deleg_policy_lock, RW_READER);
}

/* Release the hold taken by rfs4_hold_deleg_policy() */
void
rfs4_rele_deleg_policy(void)
{
	rw_exit(&rfs4_deleg_policy_lock);
}


/*
 * This free function is to be used when the client struct is being
 * released and nothing at all is needed of the callback info any
 * longer.
 */
void
rfs4_cbinfo_free(rfs4_cbinfo_t *cbp)
{
	char *addr = cbp->cb_callback.cb_location.r_addr;
	char *netid = cbp->cb_callback.cb_location.r_netid;

	/* Free old address if any */

	if (addr)
		kmem_free(addr, strlen(addr) + 1);
	if (netid)
		kmem_free(netid, strlen(netid) + 1);

	/* Also free the "newer" (not yet confirmed) callback location */
	addr = cbp->cb_newer.cb_callback.cb_location.r_addr;
	netid = cbp->cb_newer.cb_callback.cb_location.r_netid;

	if (addr)
		kmem_free(addr, strlen(addr) + 1);
	if (netid)
		kmem_free(netid, strlen(netid) + 1);

	/* Tear down any cached client handles as well */
	if (cbp->cb_chc_free) {
		rfs4_cb_chflush(cbp);
	}
}

/*
 * The server uses this to check the callback path supplied by the
 * client.  The callback connection is marked "in progress" while this
 * work is going on and then eventually marked either OK or FAILED.
 * This work can be done as part of a separate thread and at the end
 * of this the thread will exit or it may be done such that the caller
 * will continue with other work.
214 */ 215 static void 216 rfs4_do_cb_null(rfs4_client_t *cp) 217 { 218 struct timeval tv; 219 CLIENT *ch; 220 rfs4_cbstate_t newstate; 221 rfs4_cbinfo_t *cbp = &cp->cbinfo; 222 223 if (cp == NULL) { 224 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 225 "rfs4_do_cb_null: no rfs4_client specified\n")); 226 return; 227 } 228 229 mutex_enter(cbp->cb_lock); 230 /* If another thread is doing CB_NULL RPC then return */ 231 if (cbp->cb_nullcaller == TRUE) { 232 mutex_exit(cbp->cb_lock); 233 rfs4_client_rele(cp); 234 return; 235 } 236 237 /* Mark the cbinfo as having a thread in the NULL callback */ 238 cbp->cb_nullcaller = TRUE; 239 240 /* 241 * Are there other threads still using the cbinfo client 242 * handles? If so, this thread must wait before going and 243 * mucking aroiund with the callback information 244 */ 245 while (cbp->cb_refcnt != 0) 246 cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock); 247 248 /* 249 * This thread itself may find that new callback info has 250 * arrived and is set up to handle this case and redrive the 251 * call to the client's callback server. 
252 */ 253 retry: 254 if (cbp->cb_newer.cb_new == TRUE && 255 cbp->cb_newer.cb_confirmed == TRUE) { 256 char *addr = cbp->cb_callback.cb_location.r_addr; 257 char *netid = cbp->cb_callback.cb_location.r_netid; 258 259 /* 260 * Free the old stuff if it exists; may be the first 261 * time through this path 262 */ 263 if (addr) 264 kmem_free(addr, strlen(addr) + 1); 265 if (netid) 266 kmem_free(netid, strlen(netid) + 1); 267 268 /* Move over the addr/netid */ 269 cbp->cb_callback.cb_location.r_addr = 270 cbp->cb_newer.cb_callback.cb_location.r_addr; 271 cbp->cb_newer.cb_callback.cb_location.r_addr = NULL; 272 cbp->cb_callback.cb_location.r_netid = 273 cbp->cb_newer.cb_callback.cb_location.r_netid; 274 cbp->cb_newer.cb_callback.cb_location.r_netid = NULL; 275 276 /* Get the program number */ 277 cbp->cb_callback.cb_program = 278 cbp->cb_newer.cb_callback.cb_program; 279 cbp->cb_newer.cb_callback.cb_program = 0; 280 281 /* Don't forget the protocol's "cb_ident" field */ 282 cbp->cb_ident = cbp->cb_newer.cb_ident; 283 cbp->cb_newer.cb_ident = 0; 284 285 /* no longer new */ 286 cbp->cb_newer.cb_new = FALSE; 287 cbp->cb_newer.cb_confirmed = FALSE; 288 289 /* get rid of the old client handles that may exist */ 290 rfs4_cb_chflush(cbp); 291 292 cbp->cb_state = CB_NONE; 293 cbp->cb_timefailed = 0; /* reset the clock */ 294 cbp->cb_notified_of_cb_path_down = TRUE; 295 } 296 297 if (cbp->cb_state != CB_NONE) { 298 cv_broadcast(cbp->cb_cv); /* let the others know */ 299 cbp->cb_nullcaller = FALSE; 300 mutex_exit(cbp->cb_lock); 301 rfs4_client_rele(cp); 302 return; 303 } 304 305 /* mark rfs4_client_t as CALLBACK NULL in progress */ 306 cbp->cb_state = CB_INPROG; 307 mutex_exit(cbp->cb_lock); 308 309 /* get/generate a client handle */ 310 if ((ch = rfs4_cb_getch(cbp)) == NULL) { 311 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 312 "rfs4_do_cb_null: failed to get client handle\n")); 313 mutex_enter(cbp->cb_lock); 314 cbp->cb_state = CB_BAD; 315 cbp->cb_timefailed = gethrestime_sec(); /* 
observability */ 316 goto retry; 317 } 318 319 320 tv.tv_sec = 30; 321 tv.tv_usec = 0; 322 if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) { 323 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 324 "rfs4_do_cb_null: clnt_call failed\n")); 325 326 newstate = CB_BAD; 327 } else { 328 newstate = CB_OK; 329 #ifdef DEBUG 330 rfs4_cb_null++; 331 #endif 332 } 333 334 /* Check to see if the client has specified new callback info */ 335 mutex_enter(cbp->cb_lock); 336 rfs4_cb_freech(cbp, ch, TRUE); 337 if (cbp->cb_newer.cb_new == TRUE && 338 cbp->cb_newer.cb_confirmed == TRUE) { 339 goto retry; /* give the CB_NULL another chance */ 340 } 341 342 cbp->cb_state = newstate; 343 if (cbp->cb_state == CB_BAD) 344 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 345 346 cv_broadcast(cbp->cb_cv); /* start up the other threads */ 347 cbp->cb_nullcaller = FALSE; 348 mutex_exit(cbp->cb_lock); 349 350 rfs4_client_rele(cp); 351 } 352 353 /* 354 * Given a client struct, inspect the callback info to see if the 355 * callback path is up and available. If it is being initialized, 356 * then wait for the CB_NULL RPC call to occur. 357 */ 358 static rfs4_cbinfo_t * 359 rfs4_cbinfo_hold(rfs4_client_t *cp) 360 { 361 rfs4_cbinfo_t *cbp = &cp->cbinfo; 362 363 retry: 364 mutex_enter(cbp->cb_lock); 365 366 if (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) { 367 /* 368 * Looks like a new callback path may be available and 369 * noone has set it up. 370 */ 371 mutex_exit(cbp->cb_lock); 372 rfs4_dbe_hold(cp->dbe); 373 rfs4_do_cb_null(cp); /* caller will release client hold */ 374 goto retry; 375 } 376 377 /* Is there a thread working on doing the CB_NULL RPC? 
*/ 378 if (cbp->cb_nullcaller == TRUE) 379 cv_wait(cbp->cb_cv, cbp->cb_lock); /* if so, wait on it */ 380 381 /* If the callback path is not okay (up and running), just quit */ 382 if (cbp->cb_state != CB_OK) { 383 mutex_exit(cbp->cb_lock); 384 return (NULL); 385 } 386 387 /* Let someone know we are using the current callback info */ 388 cbp->cb_refcnt++; 389 mutex_exit(cbp->cb_lock); 390 return (cbp); 391 } 392 393 /* 394 * The caller is done with the callback info. It may be that the 395 * caller's RPC failed and the NFSv4 client has actually provided new 396 * callback information. If so, let the caller know so they can 397 * advantage of this and maybe retry the RPC that originally failed. 398 */ 399 static int 400 rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate) 401 { 402 int cb_new = FALSE; 403 404 mutex_enter(cbp->cb_lock); 405 406 /* The caller gets a chance to mark the callback info as bad */ 407 if (newstate != CB_NOCHANGE) 408 cbp->cb_state = newstate; 409 if (newstate == CB_FAILED) { 410 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 411 cbp->cb_notified_of_cb_path_down = FALSE; 412 } 413 414 cbp->cb_refcnt--; /* no longer using the information */ 415 416 /* 417 * A thread may be waiting on this one to finish and if so, 418 * let it know that it is okay to do the CB_NULL to the 419 * client's callback server. 
420 */ 421 if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller) 422 cv_broadcast(cbp->cb_cv_nullcaller); 423 424 /* 425 * If this is the last thread to use the callback info and 426 * there is new callback information to try and no thread is 427 * there ready to do the CB_NULL, then return true to teh 428 * caller so they can do the CB_NULL 429 */ 430 if (cbp->cb_refcnt == 0 && 431 cbp->cb_nullcaller == FALSE && 432 cbp->cb_newer.cb_new == TRUE && 433 cbp->cb_newer.cb_confirmed == TRUE) 434 cb_new = TRUE; 435 436 mutex_exit(cbp->cb_lock); 437 438 return (cb_new); 439 } 440 441 /* 442 * Given the information in the callback info struct, create a client 443 * handle that can be used by the server for its callback path. 444 */ 445 static CLIENT * 446 rfs4_cbch_init(rfs4_cbinfo_t *cbp) 447 { 448 struct knetconfig knc; 449 vnode_t *vp; 450 struct sockaddr_in addr4; 451 struct sockaddr_in6 addr6; 452 void *addr, *taddr; 453 in_port_t *pp; 454 int af; 455 char *devnam; 456 int err = 0; 457 struct netbuf nb; 458 int size; 459 CLIENT *ch = NULL; 460 int useresvport = 0; 461 462 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 463 "rfs4_cbch_init: entry cbp->%p\n", (void *)cbp)); 464 465 mutex_enter(cbp->cb_lock); 466 467 if (cbp->cb_callback.cb_location.r_netid == NULL || 468 cbp->cb_callback.cb_location.r_addr == NULL) { 469 goto cb_init_out; 470 } 471 472 if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) { 473 knc.knc_semantics = NC_TPI_COTS; 474 knc.knc_protofmly = "inet"; 475 knc.knc_proto = "tcp"; 476 devnam = "/dev/tcp"; 477 af = AF_INET; 478 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp") 479 == 0) { 480 knc.knc_semantics = NC_TPI_CLTS; 481 knc.knc_protofmly = "inet"; 482 knc.knc_proto = "udp"; 483 devnam = "/dev/udp"; 484 af = AF_INET; 485 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6") 486 == 0) { 487 knc.knc_semantics = NC_TPI_COTS; 488 knc.knc_protofmly = "inet6"; 489 knc.knc_proto = "tcp"; 490 devnam = "/dev/tcp6"; 491 af = AF_INET6; 492 
} else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6") 493 == 0) { 494 knc.knc_semantics = NC_TPI_CLTS; 495 knc.knc_protofmly = "inet6"; 496 knc.knc_proto = "udp"; 497 devnam = "/dev/udp6"; 498 af = AF_INET6; 499 } else { 500 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 501 "rfs4_cbch_init: unknown transport %s\n", 502 cbp->cb_callback.cb_location.r_netid)); 503 504 goto cb_init_out; 505 } 506 507 if ((err = lookupname(devnam, UIO_SYSSPACE, FOLLOW, 508 NULLVPP, &vp)) != 0) { 509 510 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 511 "rfs4_cbch_init: lookupname failed %d\n", err)); 512 513 goto cb_init_out; 514 } 515 516 if (vp->v_type != VCHR) { 517 518 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 519 "rfs4_cbch_init: %s is not VCHR", devnam)); 520 VN_RELE(vp); 521 goto cb_init_out; 522 } 523 524 knc.knc_rdev = vp->v_rdev; 525 526 VN_RELE(vp); 527 528 if (af == AF_INET) { 529 size = sizeof (addr4); 530 bzero(&addr4, size); 531 addr4.sin_family = (sa_family_t)af; 532 addr = &addr4.sin_addr; 533 pp = &addr4.sin_port; 534 taddr = &addr4; 535 } else /* AF_INET6 */ { 536 size = sizeof (addr6); 537 bzero(&addr6, size); 538 addr6.sin6_family = (sa_family_t)af; 539 addr = &addr6.sin6_addr; 540 pp = &addr6.sin6_port; 541 taddr = &addr6; 542 } 543 544 if (uaddr2sockaddr(af, 545 cbp->cb_callback.cb_location.r_addr, addr, pp)) { 546 547 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 548 "rfs4_cbch_init: malformed universal addr: %s\n", 549 cbp->cb_callback.cb_location.r_addr)); 550 551 goto cb_init_out; 552 } 553 554 555 nb.maxlen = nb.len = size; 556 nb.buf = (char *)taddr; 557 558 if (err = clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program, 559 NFS_CB, 0, 0, curthread->t_cred, &ch)) { 560 561 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 562 "rfs4_cbch_init: clnt_tli_kcreate failed %d\n", err)); 563 ch = NULL; 564 } 565 566 /* turn off reserved port usage */ 567 (void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport); 568 569 cb_init_out: 570 mutex_exit(cbp->cb_lock); 571 return (ch); 572 } 573 574 
/*
 * Iterate over the client handle cache and
 * destroy it.
 */
static void
rfs4_cb_chflush(rfs4_cbinfo_t *cbp)
{
	CLIENT *ch;

	NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE,
	    "rfs4_cb_flush: enter cbp->%p, cb_chc_free=%d\n",
	    (void *)cbp, cbp->cb_chc_free));

	/* Pop each cached handle, destroying its auth and the handle */
	while (cbp->cb_chc_free) {
		cbp->cb_chc_free--;
		ch = cbp->cb_chc[cbp->cb_chc_free];
		cbp->cb_chc[cbp->cb_chc_free] = NULL;
		if (ch) {
			if (ch->cl_auth)
				auth_destroy(ch->cl_auth);
			clnt_destroy(ch);
		}
	}
}

/*
 * Return a client handle, either from the small
 * rfs4_client_t cache or one that we just created.
 */
static CLIENT *
rfs4_cb_getch(rfs4_cbinfo_t *cbp)
{
	CLIENT *cbch = NULL;
	uint32_t zilch = 0;

	NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE,
	    "rfs4_cb_getch: enter cbp->%p, cb_chc_free=%d\n",
	    (void *)cbp, cbp->cb_chc_free));

	mutex_enter(cbp->cb_lock);

	/* Reuse a cached handle if one is available */
	if (cbp->cb_chc_free) {
		cbp->cb_chc_free--;
		cbch = cbp->cb_chc[ cbp->cb_chc_free ];
		NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE,
		    "rfs4_cb_getch: cb_chc_free=%d ch->%p\n",
		    cbp->cb_chc_free, (void *)cbch));
		mutex_exit(cbp->cb_lock);
		/* Reset the XID on the recycled handle */
		(void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch);
		return (cbch);
	}

	mutex_exit(cbp->cb_lock);

	NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE,
	    "rfs4_cb_getch: calling rfs4_cbch_init\n"));

	/* none free so make it now */
	cbch = rfs4_cbch_init(cbp);

	NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE,
	    "rfs4_cb_getch: returning cbch->%p\n", (void *)cbch));

	return (cbch);
}

/*
 * Return the client handle to the small cache or
 * destroy it.
 */
static void
rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld)
{
	if (lockheld == FALSE)
		mutex_enter(cbp->cb_lock);

	/* Cache the handle if there is room */
	if (cbp->cb_chc_free < RFS4_CBCH_MAX) {
		cbp->cb_chc[ cbp->cb_chc_free++ ] = ch;
		if (lockheld == FALSE)
			mutex_exit(cbp->cb_lock);
		NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE,
		    "rfs4_cb_freech: caching cbp->%p, ch->%p, cb_chc_free=%d\n",
		    (void *)cbp, (void *)ch, cbp->cb_chc_free));
		return;
	}
	if (lockheld == FALSE)
		mutex_exit(cbp->cb_lock);

	NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE,
	    "rfs4_cb_freech: destroying cbp->%p, ch->%p, cb_chc_free=%d\n",
	    (void *)cbp, (void *)ch, cbp->cb_chc_free));

	/*
	 * cache maxed out of free entries, obliterate
	 * this client handle, destroy it, throw it away.
	 */
	if (ch->cl_auth)
		auth_destroy(ch->cl_auth);
	clnt_destroy(ch);
}

/*
 * With the supplied callback information - initialize the client
 * callback data.  If there is a callback in progress, save the
 * callback info so that a thread can pick it up in the future.
 */
void
rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident)
{
	char *addr = NULL;
	char *netid = NULL;
	rfs4_cbinfo_t *cbp = &cp->cbinfo;
	size_t len;

	/*
	 * Set the call back for the client.  Copy the addr/netid only
	 * if both were supplied and non-empty; otherwise the "newer"
	 * info is cleared below.
	 */
	if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' &&
	    cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') {
		len = strlen(cb->cb_location.r_addr) + 1;
		addr = kmem_alloc(len, KM_SLEEP);
		bcopy(cb->cb_location.r_addr, addr, len);
		len = strlen(cb->cb_location.r_netid) + 1;
		netid = kmem_alloc(len, KM_SLEEP);
		bcopy(cb->cb_location.r_netid, netid, len);
	}
	/* ready to save the new information but first free old, if exists */
	mutex_enter(cbp->cb_lock);

	cbp->cb_newer.cb_callback.cb_program = cb->cb_program;

	if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL)
		kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr,
		    strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1);
	cbp->cb_newer.cb_callback.cb_location.r_addr = addr;

	if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL)
		kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid,
		    strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1);
	cbp->cb_newer.cb_callback.cb_location.r_netid = netid;

	cbp->cb_newer.cb_ident = cb_ident;

	/* Callback info counts as "new" only if both pieces are present */
	if (addr && *addr && netid && *netid) {
		cbp->cb_newer.cb_new = TRUE;
		cbp->cb_newer.cb_confirmed = FALSE;
	} else {
		cbp->cb_newer.cb_new = FALSE;
		cbp->cb_newer.cb_confirmed = FALSE;
	}

	mutex_exit(cbp->cb_lock);
}

/*
 * The server uses this when processing SETCLIENTID_CONFIRM.  Callback
 * information may have been provided on SETCLIENTID and this call
 * marks that information as confirmed and then starts a thread to
 * test the callback path.
731 */ 732 void 733 rfs4_deleg_cb_check(rfs4_client_t *cp) 734 { 735 if (cp->cbinfo.cb_newer.cb_new == FALSE) 736 return; 737 738 cp->cbinfo.cb_newer.cb_confirmed = TRUE; 739 740 rfs4_dbe_hold(cp->dbe); /* hold the client struct for thread */ 741 742 (void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN, 743 minclsyspri); 744 } 745 746 static void 747 rfs4args_cb_recall_free(nfs_cb_argop4 *argop) 748 { 749 CB_RECALL4args *rec_argp; 750 751 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 752 if (rec_argp->fh.nfs_fh4_val) 753 kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len); 754 } 755 756 /* ARGSUSED */ 757 static void 758 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop) 759 { 760 CB_GETATTR4args *argp; 761 762 argp = &argop->nfs_cb_argop4_u.opcbgetattr; 763 if (argp->fh.nfs_fh4_val) 764 kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len); 765 } 766 767 static void 768 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp) 769 { 770 int i, arglen; 771 nfs_cb_argop4 *argop; 772 773 /* 774 * First free any special args alloc'd for specific ops. 775 */ 776 arglen = args->array_len; 777 argop = args->array; 778 for (i = 0; i < arglen; i++, argop++) { 779 780 switch (argop->argop) { 781 case OP_CB_RECALL: 782 rfs4args_cb_recall_free(argop); 783 break; 784 785 case OP_CB_GETATTR: 786 rfs4args_cb_getattr_free(argop); 787 break; 788 789 default: 790 NFS4_DEBUG(rfs4_deleg_debug, (CE_NOTE, 791 "rfs4freeargres: unknown op")); 792 return; 793 } 794 } 795 796 if (args->tag.utf8string_len > 0) 797 UTF8STRING_FREE(args->tag) 798 799 kmem_free(args->array, arglen * sizeof (nfs_cb_argop4)); 800 if (resp) 801 (void) xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp); 802 } 803 804 /* 805 * General callback routine for the server to the client. 
 */
static enum clnt_stat
rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args,
	CB_COMPOUND4res *res, struct timeval timeout)
{
	rfs4_cbinfo_t *cbp;
	CLIENT *ch;
	/* start with this in case cb_getch() fails */
	enum clnt_stat stat = RPC_FAILED;

	res->tag.utf8string_val = NULL;
	res->array = NULL;

	NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE,
	    "rfs4_do_callback: enter cp->%p\n", (void *)cp));

retry:
	/* Take a reference on the callback info; NULL means path not OK */
	cbp = rfs4_cbinfo_hold(cp);
	if (cbp == NULL)
		return (stat);

	/* get a client handle */
	if ((ch = rfs4_cb_getch(cbp)) != NULL) {
		/*
		 * reset the cb_ident since it may have changed in
		 * rfs4_cbinfo_hold()
		 */
		args->callback_ident = cbp->cb_ident;

		stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv,
		    (caddr_t)args, xdr_CB_COMPOUND4res,
		    (caddr_t)res, timeout);

		/* free client handle */
		rfs4_cb_freech(cbp, ch, FALSE);
	}

	NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE,
	    "rfs4_do_callback: exit with RPC status %d, %s",
	    stat, clnt_sperrno(stat)));

	/*
	 * If the rele says that there may be new callback info then
	 * retry this sequence and it may succeed as a result of the
	 * new callback path
	 */
	if (rfs4_cbinfo_rele(cbp,
	    (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE)
		goto retry;

	return (stat);
}

/*
 * Used by the NFSv4 server to get attributes for a file while
 * handling the case where a file has been write delegated.  For the
 * time being, VOP_GETATTR() is called and CB_GETATTR processing is
 * not undertaken.  This call site is maintained in case the server is
 * updated in the future to handle write delegation space guarantees.
865 */ 866 nfsstat4 867 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 868 { 869 uint_t mask; 870 int error; 871 872 mask = vap->va_mask; 873 error = VOP_GETATTR(vp, vap, flag, cr); 874 /* 875 * Some file systems clobber va_mask. it is probably wrong of 876 * them to do so, nonethless we practice defensive coding. 877 * See bug id 4276830. 878 */ 879 vap->va_mask = mask; 880 return (puterrno4(error)); 881 } 882 883 /* 884 * This is used everywhere in the v2/v3 server to allow the 885 * integration of all NFS versions and the support of delegation. For 886 * now, just call the VOP_GETATTR(). If the NFSv4 server is enhanced 887 * in the future to provide space guarantees for write delegations 888 * then this call site should be expanded to interact with the client. 889 */ 890 int 891 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 892 { 893 return (VOP_GETATTR(vp, vap, flag, cr)); 894 } 895 896 /* 897 * Place the actual cb_recall otw call to client. 
 */
static void
rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc)
{
	CB_COMPOUND4args cb4_args;
	CB_COMPOUND4res cb4_res;
	CB_RECALL4args *rec_argp;
	nfs_cb_argop4 *argop;
	int numops;
	int argoplist_size;
	struct timeval timeout;
	nfs_fh4 *fhp;
	enum clnt_stat call_stat;

	NFS4_DEBUG(rfs4_deleg_debug, (CE_NOTE, "rfs4_do_cb_recall: enter"));
	/*
	 * set up the compound args
	 */
	numops = 1;	/* CB_RECALL only */

	argoplist_size = numops * sizeof (nfs_cb_argop4);
	argop = kmem_zalloc(argoplist_size, KM_SLEEP);
	argop->argop = OP_CB_RECALL;
	rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;

	(void) str_to_utf8("cb_recall", &cb4_args.tag);
	cb4_args.minorversion = CB4_MINORVERSION;
	/* cb4_args.callback_ident is set in rfs4_do_callback() */
	cb4_args.array_len = numops;
	cb4_args.array = argop;

	/*
	 * fill in the args struct
	 */
	bcopy(&dsp->delegid.stateid, &rec_argp->stateid, sizeof (stateid4));
	rec_argp->truncate = trunc;

	/* Copy the file's filehandle into the recall args */
	fhp = &dsp->finfo->filehandle;
	rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) *
	    fhp->nfs_fh4_len, KM_SLEEP);
	nfs_fh4_copy(fhp, &rec_argp->fh);

	/* Keep track of when we did this for observability */
	dsp->time_recalled = gethrestime_sec();

	/*
	 * Set up the timeout for the callback and make the actual call.
	 * Timeout will be 80% of the lease period for this server.
	 */
	timeout.tv_sec = (rfs4_lease_time * 80) / 100;
	timeout.tv_usec = 0;

	call_stat = rfs4_do_callback(dsp->client, &cb4_args,
	    &cb4_res, timeout);

	/* A failed recall (RPC or NFS level) revokes the delegation */
	if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) {
		rfs4_revoke_deleg(dsp);
		NFS4_DEBUG(rfs4_deleg_debug, (CE_NOTE,
		    "rfs4_do_cb_recall: rpcstat=%d cbstat=%d ",
		    call_stat, cb4_res.status));
	}

	rfs4freeargres(&cb4_args, &cb4_res);
}

/* Per-delegation argument bundle for a do_recall() worker thread */
struct recall_arg {
	rfs4_deleg_state_t *dsp;
	void (*recall)(rfs4_deleg_state_t *, bool_t trunc);
	bool_t trunc;
};

/*
 * Worker thread body: issue the recall callback for one delegation
 * and decrement the per-file recall_count when done.  The thread
 * owns (and frees) its recall_arg and releases the delegation hold
 * taken by do_recall_file().
 */
static void
do_recall(struct recall_arg *arg)
{
	rfs4_deleg_state_t *dsp = arg->dsp;
	rfs4_file_t *fp = dsp->finfo;
	callb_cpr_t cpr_info;
	kmutex_t cpr_lock;

	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall");

	/*
	 * It is possible that before this thread starts
	 * the client has sent us a return_delegation, and
	 * if that is the case we do not need to send the
	 * recall callback.
	 */
	if (dsp->dtype != OPEN_DELEGATE_NONE) {
		NFS4_DEBUG(rfs4_deleg_debug,
		    (CE_NOTE, "recall = %p, state = %p, fp = %p trunc = %d",
		    (void*)arg->recall, (void*)dsp, (void*)fp, arg->trunc));

		if (arg->recall)
			(void) (*arg->recall)(dsp, arg->trunc);
	}

	mutex_enter(fp->dinfo->recall_lock);
	/*
	 * Recall count may go negative if the parent thread that is
	 * creating the individual callback threads does not modify
	 * the recall_count field before the callback thread actually
	 * gets a response from the CB_RECALL
	 */
	fp->dinfo->recall_count--;
	if (fp->dinfo->recall_count == 0)
		cv_signal(fp->dinfo->recall_cv);
	mutex_exit(fp->dinfo->recall_lock);

	mutex_enter(&cpr_lock);
	CALLB_CPR_EXIT(&cpr_info);
	mutex_destroy(&cpr_lock);

	rfs4_deleg_state_rele(dsp); /* release the hold for this thread */

	kmem_free(arg, sizeof (struct recall_arg));
}

/* Argument bundle for the per-file master recall thread */
struct master_recall_args {
	rfs4_file_t *fp;
	void (*recall)(rfs4_deleg_state_t *, bool_t);
	bool_t trunc;
};

/*
 * Master recall thread: fan out one do_recall() worker per delegation
 * held on the file, then wait for all of them to complete.  Frees the
 * master_recall_args and drops the file hold taken by
 * rfs4_recall_file().
 */
static void
do_recall_file(struct master_recall_args *map)
{
	rfs4_file_t *fp = map->fp;
	rfs4_deleg_state_t *dsp;
	struct recall_arg *arg;
	callb_cpr_t cpr_info;
	kmutex_t cpr_lock;
	int32_t recall_count;

	rfs4_dbe_lock(fp->dbe);
	/* Recall already in progress */
	if (fp->dinfo->recall_count != 0) {
		rfs4_dbe_rele_nolock(fp->dbe);
		rfs4_dbe_unlock(fp->dbe);
		kmem_free(map, sizeof (struct master_recall_args));
		return;
	}

	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,
	    "nfsv4RecallFile");

	recall_count = 0;
	/* Walk the file's delegation list, spawning one thread per entry */
	for (dsp = fp->delegationlist.next->dsp; dsp != NULL;
	    dsp = dsp->delegationlist.next->dsp) {
		arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP);
		arg->recall = map->recall;
		arg->trunc = map->trunc;

		rfs4_dbe_hold(dsp->dbe);	/* hold for receiving thread */

		arg->dsp = dsp;

		recall_count++;

		(void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN,
		    minclsyspri);
	}
	rfs4_dbe_unlock(fp->dbe);

	mutex_enter(fp->dinfo->recall_lock);
	/*
	 * Recall count may go negative if the parent thread that is
	 * creating the individual callback threads does not modify
	 * the recall_count field before the callback thread actually
	 * gets a response from the CB_RECALL
	 */
	fp->dinfo->recall_count += recall_count;
	while (fp->dinfo->recall_count)
		cv_wait(fp->dinfo->recall_cv, fp->dinfo->recall_lock);

	mutex_exit(fp->dinfo->recall_lock);

	NFS4_DEBUG(rfs4_deleg_debug, (CE_NOTE, "Recall complete for %p",
	    (void*)fp));
	rfs4_file_rele(fp);
	kmem_free(map, sizeof (struct master_recall_args));
	mutex_enter(&cpr_lock);
	CALLB_CPR_EXIT(&cpr_info);
	mutex_destroy(&cpr_lock);
}

/*
 * Kick off recall of all delegations on a file by spawning a master
 * recall thread; the thread releases the extra file hold taken here.
 */
static void
rfs4_recall_file(rfs4_file_t *fp,
	void (*recall)(rfs4_deleg_state_t *, bool_t trunc),
	bool_t trunc, rfs4_client_t *cp)
{
	struct master_recall_args *args;

	rfs4_dbe_lock(fp->dbe);
	if (fp->dinfo->dtype == OPEN_DELEGATE_NONE) {
		rfs4_dbe_unlock(fp->dbe);
		return;
	}
	rfs4_dbe_hold(fp->dbe);	/* hold for new thread */

	/*
	 * Mark the time we started the recall processing.
	 * If it has been previously recalled, do not reset the
	 * timer since this is used for the revocation decision.
 */
	if (fp->dinfo->time_recalled == 0)
		fp->dinfo->time_recalled = gethrestime_sec();
	fp->dinfo->ever_recalled = TRUE; /* used for policy decision */
	/* Client causing recall not always available */
	if (cp)
		fp->dinfo->conflicted_client = cp->clientid;

	rfs4_dbe_unlock(fp->dbe);

	args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP);
	args->fp = fp;
	args->recall = recall;
	args->trunc = trunc;

	(void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN,
	    minclsyspri);
}

/*
 * Recall the delegations on a file.  If the recall has been
 * outstanding for more than a lease period with no recent client
 * writes, revoke the delegation instead; if a recall was issued very
 * recently (within 20% of the lease), do nothing.
 */
void
rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp)
{
	time_t elapsed1, elapsed2;

	if (fp->dinfo->time_recalled != 0) {
		elapsed1 = gethrestime_sec() - fp->dinfo->time_recalled;
		elapsed2 = gethrestime_sec() - fp->dinfo->time_lastwrite;
		/* First check to see if a revocation should occur */
		if (elapsed1 > rfs4_lease_time &&
		    elapsed2 > rfs4_lease_time) {
			rfs4_revoke_file(fp);
			return;
		}
		/*
		 * Next check to see if a recall should be done again
		 * so quickly.
		 */
		if (elapsed1 <= ((rfs4_lease_time * 20) / 100))
			return;
	}
	rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp);
}

/*
 * rfs4_check_recall is called from rfs4_do_open to determine if the current
 * open conflicts with the delegation.
 * Return true if we need recall otherwise false.
 * Assumes entry locks for sp and sp->finfo are held.
 */
bool_t
rfs4_check_recall(rfs4_state_t *sp, uint32_t access)
{
	open_delegation_type4 dtype = sp->finfo->dinfo->dtype;

	switch (dtype) {
	case OPEN_DELEGATE_NONE:
		/* Not currently delegated so there is nothing to do */
		return (FALSE);
	case OPEN_DELEGATE_READ:
		/*
		 * If the access is only asking for READ then there is
		 * no conflict and nothing to do.
If it is asking 1165 * for write, then there will be conflict and the read 1166 * delegation should be recalled. 1167 */ 1168 if (access == OPEN4_SHARE_ACCESS_READ) 1169 return (FALSE); 1170 else 1171 return (TRUE); 1172 case OPEN_DELEGATE_WRITE: 1173 /* Check to see if this client has the delegation */ 1174 return (rfs4_is_deleg(sp)); 1175 } 1176 1177 return (FALSE); 1178 } 1179 1180 /* 1181 * Return the "best" allowable delegation available given the current 1182 * delegation type and the desired access and deny modes on the file. 1183 * At the point that this routine is called we know that the access and 1184 * deny modes are consistent with the file modes. 1185 */ 1186 static open_delegation_type4 1187 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp) 1188 { 1189 open_delegation_type4 dtype = fp->dinfo->dtype; 1190 uint32_t access = sp->share_access; 1191 uint32_t deny = sp->share_deny; 1192 int readcnt = 0; 1193 int writecnt = 0; 1194 1195 switch (dtype) { 1196 case OPEN_DELEGATE_NONE: 1197 /* 1198 * Determine if more than just this OPEN have the file 1199 * open and if so, no delegation may be provided to 1200 * the client. 1201 */ 1202 if (access & OPEN4_SHARE_ACCESS_WRITE) 1203 writecnt++; 1204 if (access & OPEN4_SHARE_ACCESS_READ) 1205 readcnt++; 1206 1207 if (fp->access_read > readcnt || fp->access_write > writecnt) 1208 return (OPEN_DELEGATE_NONE); 1209 1210 /* 1211 * If the client is going to write, or if the client 1212 * has exclusive access, return a write delegation. 1213 */ 1214 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1215 (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) 1216 return (OPEN_DELEGATE_WRITE); 1217 /* 1218 * If we don't want to write or we've haven't denied read 1219 * access to others, return a read delegation. 
1220 */ 1221 if ((access & ~OPEN4_SHARE_ACCESS_WRITE) || 1222 (deny & ~OPEN4_SHARE_DENY_READ)) 1223 return (OPEN_DELEGATE_READ); 1224 1225 /* Shouldn't get here */ 1226 return (OPEN_DELEGATE_NONE); 1227 1228 case OPEN_DELEGATE_READ: 1229 /* 1230 * If the file is delegated for read but we wan't to 1231 * write or deny others to read then we can't delegate 1232 * the file. We shouldn't get here since the delegation should 1233 * have been recalled already. 1234 */ 1235 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1236 (deny & OPEN4_SHARE_DENY_READ)) 1237 return (OPEN_DELEGATE_NONE); 1238 return (OPEN_DELEGATE_READ); 1239 1240 case OPEN_DELEGATE_WRITE: 1241 return (OPEN_DELEGATE_WRITE); 1242 } 1243 1244 /* Shouldn't get here */ 1245 return (OPEN_DELEGATE_NONE); 1246 } 1247 1248 /* 1249 * Given the desired delegation type and the "history" of the file 1250 * determine the actual delegation type to return. 1251 */ 1252 static open_delegation_type4 1253 rfs4_delegation_policy(open_delegation_type4 dtype, 1254 rfs4_dinfo_t *dinfo, clientid4 cid) 1255 { 1256 time_t elapsed; 1257 1258 if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE) 1259 return (OPEN_DELEGATE_NONE); 1260 1261 /* 1262 * Has this file/delegation ever been recalled? If not then 1263 * no furhter checks for a delegation race need to be done. 1264 * However if a recall has occurred, then check to see if a 1265 * client has caused its own delegation recall to occur. If 1266 * not, then has a delegation for this file been returned 1267 * recently? If so, then do not assign a new delegation to 1268 * avoid a "delegation race" between the original client and 1269 * the new/conflicting client. 
 */
        if (dinfo->ever_recalled == TRUE) {
                if (dinfo->conflicted_client != cid) {
                        elapsed = gethrestime_sec() - dinfo->time_returned;
                        if (elapsed < rfs4_lease_time)
                                return (OPEN_DELEGATE_NONE);
                }
        }

        /* Limit the number of read grants */
        if (dtype == OPEN_DELEGATE_READ &&
            dinfo->rdgrants > MAX_READ_DELEGATIONS)
                return (OPEN_DELEGATE_NONE);

        /*
         * Should consider limiting total number of read/write
         * delegations the server will permit.
         */

        return (dtype);
}

/*
 * Try and grant a delegation for an open given the state. The routine
 * returns the delegation type granted. This could be OPEN_DELEGATE_NONE.
 *
 * The state and associated file entry must be locked
 */
rfs4_deleg_state_t *
rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall)
{
        rfs4_file_t *fp = sp->finfo;
        open_delegation_type4 dtype;
        int no_delegation;

        ASSERT(rfs4_dbe_islocked(sp->dbe));
        ASSERT(rfs4_dbe_islocked(fp->dbe));

        /* Is the server even providing delegations? */
        if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE)
                return (NULL);

        /* Check to see if delegations have been temporarily disabled */
        mutex_enter(&rfs4_deleg_lock);
        no_delegation = rfs4_deleg_disabled;
        mutex_exit(&rfs4_deleg_lock);

        if (no_delegation)
                return (NULL);

        /* Don't grant a delegation if a deletion is impending. */
        if (fp->dinfo->hold_grant > 0) {
                NFS4_DEBUG(rfs4_deleg_debug,
                    (CE_NOTE, "rfs4_grant_delegation: hold_grant is set"));
                return (NULL);
        }

        /*
         * Don't grant a delegation if there are any lock manager
         * (NFSv2/v3) locks for the file. This is a bit of a hack (e.g.,
         * if there are only read locks we should be able to grant a
         * read-only delegation), but it's good enough for now.
         *
         * MT safety: the lock manager checks for conflicting delegations
         * before processing a lock request. That check will block until
         * we are done here. So if the lock manager acquires a lock after
         * we decide to grant the delegation, the delegation will get
         * immediately recalled (if there's a conflict), so we're safe.
         */
        if (lm_vp_active(fp->vp)) {
                NFS4_DEBUG(rfs4_deleg_debug,
                    (CE_NOTE, "rfs4_grant_delegation: NLM lock"));
                return (NULL);
        }

        /*
         * Based on the type of delegation request passed in, take the
         * appropriate action (DELEG_NONE is handled above)
         */
        switch (dreq) {

        case DELEG_READ:
        case DELEG_WRITE:
                /*
                 * The server "must" grant the delegation in this case.
                 * Client is using open previous
                 */
                dtype = (open_delegation_type4)dreq;
                *recall = 1;
                break;
        case DELEG_ANY:
                /*
                 * If a valid callback path does not exist, no delegation may
                 * be granted.
                 */
                if (sp->owner->client->cbinfo.cb_state != CB_OK)
                        return (NULL);

                /*
                 * If the original operation which caused time_rm_delayed
                 * to be set hasn't been retried and completed for one
                 * full lease period, clear it and allow delegations to
                 * get granted again.
                 */
                if (fp->dinfo->time_rm_delayed > 0 &&
                    gethrestime_sec() >
                    fp->dinfo->time_rm_delayed + rfs4_lease_time)
                        fp->dinfo->time_rm_delayed = 0;

                /*
                 * If we are waiting for a delegation to be returned then
                 * don't delegate this file. We do this for correctness as
                 * well as if the file is being recalled we would likely
                 * recall this file again.
                 */

                if (fp->dinfo->time_recalled != 0 ||
                    fp->dinfo->time_rm_delayed != 0)
                        return (NULL);

                /* Get the "best" delegation candidate */
                dtype = rfs4_check_delegation(sp, fp);

                if (dtype == OPEN_DELEGATE_NONE)
                        return (NULL);

                /*
                 * Based on policy and the history of the file get the
                 * actual delegation.
                 */
                dtype = rfs4_delegation_policy(dtype, fp->dinfo,
                    sp->owner->client->clientid);

                NFS4_DEBUG(rfs4_deleg_debug,
                    (CE_NOTE, "Grant policy dtype = %d", dtype));
                if (dtype == OPEN_DELEGATE_NONE)
                        return (NULL);
                break;
        default:
                return (NULL);
        }

        /* set the delegation for the state */
        return (rfs4_deleg_state(sp, dtype, recall));
}

/*
 * Fill in the open_delegation4 portion of an OPEN response ("dp") from
 * the granted delegation state "dsp".  "ace" optionally supplies the
 * permission ACE to advertise; when NULL, a deny-everyone ACE is
 * constructed instead.  "recall" is copied into the response's recall
 * flag.
 */
void
rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp,
        nfsace4 *ace, int recall)
{
        open_write_delegation4 *wp;
        open_read_delegation4 *rp;
        nfs_space_limit4 *spl;
        nfsace4 nace;

        /*
         * We need to allocate a new copy of the who string.
         * this string will be freed by the rfs4_op_open dis_resfree
         * routine. We need to do this allocation since replays will
         * be allocated and rfs4_compound can't tell the difference from
         * a replay and an initial open. N.B. if an ace is passed in, it
         * is the caller's responsibility to free it.
         */

        if (ace == NULL) {
                /*
                 * Default is to deny all access, the client will have
                 * to contact the server. XXX Do we want to actually
                 * set a deny for every one, or do we simply want to
                 * construct an entity that will match no one?
 */
                nace.type = ACE4_ACCESS_DENIED_ACE_TYPE;
                nace.flag = 0;
                nace.access_mask = ACE4_VALID_MASK_BITS;
                (void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who);
        } else {
                nace.type = ace->type;
                nace.flag = ace->flag;
                nace.access_mask = ace->access_mask;
                (void) utf8_copy(&ace->who, &nace.who);
        }

        dp->delegation_type = dsp->dtype;

        switch (dsp->dtype) {
        case OPEN_DELEGATE_NONE:
                break;
        case OPEN_DELEGATE_READ:
                rp = &dp->open_delegation4_u.read;
                rp->stateid = dsp->delegid.stateid;
                rp->recall = (bool_t)recall;
                rp->permissions = nace;
                break;
        case OPEN_DELEGATE_WRITE:
                wp = &dp->open_delegation4_u.write;
                wp->stateid = dsp->delegid.stateid;
                wp->recall = (bool_t)recall;
                spl = &wp->space_limit;
                spl->limitby = NFS_LIMIT_SIZE;
                spl->nfs_space_limit4_u.filesize = 0;
                wp->permissions = nace;
                break;
        }
}

/*
 * Check if the file is delegated via the provided file struct.
 * Return TRUE if it is delegated. This is intended for use by
 * the v4 server. The v2/v3 server code should use rfs4_check_delegated().
 *
 * Note that if the file is found to have a delegation, it is
 * recalled, unless the clientid of the caller matches the clientid of the
 * delegation. If the caller has specified, there is a slight delay
 * inserted in the hopes that the delegation will be returned quickly.
 */
bool_t
rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp,
        bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp)
{
        rfs4_deleg_state_t *dsp;

        /* Is delegation enabled? */
        if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
                return (FALSE);

        /* do we have a delegation on this file? */
        rfs4_dbe_lock(fp->dbe);
        if (fp->dinfo->dtype == OPEN_DELEGATE_NONE) {
                /* is_rm: block new grants while a remove/rename is pending */
                if (is_rm)
                        fp->dinfo->hold_grant++;
                rfs4_dbe_unlock(fp->dbe);
                return (FALSE);
        }
        /*
         * do we have a write delegation on this file or are we
         * requesting write access to a file with any type of existing
         * delegation?
         */
        if (mode == FWRITE || fp->dinfo->dtype == OPEN_DELEGATE_WRITE) {
                if (cp != NULL) {
                        dsp = fp->delegationlist.next->dsp;
                        if (dsp == NULL) {
                                rfs4_dbe_unlock(fp->dbe);
                                return (FALSE);
                        }
                        /*
                         * Does the requestor already own the delegation?
                         */
                        if (dsp->client->clientid == *(cp)) {
                                rfs4_dbe_unlock(fp->dbe);
                                return (FALSE);
                        }
                }

                rfs4_dbe_unlock(fp->dbe);
                rfs4_recall_deleg(fp, trunc, NULL);

                /* Without do_delay, report the conflict immediately */
                if (!do_delay) {
                        rfs4_dbe_lock(fp->dbe);
                        fp->dinfo->time_rm_delayed = gethrestime_sec();
                        rfs4_dbe_unlock(fp->dbe);
                        return (TRUE);
                }

                /* Give the client a moment to return the delegation */
                delay(NFS4_DELEGATION_CONFLICT_DELAY);

                rfs4_dbe_lock(fp->dbe);
                if (fp->dinfo->dtype != OPEN_DELEGATE_NONE) {
                        fp->dinfo->time_rm_delayed = gethrestime_sec();
                        rfs4_dbe_unlock(fp->dbe);
                        return (TRUE);
                }
        }
        if (is_rm)
                fp->dinfo->hold_grant++;
        rfs4_dbe_unlock(fp->dbe);
        return (FALSE);
}

/*
 * Check if the file is delegated in the case of a v2 or v3 access.
 * Return TRUE if it is delegated which in turn means that v2 should
 * drop the request and in the case of v3 JUKEBOX should be returned.
 */
bool_t
rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc)
{
        rfs4_file_t *fp;
        bool_t create = FALSE;
        bool_t rc = FALSE;

        rfs4_hold_deleg_policy();

        /* Is delegation enabled? */
        if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) {
                fp = rfs4_findfile(vp, NULL, &create);
                if (fp != NULL) {
                        if (rfs4_check_delegated_byfp(mode, fp, trunc,
                            TRUE, FALSE, NULL)) {
                                rc = TRUE;
                        }
                        rfs4_file_rele(fp);
                }
        }
        rfs4_rele_deleg_policy();
        return (rc);
}

/*
 * Release a hold on the hold_grant counter which
 * prevents delegation from being granted while a remove
 * or a rename is in progress.
 */
void
rfs4_clear_dont_grant(rfs4_file_t *fp)
{
        if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
                return;
        rfs4_dbe_lock(fp->dbe);
        ASSERT(fp->dinfo->hold_grant > 0);
        fp->dinfo->hold_grant--;
        fp->dinfo->time_rm_delayed = 0;
        rfs4_dbe_unlock(fp->dbe);
}

/*
 * State support for delegation.
 * Set the state delegation type for this state;
 * This routine is called from open via rfs4_grant_delegation and the entry
 * locks on sp and sp->finfo are assumed.
 */
static rfs4_deleg_state_t *
rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall)
{
        rfs4_file_t *fp = sp->finfo;
        bool_t create = TRUE;
        rfs4_deleg_state_t *dsp;
        vnode_t *vp;
        int open_prev = *recall;

        ASSERT(rfs4_dbe_islocked(sp->dbe));
        ASSERT(rfs4_dbe_islocked(fp->dbe));

        /* Shouldn't happen */
        if (fp->dinfo->recall_count != 0 ||
            (fp->dinfo->dtype == OPEN_DELEGATE_READ &&
            dtype != OPEN_DELEGATE_READ)) {
                return (NULL);
        }

        /* Unlock to avoid deadlock */
        rfs4_dbe_unlock(fp->dbe);
        rfs4_dbe_unlock(sp->dbe);

        dsp = rfs4_finddeleg(sp, &create);

        rfs4_dbe_lock(sp->dbe);
        rfs4_dbe_lock(fp->dbe);

        if (dsp == NULL)
                return (NULL);

        /*
         * It is possible that since we dropped the lock
         * in order to call finddeleg, the rfs4_file_t
         * was marked such that we should not grant a
         * delegation, if so bail out.
         */
        if (fp->dinfo->hold_grant > 0) {
                rfs4_deleg_state_rele(dsp);
                return (NULL);
        }

        /* create == FALSE: finddeleg returned an existing entry */
        if (create == FALSE) {
                if (sp->owner->client == dsp->client &&
                    dsp->dtype == dtype) {
                        return (dsp);
                } else {
                        rfs4_deleg_state_rele(dsp);
                        return (NULL);
                }
        }

        /*
         * Check that this file has not been delegated to another
         * client
         */
        if (fp->dinfo->recall_count != 0 ||
            fp->dinfo->dtype == OPEN_DELEGATE_WRITE ||
            (fp->dinfo->dtype == OPEN_DELEGATE_READ &&
            dtype != OPEN_DELEGATE_READ)) {
                rfs4_deleg_state_rele(dsp);
                return (NULL);
        }

        vp = fp->vp;
        /* vnevent_support returns 0 if file system supports vnevents */
        if (vnevent_support(vp)) {
                rfs4_deleg_state_rele(dsp);
                return (NULL);
        }

        *recall = 0;
        /*
         * Install the FEM monitor for the delegation type, checking for
         * conflicting opens/mappings both before and after installation
         * to close the race with an open arriving while the monitor is
         * going in.  When open_prev is set (caller passed *recall == 1,
         * the "open previous" grant path), the delegation is granted
         * anyway but flagged for immediate recall.
         */
        if (dtype == OPEN_DELEGATE_READ) {
                if (vn_is_opened(vp, V_WRITE) || vn_is_mapped(vp, V_WRITE)) {
                        if (open_prev) {
                                *recall = 1;
                        } else {
                                rfs4_deleg_state_rele(dsp);
                                return (NULL);
                        }
                }
                (void) fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ,
                    rfs4_mon_hold, rfs4_mon_rele);
                if (vn_is_opened(vp, V_WRITE) || vn_is_mapped(vp, V_WRITE)) {
                        if (open_prev) {
                                *recall = 1;
                        } else {
                                (void) fem_uninstall(vp, deleg_rdops,
                                    (void *)fp);
                                rfs4_deleg_state_rele(dsp);
                                return (NULL);
                        }
                }
        } else { /* WRITE */
                if (vn_is_opened(vp, V_RDORWR) || vn_is_mapped(vp, V_RDORWR)) {
                        if (open_prev) {
                                *recall = 1;
                        } else {
                                rfs4_deleg_state_rele(dsp);
                                return (NULL);
                        }
                }
                (void) fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ,
                    rfs4_mon_hold, rfs4_mon_rele);
                if (vn_is_opened(vp, V_RDORWR) || vn_is_mapped(vp, V_RDORWR)) {
                        if (open_prev) {
                                *recall = 1;
                        } else {
                                (void) fem_uninstall(vp, deleg_wrops,
                                    (void *)fp);
                                rfs4_deleg_state_rele(dsp);
                                return (NULL);
                        }
                }
        }

        /* Place on delegation list for file */
        insque(&dsp->delegationlist, fp->delegationlist.prev);

        dsp->dtype = fp->dinfo->dtype = dtype;

        /* Update delegation stats for this file */
        fp->dinfo->time_lastgrant = gethrestime_sec();

        /* reset since this is a new delegation */
        fp->dinfo->conflicted_client = 0;
        fp->dinfo->ever_recalled = FALSE;

        if (dtype == OPEN_DELEGATE_READ)
                fp->dinfo->rdgrants++;
        else
                fp->dinfo->wrgrants++;

        return (dsp);
}

/*
 * State routine for the server when a delegation is returned.
 */
void
rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked)
{
        rfs4_file_t *fp = dsp->finfo;
        open_delegation_type4 dtypewas;

        rfs4_dbe_lock(fp->dbe);
        /* Remove state from recall list */

        remque(&dsp->delegationlist);
        dsp->delegationlist.next = dsp->delegationlist.prev =
            &dsp->delegationlist;

        /* Was this the last delegation on the file? */
        if (&fp->delegationlist == fp->delegationlist.next) {
                dtypewas = fp->dinfo->dtype;
                fp->dinfo->dtype = OPEN_DELEGATE_NONE;
                rfs4_dbe_cv_broadcast(fp->dbe);

                /* if file system was unshared, the vp will be NULL */
                if (fp->vp != NULL) {
                        if (dtypewas == OPEN_DELEGATE_READ)
                                (void) fem_uninstall(fp->vp, deleg_rdops,
                                    (void *)fp);
                        else
                                (void) fem_uninstall(fp->vp, deleg_wrops,
                                    (void *)fp);
                }
        }

        switch (dsp->dtype) {
        case OPEN_DELEGATE_READ:
                fp->dinfo->rdgrants--;
                break;
        case OPEN_DELEGATE_WRITE:
                fp->dinfo->wrgrants--;
                break;
        default:
                break;
        }

        /* used in the policy decision */
        fp->dinfo->time_returned = gethrestime_sec();

        /*
         * reset the time_recalled field so future delegations are not
         * accidentally revoked
         */
        if ((fp->dinfo->rdgrants + fp->dinfo->wrgrants) == 0)
                fp->dinfo->time_recalled = 0;

        rfs4_dbe_unlock(fp->dbe);

        rfs4_dbe_lock(dsp->dbe);

        dsp->dtype = OPEN_DELEGATE_NONE;

        if (revoked == TRUE)
                dsp->time_revoked = gethrestime_sec();

        rfs4_dbe_invalidate(dsp->dbe);

        rfs4_dbe_unlock(dsp->dbe);

        if (revoked == TRUE) {
                rfs4_dbe_lock(dsp->client->dbe);
                dsp->client->deleg_revoked++;   /* observability */
                rfs4_dbe_unlock(dsp->client->dbe);
        }
}

/* Revoke (rather than voluntarily return) a single delegation. */
static void
rfs4_revoke_deleg(rfs4_deleg_state_t *dsp)
{
        rfs4_return_deleg(dsp, TRUE);
}

/* Revoke every delegation currently granted on "fp". */
static void
rfs4_revoke_file(rfs4_file_t *fp)
{
        rfs4_deleg_state_t *dsp;

        /*
         * The lock for rfs4_file_t must be held when traversing the
         * delegation list but that lock needs to be released to call
         * rfs4_revoke_deleg()
         * This for loop is set up to check the list for being empty,
         * and locking the rfs4_file_t struct on init and end
         */
        for (rfs4_dbe_lock(fp->dbe);
            &fp->delegationlist != fp->delegationlist.next;
            rfs4_dbe_lock(fp->dbe)) {

                dsp = fp->delegationlist.next->dsp;
                rfs4_dbe_hold(dsp->dbe);
                rfs4_dbe_unlock(fp->dbe);
                rfs4_revoke_deleg(dsp);
                rfs4_deleg_state_rele(dsp);
        }
        rfs4_dbe_unlock(fp->dbe);
}

/*
 * A delegation is assumed to be present on the file associated with
 * "state". Check to see if the delegation is associated with
 * the same client as referenced by "state". If it is not, TRUE is
 * returned. If the delegation DOES match the client (or no
 * delegation is present), return FALSE.
 * Assume the state entry and file entry are locked
 */
bool_t
rfs4_is_deleg(rfs4_state_t *state)
{
        rfs4_deleg_state_t *dsp;
        rfs4_file_t *fp = state->finfo;
        rfs4_client_t *cp = state->owner->client;

        NFS4_DEBUG(rfs4_deleg_debug,
            (CE_NOTE, "rfs4_is_deleg enter: cp = %p", (void*)cp));

        ASSERT(rfs4_dbe_islocked(fp->dbe));
        /*
         * Walk the file's delegation list looking for an entry owned
         * by a different client.  NOTE(review): termination relies on
         * the list head's dsp being NULL -- confirm against the
         * delegationlist definition.
         */
        for (dsp = fp->delegationlist.next->dsp; dsp != NULL;
            dsp = dsp->delegationlist.next->dsp) {
                NFS4_DEBUG(rfs4_deleg_debug,
                    (CE_NOTE, "rfs4_is_deleg: client = %p",
                    (void*)dsp->client));
                if (cp != dsp->client) {
                        NFS4_DEBUG(rfs4_deleg_debug,
                            (CE_NOTE, "rfs4_is_deleg is true"));
                        return (TRUE);
                }
        }
        return (FALSE);
}

/*
 * Temporarily disable delegation grants.  Nestable; each call must be
 * balanced by a matching rfs4_enable_delegation() call.
 */
void
rfs4_disable_delegation(void)
{
        mutex_enter(&rfs4_deleg_lock);
        rfs4_deleg_disabled++;
        mutex_exit(&rfs4_deleg_lock);
}

/* Undo one rfs4_disable_delegation() call. */
void
rfs4_enable_delegation(void)
{
        mutex_enter(&rfs4_deleg_lock);
        ASSERT(rfs4_deleg_disabled > 0);
        rfs4_deleg_disabled--;
        mutex_exit(&rfs4_deleg_lock);
}

/* FEM monitor hold callback: take a reference on the monitored file. */
void
rfs4_mon_hold(void *arg)
{
        rfs4_file_t *fp = arg;

        rfs4_dbe_hold(fp->dbe);
}

/* FEM monitor release callback: drop the rfs4_mon_hold() reference. */
void
rfs4_mon_rele(void *arg)
{
        rfs4_file_t *fp = arg;

        rfs4_dbe_rele_nolock(fp->dbe);
}