1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/systm.h> 30 #include <rpc/auth.h> 31 #include <rpc/clnt.h> 32 #include <nfs/nfs4_kprot.h> 33 #include <nfs/nfs4.h> 34 #include <nfs/lm.h> 35 #include <sys/cmn_err.h> 36 #include <sys/disp.h> 37 38 #include <sys/pathname.h> 39 40 #include <sys/strsubr.h> 41 #include <sys/ddi.h> 42 43 #include <sys/vnode.h> 44 45 #include <inet/common.h> 46 #include <inet/ip.h> 47 #include <inet/ip6.h> 48 49 #define MAX_READ_DELEGATIONS 5 50 51 krwlock_t rfs4_deleg_policy_lock; 52 srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE; 53 static int rfs4_deleg_wlp = 5; 54 kmutex_t rfs4_deleg_lock; 55 static int rfs4_deleg_disabled; 56 57 #ifdef DEBUG 58 59 static int rfs4_test_cbgetattr_fail = 0; 60 int rfs4_cb_null; 61 int rfs4_cb_debug; 62 int rfs4_deleg_debug; 63 64 #endif 65 66 static void rfs4_recall_file(rfs4_file_t *, 67 void (*recall)(rfs4_deleg_state_t *, bool_t), 68 bool_t, rfs4_client_t *); 69 static void rfs4_revoke_deleg(rfs4_deleg_state_t *); 70 static void rfs4_revoke_file(rfs4_file_t *); 71 static void rfs4_cb_chflush(rfs4_cbinfo_t *); 72 static CLIENT *rfs4_cb_getch(rfs4_cbinfo_t *); 73 static void rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t); 74 static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *, 75 open_delegation_type4, int *); 76 77 /* 78 * Convert a universal address to an transport specific 79 * address using inet_pton. 80 */ 81 static int 82 uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp) 83 { 84 int dots = 0, i, j, len, k; 85 unsigned char c; 86 in_port_t port = 0; 87 88 len = strlen(ua); 89 90 for (i = len-1; i >= 0; i--) { 91 92 if (ua[i] == '.') 93 dots++; 94 95 if (dots == 2) { 96 97 ua[i] = '\0'; 98 /* 99 * We use k to remember were to stick '.' back, since 100 * ua was kmem_allocateded from the pool len+1. 101 */ 102 k = i; 103 if (inet_pton(af, ua, ap) == 1) { 104 105 c = 0; 106 107 for (j = i+1; j < len; j++) { 108 if (ua[j] == '.') { 109 port = c << 8; 110 c = 0; 111 } else if (ua[j] >= '0' && 112 ua[j] <= '9') { 113 c *= 10; 114 c += ua[j] - '0'; 115 } else { 116 ua[k] = '.'; 117 return (EINVAL); 118 } 119 } 120 port += c; 121 122 123 /* reset to network order */ 124 if (af == AF_INET) { 125 *(uint32_t *)ap = 126 htonl(*(uint32_t *)ap); 127 *pp = htons(port); 128 } else { 129 int ix; 130 uint16_t *sap; 131 132 for (sap = ap, ix = 0; ix < 133 sizeof (struct in6_addr) / 134 sizeof (uint16_t); ix++) 135 sap[ix] = htons(sap[ix]); 136 137 *pp = htons(port); 138 } 139 140 ua[k] = '.'; 141 return (0); 142 } else { 143 ua[k] = '.'; 144 return (EINVAL); 145 } 146 } 147 } 148 149 return (EINVAL); 150 } 151 152 /* 153 * Update the delegation policy with the 154 * value of "new_policy" 155 */ 156 void 157 rfs4_set_deleg_policy(srv_deleg_policy_t new_policy) 158 { 159 rw_enter(&rfs4_deleg_policy_lock, RW_WRITER); 160 rfs4_deleg_policy = new_policy; 161 rw_exit(&rfs4_deleg_policy_lock); 162 } 163 164 void 165 rfs4_hold_deleg_policy(void) 166 { 167 rw_enter(&rfs4_deleg_policy_lock, RW_READER); 168 } 169 170 void 171 rfs4_rele_deleg_policy(void) 172 { 173 rw_exit(&rfs4_deleg_policy_lock); 174 } 175 176 177 /* 178 * This free function is to be used when the client struct is being 179 * released and nothing at all is needed of the callback info any 180 * longer. 181 */ 182 void 183 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp) 184 { 185 char *addr = cbp->cb_callback.cb_location.r_addr; 186 char *netid = cbp->cb_callback.cb_location.r_netid; 187 188 /* Free old address if any */ 189 190 if (addr) 191 kmem_free(addr, strlen(addr) + 1); 192 if (netid) 193 kmem_free(netid, strlen(netid) + 1); 194 195 addr = cbp->cb_newer.cb_callback.cb_location.r_addr; 196 netid = cbp->cb_newer.cb_callback.cb_location.r_netid; 197 198 if (addr) 199 kmem_free(addr, strlen(addr) + 1); 200 if (netid) 201 kmem_free(netid, strlen(netid) + 1); 202 203 if (cbp->cb_chc_free) { 204 rfs4_cb_chflush(cbp); 205 } 206 } 207 208 /* 209 * The server uses this to check the callback path supplied by the 210 * client. The callback connection is marked "in progress" while this 211 * work is going on and then eventually marked either OK or FAILED. 212 * This work can be done as part of a separate thread and at the end 213 * of this the thread will exit or it may be done such that the caller 214 * will continue with other work. 215 */ 216 static void 217 rfs4_do_cb_null(rfs4_client_t *cp) 218 { 219 struct timeval tv; 220 CLIENT *ch; 221 rfs4_cbstate_t newstate; 222 rfs4_cbinfo_t *cbp = &cp->cbinfo; 223 224 if (cp == NULL) { 225 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 226 "rfs4_do_cb_null: no rfs4_client specified\n")); 227 return; 228 } 229 230 mutex_enter(cbp->cb_lock); 231 /* If another thread is doing CB_NULL RPC then return */ 232 if (cbp->cb_nullcaller == TRUE) { 233 mutex_exit(cbp->cb_lock); 234 rfs4_client_rele(cp); 235 return; 236 } 237 238 /* Mark the cbinfo as having a thread in the NULL callback */ 239 cbp->cb_nullcaller = TRUE; 240 241 /* 242 * Are there other threads still using the cbinfo client 243 * handles? If so, this thread must wait before going and 244 * mucking aroiund with the callback information 245 */ 246 while (cbp->cb_refcnt != 0) 247 cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock); 248 249 /* 250 * This thread itself may find that new callback info has 251 * arrived and is set up to handle this case and redrive the 252 * call to the client's callback server. 253 */ 254 retry: 255 if (cbp->cb_newer.cb_new == TRUE && 256 cbp->cb_newer.cb_confirmed == TRUE) { 257 char *addr = cbp->cb_callback.cb_location.r_addr; 258 char *netid = cbp->cb_callback.cb_location.r_netid; 259 260 /* 261 * Free the old stuff if it exists; may be the first 262 * time through this path 263 */ 264 if (addr) 265 kmem_free(addr, strlen(addr) + 1); 266 if (netid) 267 kmem_free(netid, strlen(netid) + 1); 268 269 /* Move over the addr/netid */ 270 cbp->cb_callback.cb_location.r_addr = 271 cbp->cb_newer.cb_callback.cb_location.r_addr; 272 cbp->cb_newer.cb_callback.cb_location.r_addr = NULL; 273 cbp->cb_callback.cb_location.r_netid = 274 cbp->cb_newer.cb_callback.cb_location.r_netid; 275 cbp->cb_newer.cb_callback.cb_location.r_netid = NULL; 276 277 /* Get the program number */ 278 cbp->cb_callback.cb_program = 279 cbp->cb_newer.cb_callback.cb_program; 280 cbp->cb_newer.cb_callback.cb_program = 0; 281 282 /* Don't forget the protocol's "cb_ident" field */ 283 cbp->cb_ident = cbp->cb_newer.cb_ident; 284 cbp->cb_newer.cb_ident = 0; 285 286 /* no longer new */ 287 cbp->cb_newer.cb_new = FALSE; 288 cbp->cb_newer.cb_confirmed = FALSE; 289 290 /* get rid of the old client handles that may exist */ 291 rfs4_cb_chflush(cbp); 292 293 cbp->cb_state = CB_NONE; 294 cbp->cb_timefailed = 0; /* reset the clock */ 295 cbp->cb_notified_of_cb_path_down = TRUE; 296 } 297 298 if (cbp->cb_state != CB_NONE) { 299 cv_broadcast(cbp->cb_cv); /* let the others know */ 300 cbp->cb_nullcaller = FALSE; 301 mutex_exit(cbp->cb_lock); 302 rfs4_client_rele(cp); 303 return; 304 } 305 306 /* mark rfs4_client_t as CALLBACK NULL in progress */ 307 cbp->cb_state = CB_INPROG; 308 mutex_exit(cbp->cb_lock); 309 310 /* get/generate a client handle */ 311 if ((ch = rfs4_cb_getch(cbp)) == NULL) { 312 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 313 "rfs4_do_cb_null: failed to get client handle\n")); 314 mutex_enter(cbp->cb_lock); 315 cbp->cb_state = CB_BAD; 316 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 317 goto retry; 318 } 319 320 321 tv.tv_sec = 30; 322 tv.tv_usec = 0; 323 if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) { 324 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 325 "rfs4_do_cb_null: clnt_call failed\n")); 326 327 newstate = CB_BAD; 328 } else { 329 newstate = CB_OK; 330 #ifdef DEBUG 331 rfs4_cb_null++; 332 #endif 333 } 334 335 /* Check to see if the client has specified new callback info */ 336 mutex_enter(cbp->cb_lock); 337 rfs4_cb_freech(cbp, ch, TRUE); 338 if (cbp->cb_newer.cb_new == TRUE && 339 cbp->cb_newer.cb_confirmed == TRUE) { 340 goto retry; /* give the CB_NULL another chance */ 341 } 342 343 cbp->cb_state = newstate; 344 if (cbp->cb_state == CB_BAD) 345 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 346 347 cv_broadcast(cbp->cb_cv); /* start up the other threads */ 348 cbp->cb_nullcaller = FALSE; 349 mutex_exit(cbp->cb_lock); 350 351 rfs4_client_rele(cp); 352 } 353 354 /* 355 * Given a client struct, inspect the callback info to see if the 356 * callback path is up and available. If it is being initialized, 357 * then wait for the CB_NULL RPC call to occur. 358 */ 359 static rfs4_cbinfo_t * 360 rfs4_cbinfo_hold(rfs4_client_t *cp) 361 { 362 rfs4_cbinfo_t *cbp = &cp->cbinfo; 363 364 retry: 365 mutex_enter(cbp->cb_lock); 366 367 if (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) { 368 /* 369 * Looks like a new callback path may be available and 370 * noone has set it up. 371 */ 372 mutex_exit(cbp->cb_lock); 373 rfs4_dbe_hold(cp->dbe); 374 rfs4_do_cb_null(cp); /* caller will release client hold */ 375 goto retry; 376 } 377 378 /* Is there a thread working on doing the CB_NULL RPC? */ 379 if (cbp->cb_nullcaller == TRUE) 380 cv_wait(cbp->cb_cv, cbp->cb_lock); /* if so, wait on it */ 381 382 /* If the callback path is not okay (up and running), just quit */ 383 if (cbp->cb_state != CB_OK) { 384 mutex_exit(cbp->cb_lock); 385 return (NULL); 386 } 387 388 /* Let someone know we are using the current callback info */ 389 cbp->cb_refcnt++; 390 mutex_exit(cbp->cb_lock); 391 return (cbp); 392 } 393 394 /* 395 * The caller is done with the callback info. It may be that the 396 * caller's RPC failed and the NFSv4 client has actually provided new 397 * callback information. If so, let the caller know so they can 398 * advantage of this and maybe retry the RPC that originally failed. 399 */ 400 static int 401 rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate) 402 { 403 int cb_new = FALSE; 404 405 mutex_enter(cbp->cb_lock); 406 407 /* The caller gets a chance to mark the callback info as bad */ 408 if (newstate != CB_NOCHANGE) 409 cbp->cb_state = newstate; 410 if (newstate == CB_FAILED) { 411 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 412 cbp->cb_notified_of_cb_path_down = FALSE; 413 } 414 415 cbp->cb_refcnt--; /* no longer using the information */ 416 417 /* 418 * A thread may be waiting on this one to finish and if so, 419 * let it know that it is okay to do the CB_NULL to the 420 * client's callback server. 421 */ 422 if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller) 423 cv_broadcast(cbp->cb_cv_nullcaller); 424 425 /* 426 * If this is the last thread to use the callback info and 427 * there is new callback information to try and no thread is 428 * there ready to do the CB_NULL, then return true to teh 429 * caller so they can do the CB_NULL 430 */ 431 if (cbp->cb_refcnt == 0 && 432 cbp->cb_nullcaller == FALSE && 433 cbp->cb_newer.cb_new == TRUE && 434 cbp->cb_newer.cb_confirmed == TRUE) 435 cb_new = TRUE; 436 437 mutex_exit(cbp->cb_lock); 438 439 return (cb_new); 440 } 441 442 /* 443 * Given the information in the callback info struct, create a client 444 * handle that can be used by the server for its callback path. 445 */ 446 static CLIENT * 447 rfs4_cbch_init(rfs4_cbinfo_t *cbp) 448 { 449 struct knetconfig knc; 450 vnode_t *vp; 451 struct sockaddr_in addr4; 452 struct sockaddr_in6 addr6; 453 void *addr, *taddr; 454 in_port_t *pp; 455 int af; 456 char *devnam; 457 int err = 0; 458 struct netbuf nb; 459 int size; 460 CLIENT *ch = NULL; 461 int useresvport = 0; 462 463 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 464 "rfs4_cbch_init: entry cbp->%p\n", (void *)cbp)); 465 466 mutex_enter(cbp->cb_lock); 467 468 if (cbp->cb_callback.cb_location.r_netid == NULL || 469 cbp->cb_callback.cb_location.r_addr == NULL) { 470 goto cb_init_out; 471 } 472 473 if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) { 474 knc.knc_semantics = NC_TPI_COTS; 475 knc.knc_protofmly = "inet"; 476 knc.knc_proto = "tcp"; 477 devnam = "/dev/tcp"; 478 af = AF_INET; 479 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp") 480 == 0) { 481 knc.knc_semantics = NC_TPI_CLTS; 482 knc.knc_protofmly = "inet"; 483 knc.knc_proto = "udp"; 484 devnam = "/dev/udp"; 485 af = AF_INET; 486 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6") 487 == 0) { 488 knc.knc_semantics = NC_TPI_COTS; 489 knc.knc_protofmly = "inet6"; 490 knc.knc_proto = "tcp"; 491 devnam = "/dev/tcp6"; 492 af = AF_INET6; 493 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6") 494 == 0) { 495 knc.knc_semantics = NC_TPI_CLTS; 496 knc.knc_protofmly = "inet6"; 497 knc.knc_proto = "udp"; 498 devnam = "/dev/udp6"; 499 af = AF_INET6; 500 } else { 501 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 502 "rfs4_cbch_init: unknown transport %s\n", 503 cbp->cb_callback.cb_location.r_netid)); 504 505 goto cb_init_out; 506 } 507 508 if ((err = lookupname(devnam, UIO_SYSSPACE, FOLLOW, 509 NULLVPP, &vp)) != 0) { 510 511 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 512 "rfs4_cbch_init: lookupname failed %d\n", err)); 513 514 goto cb_init_out; 515 } 516 517 if (vp->v_type != VCHR) { 518 519 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 520 "rfs4_cbch_init: %s is not VCHR", devnam)); 521 VN_RELE(vp); 522 goto cb_init_out; 523 } 524 525 knc.knc_rdev = vp->v_rdev; 526 527 VN_RELE(vp); 528 529 if (af == AF_INET) { 530 size = sizeof (addr4); 531 bzero(&addr4, size); 532 addr4.sin_family = (sa_family_t)af; 533 addr = &addr4.sin_addr; 534 pp = &addr4.sin_port; 535 taddr = &addr4; 536 } else /* AF_INET6 */ { 537 size = sizeof (addr6); 538 bzero(&addr6, size); 539 addr6.sin6_family = (sa_family_t)af; 540 addr = &addr6.sin6_addr; 541 pp = &addr6.sin6_port; 542 taddr = &addr6; 543 } 544 545 if (uaddr2sockaddr(af, 546 cbp->cb_callback.cb_location.r_addr, addr, pp)) { 547 548 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 549 "rfs4_cbch_init: malformed universal addr: %s\n", 550 cbp->cb_callback.cb_location.r_addr)); 551 552 goto cb_init_out; 553 } 554 555 556 nb.maxlen = nb.len = size; 557 nb.buf = (char *)taddr; 558 559 if (err = clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program, 560 NFS_CB, 0, 0, curthread->t_cred, &ch)) { 561 562 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 563 "rfs4_cbch_init: clnt_tli_kcreate failed %d\n", err)); 564 ch = NULL; 565 } 566 567 /* turn off reserved port usage */ 568 (void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport); 569 570 cb_init_out: 571 mutex_exit(cbp->cb_lock); 572 return (ch); 573 } 574 575 /* 576 * Iterate over the client handle cache and 577 * destroy it. 578 */ 579 static void 580 rfs4_cb_chflush(rfs4_cbinfo_t *cbp) 581 { 582 CLIENT *ch; 583 584 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 585 "rfs4_cb_flush: enter cbp->%p, cb_chc_free=%d\n", 586 (void *)cbp, cbp->cb_chc_free)); 587 588 while (cbp->cb_chc_free) { 589 cbp->cb_chc_free--; 590 ch = cbp->cb_chc[cbp->cb_chc_free]; 591 cbp->cb_chc[cbp->cb_chc_free] = NULL; 592 if (ch) { 593 if (ch->cl_auth) 594 auth_destroy(ch->cl_auth); 595 clnt_destroy(ch); 596 } 597 } 598 } 599 600 /* 601 * Return a client handle, either from a the small 602 * rfs4_client_t cache or one that we just created. 603 */ 604 static CLIENT * 605 rfs4_cb_getch(rfs4_cbinfo_t *cbp) 606 { 607 CLIENT *cbch = NULL; 608 uint32_t zilch = 0; 609 610 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 611 "rfs4_cb_getch: enter cbp->%p, cb_chc_free=%d\n", 612 (void *)cbp, cbp->cb_chc_free)); 613 614 mutex_enter(cbp->cb_lock); 615 616 if (cbp->cb_chc_free) { 617 cbp->cb_chc_free--; 618 cbch = cbp->cb_chc[ cbp->cb_chc_free ]; 619 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 620 "rfs4_cb_getch: cb_chc_free=%d ch->%p\n", 621 cbp->cb_chc_free, (void *)cbch)); 622 mutex_exit(cbp->cb_lock); 623 (void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch); 624 return (cbch); 625 } 626 627 mutex_exit(cbp->cb_lock); 628 629 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 630 "rfs4_cb_getch: calling rfs4_cbch_init\n")); 631 632 /* none free so make it now */ 633 cbch = rfs4_cbch_init(cbp); 634 635 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 636 "rfs4_cb_getch: returning cbch->%p\n", (void *)cbch)); 637 638 return (cbch); 639 } 640 641 /* 642 * Return the client handle to the small cache or 643 * destroy it. 644 */ 645 static void 646 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld) 647 { 648 if (lockheld == FALSE) 649 mutex_enter(cbp->cb_lock); 650 651 if (cbp->cb_chc_free < RFS4_CBCH_MAX) { 652 cbp->cb_chc[ cbp->cb_chc_free++ ] = ch; 653 if (lockheld == FALSE) 654 mutex_exit(cbp->cb_lock); 655 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 656 "rfs4_cb_freech: caching cbp->%p, ch->%p, cb_chc_free=%d\n", 657 (void *)cbp, (void *)ch, cbp->cb_chc_free)); 658 return; 659 } 660 if (lockheld == FALSE) 661 mutex_exit(cbp->cb_lock); 662 663 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 664 "rfs4_cb_freech: destroying cbp->%p, ch->%p, cb_chc_free=%d\n", 665 (void *)cbp, (void *)ch, cbp->cb_chc_free)); 666 667 /* 668 * cache maxed out of free entries, obliterate 669 * this client handle, destroy it, throw it away. 670 */ 671 if (ch->cl_auth) 672 auth_destroy(ch->cl_auth); 673 clnt_destroy(ch); 674 } 675 676 /* 677 * With the supplied callback information - initialize the client 678 * callback data. If there is a callback in progress, save the 679 * callback info so that a thread can pick it up in the future. 680 */ 681 void 682 rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident) 683 { 684 char *addr = NULL; 685 char *netid = NULL; 686 rfs4_cbinfo_t *cbp = &cp->cbinfo; 687 size_t len; 688 689 /* Set the call back for the client */ 690 if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' && 691 cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') { 692 len = strlen(cb->cb_location.r_addr) + 1; 693 addr = kmem_alloc(len, KM_SLEEP); 694 bcopy(cb->cb_location.r_addr, addr, len); 695 len = strlen(cb->cb_location.r_netid) + 1; 696 netid = kmem_alloc(len, KM_SLEEP); 697 bcopy(cb->cb_location.r_netid, netid, len); 698 } 699 /* ready to save the new information but first free old, if exists */ 700 mutex_enter(cbp->cb_lock); 701 702 cbp->cb_newer.cb_callback.cb_program = cb->cb_program; 703 704 if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL) 705 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr, 706 strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1); 707 cbp->cb_newer.cb_callback.cb_location.r_addr = addr; 708 709 if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL) 710 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid, 711 strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1); 712 cbp->cb_newer.cb_callback.cb_location.r_netid = netid; 713 714 cbp->cb_newer.cb_ident = cb_ident; 715 716 if (addr && *addr && netid && *netid) { 717 cbp->cb_newer.cb_new = TRUE; 718 cbp->cb_newer.cb_confirmed = FALSE; 719 } else { 720 cbp->cb_newer.cb_new = FALSE; 721 cbp->cb_newer.cb_confirmed = FALSE; 722 } 723 724 mutex_exit(cbp->cb_lock); 725 } 726 727 /* 728 * The server uses this when processing SETCLIENTID_CONFIRM. Callback 729 * information may have been provided on SETCLIENTID and this call 730 * marks that information as confirmed and then starts a thread to 731 * test the callback path. 732 */ 733 void 734 rfs4_deleg_cb_check(rfs4_client_t *cp) 735 { 736 if (cp->cbinfo.cb_newer.cb_new == FALSE) 737 return; 738 739 cp->cbinfo.cb_newer.cb_confirmed = TRUE; 740 741 rfs4_dbe_hold(cp->dbe); /* hold the client struct for thread */ 742 743 (void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN, 744 minclsyspri); 745 } 746 747 static void 748 rfs4args_cb_recall_free(nfs_cb_argop4 *argop) 749 { 750 CB_RECALL4args *rec_argp; 751 752 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 753 if (rec_argp->fh.nfs_fh4_val) 754 kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len); 755 } 756 757 /* ARGSUSED */ 758 static void 759 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop) 760 { 761 CB_GETATTR4args *argp; 762 763 argp = &argop->nfs_cb_argop4_u.opcbgetattr; 764 if (argp->fh.nfs_fh4_val) 765 kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len); 766 } 767 768 static void 769 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp) 770 { 771 int i, arglen; 772 nfs_cb_argop4 *argop; 773 774 /* 775 * First free any special args alloc'd for specific ops. 776 */ 777 arglen = args->array_len; 778 argop = args->array; 779 for (i = 0; i < arglen; i++, argop++) { 780 781 switch (argop->argop) { 782 case OP_CB_RECALL: 783 rfs4args_cb_recall_free(argop); 784 break; 785 786 case OP_CB_GETATTR: 787 rfs4args_cb_getattr_free(argop); 788 break; 789 790 default: 791 NFS4_DEBUG(rfs4_deleg_debug, (CE_NOTE, 792 "rfs4freeargres: unknown op")); 793 return; 794 } 795 } 796 797 if (args->tag.utf8string_len > 0) 798 UTF8STRING_FREE(args->tag) 799 800 kmem_free(args->array, arglen * sizeof (nfs_cb_argop4)); 801 if (resp) 802 (void) xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp); 803 } 804 805 /* 806 * General callback routine for the server to the client. 807 */ 808 static enum clnt_stat 809 rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args, 810 CB_COMPOUND4res *res, struct timeval timeout) 811 { 812 rfs4_cbinfo_t *cbp; 813 CLIENT *ch; 814 /* start with this in case cb_getch() fails */ 815 enum clnt_stat stat = RPC_FAILED; 816 817 res->tag.utf8string_val = NULL; 818 res->array = NULL; 819 820 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 821 "rfs4_do_callback: enter cp->%p\n", (void *)cp)); 822 823 retry: 824 cbp = rfs4_cbinfo_hold(cp); 825 if (cbp == NULL) 826 return (stat); 827 828 /* get a client handle */ 829 if ((ch = rfs4_cb_getch(cbp)) != NULL) { 830 /* 831 * reset the cb_ident since it may have changed in 832 * rfs4_cbinfo_hold() 833 */ 834 args->callback_ident = cbp->cb_ident; 835 836 stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv, 837 (caddr_t)args, xdr_CB_COMPOUND4res, 838 (caddr_t)res, timeout); 839 840 /* free client handle */ 841 rfs4_cb_freech(cbp, ch, FALSE); 842 } 843 844 NFS4_DEBUG(rfs4_cb_debug, (CE_NOTE, 845 "rfs4_do_callback: exit with RPC status %d, %s", 846 stat, clnt_sperrno(stat))); 847 848 /* 849 * If the rele says that there may be new callback info then 850 * retry this sequence and it may succeed as a result of the 851 * new callback path 852 */ 853 if (rfs4_cbinfo_rele(cbp, 854 (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE) 855 goto retry; 856 857 return (stat); 858 } 859 860 /* 861 * Used by the NFSv4 server to get attributes for a file while 862 * handling the case where a file has been write delegated. For the 863 * time being, VOP_GETATTR() is called and CB_GETATTR processing is 864 * not undertaken. This call site is maintained in case the server is 865 * updated in the future to handle write delegation space guarantees. 866 */ 867 nfsstat4 868 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 869 { 870 uint_t mask; 871 int error; 872 873 mask = vap->va_mask; 874 error = VOP_GETATTR(vp, vap, flag, cr); 875 /* 876 * Some file systems clobber va_mask. it is probably wrong of 877 * them to do so, nonethless we practice defensive coding. 878 * See bug id 4276830. 879 */ 880 vap->va_mask = mask; 881 return (puterrno4(error)); 882 } 883 884 /* 885 * This is used everywhere in the v2/v3 server to allow the 886 * integration of all NFS versions and the support of delegation. For 887 * now, just call the VOP_GETATTR(). If the NFSv4 server is enhanced 888 * in the future to provide space guarantees for write delegations 889 * then this call site should be expanded to interact with the client. 890 */ 891 int 892 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 893 { 894 return (VOP_GETATTR(vp, vap, flag, cr)); 895 } 896 897 /* 898 * Place the actual cb_recall otw call to client. 899 */ 900 static void 901 rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc) 902 { 903 CB_COMPOUND4args cb4_args; 904 CB_COMPOUND4res cb4_res; 905 CB_RECALL4args *rec_argp; 906 nfs_cb_argop4 *argop; 907 int numops; 908 int argoplist_size; 909 struct timeval timeout; 910 nfs_fh4 *fhp; 911 enum clnt_stat call_stat; 912 913 NFS4_DEBUG(rfs4_deleg_debug, (CE_NOTE, "rfs4_do_cb_recall: enter")); 914 /* 915 * set up the compound args 916 */ 917 numops = 1; /* CB_RECALL only */ 918 919 argoplist_size = numops * sizeof (nfs_cb_argop4); 920 argop = kmem_zalloc(argoplist_size, KM_SLEEP); 921 argop->argop = OP_CB_RECALL; 922 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 923 924 (void) str_to_utf8("cb_recall", &cb4_args.tag); 925 cb4_args.minorversion = CB4_MINORVERSION; 926 /* cb4_args.callback_ident is set in rfs4_do_callback() */ 927 cb4_args.array_len = numops; 928 cb4_args.array = argop; 929 930 /* 931 * fill in the args struct 932 */ 933 bcopy(&dsp->delegid.stateid, &rec_argp->stateid, sizeof (stateid4)); 934 rec_argp->truncate = trunc; 935 936 fhp = &dsp->finfo->filehandle; 937 rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) * 938 fhp->nfs_fh4_len, KM_SLEEP); 939 nfs_fh4_copy(fhp, &rec_argp->fh); 940 941 /* Keep track of when we did this for observability */ 942 dsp->time_recalled = gethrestime_sec(); 943 944 /* 945 * Set up the timeout for the callback and make the actual call. 946 * Timeout will be 80% of the lease period for this server. 947 */ 948 timeout.tv_sec = (rfs4_lease_time * 80) / 100; 949 timeout.tv_usec = 0; 950 951 call_stat = rfs4_do_callback(dsp->client, &cb4_args, 952 &cb4_res, timeout); 953 954 if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) { 955 rfs4_revoke_deleg(dsp); 956 NFS4_DEBUG(rfs4_deleg_debug, (CE_NOTE, 957 "rfs4_do_cb_recall: rpcstat=%d cbstat=%d ", 958 call_stat, cb4_res.status)); 959 } 960 961 rfs4freeargres(&cb4_args, &cb4_res); 962 } 963 964 struct recall_arg { 965 rfs4_deleg_state_t *dsp; 966 void (*recall)(rfs4_deleg_state_t *, bool_t trunc); 967 bool_t trunc; 968 }; 969 970 static void 971 do_recall(struct recall_arg *arg) 972 { 973 rfs4_deleg_state_t *dsp = arg->dsp; 974 rfs4_file_t *fp = dsp->finfo; 975 callb_cpr_t cpr_info; 976 kmutex_t cpr_lock; 977 978 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 979 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall"); 980 981 /* 982 * It is possible that before this thread starts 983 * the client has send us a return_delegation, and 984 * if that is the case we do not need to send the 985 * recall callback. 986 */ 987 if (dsp->dtype != OPEN_DELEGATE_NONE) { 988 NFS4_DEBUG(rfs4_deleg_debug, 989 (CE_NOTE, "recall = %p, state = %p, fp = %p trunc = %d", 990 (void*)arg->recall, (void*)dsp, (void*)fp, arg->trunc)); 991 992 if (arg->recall) 993 (void) (*arg->recall)(dsp, arg->trunc); 994 } 995 996 mutex_enter(fp->dinfo->recall_lock); 997 /* 998 * Recall count may go negative if the parent thread that is 999 * creating the individual callback threads does not modify 1000 * the recall_count field before the callback thread actually 1001 * gets a response from the CB_RECALL 1002 */ 1003 fp->dinfo->recall_count--; 1004 if (fp->dinfo->recall_count == 0) 1005 cv_signal(fp->dinfo->recall_cv); 1006 mutex_exit(fp->dinfo->recall_lock); 1007 1008 mutex_enter(&cpr_lock); 1009 CALLB_CPR_EXIT(&cpr_info); 1010 mutex_destroy(&cpr_lock); 1011 1012 rfs4_deleg_state_rele(dsp); /* release the hold for this thread */ 1013 1014 kmem_free(arg, sizeof (struct recall_arg)); 1015 } 1016 1017 struct master_recall_args { 1018 rfs4_file_t *fp; 1019 void (*recall)(rfs4_deleg_state_t *, bool_t); 1020 bool_t trunc; 1021 }; 1022 1023 static void 1024 do_recall_file(struct master_recall_args *map) 1025 { 1026 rfs4_file_t *fp = map->fp; 1027 rfs4_deleg_state_t *dsp; 1028 struct recall_arg *arg; 1029 callb_cpr_t cpr_info; 1030 kmutex_t cpr_lock; 1031 int32_t recall_count; 1032 1033 rfs4_dbe_lock(fp->dbe); 1034 /* Recall already in progress */ 1035 if (fp->dinfo->recall_count != 0) { 1036 rfs4_dbe_rele_nolock(fp->dbe); 1037 rfs4_dbe_unlock(fp->dbe); 1038 kmem_free(map, sizeof (struct master_recall_args)); 1039 return; 1040 } 1041 1042 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 1043 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, 1044 "nfsv4RecallFile"); 1045 1046 recall_count = 0; 1047 for (dsp = fp->delegationlist.next->dsp; dsp != NULL; 1048 dsp = dsp->delegationlist.next->dsp) { 1049 arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP); 1050 arg->recall = map->recall; 1051 arg->trunc = map->trunc; 1052 1053 rfs4_dbe_hold(dsp->dbe); /* hold for receiving thread */ 1054 1055 arg->dsp = dsp; 1056 1057 recall_count++; 1058 1059 (void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN, 1060 minclsyspri); 1061 } 1062 rfs4_dbe_unlock(fp->dbe); 1063 1064 mutex_enter(fp->dinfo->recall_lock); 1065 /* 1066 * Recall count may go negative if the parent thread that is 1067 * creating the individual callback threads does not modify 1068 * the recall_count field before the callback thread actually 1069 * gets a response from the CB_RECALL 1070 */ 1071 fp->dinfo->recall_count += recall_count; 1072 while (fp->dinfo->recall_count) 1073 cv_wait(fp->dinfo->recall_cv, fp->dinfo->recall_lock); 1074 1075 mutex_exit(fp->dinfo->recall_lock); 1076 1077 NFS4_DEBUG(rfs4_deleg_debug, (CE_NOTE, "Recall complete for %p", 1078 (void*)fp)); 1079 rfs4_file_rele(fp); 1080 kmem_free(map, sizeof (struct master_recall_args)); 1081 mutex_enter(&cpr_lock); 1082 CALLB_CPR_EXIT(&cpr_info); 1083 mutex_destroy(&cpr_lock); 1084 } 1085 1086 static void 1087 rfs4_recall_file(rfs4_file_t *fp, 1088 void (*recall)(rfs4_deleg_state_t *, bool_t trunc), 1089 bool_t trunc, rfs4_client_t *cp) 1090 { 1091 struct master_recall_args *args; 1092 1093 rfs4_dbe_lock(fp->dbe); 1094 if (fp->dinfo->dtype == OPEN_DELEGATE_NONE) { 1095 rfs4_dbe_unlock(fp->dbe); 1096 return; 1097 } 1098 rfs4_dbe_hold(fp->dbe); /* hold for new thread */ 1099 1100 /* 1101 * Mark the time we started the recall processing. 1102 * If it has been previously recalled, do not reset the 1103 * timer since this is used for the revocation decision. 1104 */ 1105 if (fp->dinfo->time_recalled == 0) 1106 fp->dinfo->time_recalled = gethrestime_sec(); 1107 fp->dinfo->ever_recalled = TRUE; /* used for policy decision */ 1108 /* Client causing recall not always available */ 1109 if (cp) 1110 fp->dinfo->conflicted_client = cp->clientid; 1111 1112 rfs4_dbe_unlock(fp->dbe); 1113 1114 args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP); 1115 args->fp = fp; 1116 args->recall = recall; 1117 args->trunc = trunc; 1118 1119 (void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN, 1120 minclsyspri); 1121 } 1122 1123 void 1124 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp) 1125 { 1126 time_t elapsed1, elapsed2; 1127 1128 if (fp->dinfo->time_recalled != 0) { 1129 elapsed1 = gethrestime_sec() - fp->dinfo->time_recalled; 1130 elapsed2 = gethrestime_sec() - fp->dinfo->time_lastwrite; 1131 /* First check to see if a revocation should occur */ 1132 if (elapsed1 > rfs4_lease_time && 1133 elapsed2 > rfs4_lease_time) { 1134 rfs4_revoke_file(fp); 1135 return; 1136 } 1137 /* 1138 * Next check to see if a recall should be done again 1139 * so quickly. 1140 */ 1141 if (elapsed1 <= ((rfs4_lease_time * 20) / 100)) 1142 return; 1143 } 1144 rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp); 1145 } 1146 1147 /* 1148 * rfs4_check_recall is called from rfs4_do_open to determine if the current 1149 * open conflicts with the delegation. 1150 * Return true if we need recall otherwise false. 1151 * Assumes entry locks for sp and sp->finfo are held. 1152 */ 1153 bool_t 1154 rfs4_check_recall(rfs4_state_t *sp, uint32_t access) 1155 { 1156 open_delegation_type4 dtype = sp->finfo->dinfo->dtype; 1157 1158 switch (dtype) { 1159 case OPEN_DELEGATE_NONE: 1160 /* Not currently delegated so there is nothing to do */ 1161 return (FALSE); 1162 case OPEN_DELEGATE_READ: 1163 /* 1164 * If the access is only asking for READ then there is 1165 * no conflict and nothing to do. If it is asking 1166 * for write, then there will be conflict and the read 1167 * delegation should be recalled. 1168 */ 1169 if (access == OPEN4_SHARE_ACCESS_READ) 1170 return (FALSE); 1171 else 1172 return (TRUE); 1173 case OPEN_DELEGATE_WRITE: 1174 /* Check to see if this client has the delegation */ 1175 return (rfs4_is_deleg(sp)); 1176 } 1177 1178 return (FALSE); 1179 } 1180 1181 /* 1182 * Return the "best" allowable delegation available given the current 1183 * delegation type and the desired access and deny modes on the file. 1184 * At the point that this routine is called we know that the access and 1185 * deny modes are consistent with the file modes. 1186 */ 1187 static open_delegation_type4 1188 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp) 1189 { 1190 open_delegation_type4 dtype = fp->dinfo->dtype; 1191 uint32_t access = sp->share_access; 1192 uint32_t deny = sp->share_deny; 1193 int readcnt = 0; 1194 int writecnt = 0; 1195 1196 switch (dtype) { 1197 case OPEN_DELEGATE_NONE: 1198 /* 1199 * Determine if more than just this OPEN have the file 1200 * open and if so, no delegation may be provided to 1201 * the client. 1202 */ 1203 if (access & OPEN4_SHARE_ACCESS_WRITE) 1204 writecnt++; 1205 if (access & OPEN4_SHARE_ACCESS_READ) 1206 readcnt++; 1207 1208 if (fp->access_read > readcnt || fp->access_write > writecnt) 1209 return (OPEN_DELEGATE_NONE); 1210 1211 /* 1212 * If the client is going to write, or if the client 1213 * has exclusive access, return a write delegation. 1214 */ 1215 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1216 (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) 1217 return (OPEN_DELEGATE_WRITE); 1218 /* 1219 * If we don't want to write or we've haven't denied read 1220 * access to others, return a read delegation. 1221 */ 1222 if ((access & ~OPEN4_SHARE_ACCESS_WRITE) || 1223 (deny & ~OPEN4_SHARE_DENY_READ)) 1224 return (OPEN_DELEGATE_READ); 1225 1226 /* Shouldn't get here */ 1227 return (OPEN_DELEGATE_NONE); 1228 1229 case OPEN_DELEGATE_READ: 1230 /* 1231 * If the file is delegated for read but we wan't to 1232 * write or deny others to read then we can't delegate 1233 * the file. We shouldn't get here since the delegation should 1234 * have been recalled already. 1235 */ 1236 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1237 (deny & OPEN4_SHARE_DENY_READ)) 1238 return (OPEN_DELEGATE_NONE); 1239 return (OPEN_DELEGATE_READ); 1240 1241 case OPEN_DELEGATE_WRITE: 1242 return (OPEN_DELEGATE_WRITE); 1243 } 1244 1245 /* Shouldn't get here */ 1246 return (OPEN_DELEGATE_NONE); 1247 } 1248 1249 /* 1250 * Given the desired delegation type and the "history" of the file 1251 * determine the actual delegation type to return. 1252 */ 1253 static open_delegation_type4 1254 rfs4_delegation_policy(open_delegation_type4 dtype, 1255 rfs4_dinfo_t *dinfo, clientid4 cid) 1256 { 1257 time_t elapsed; 1258 1259 if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE) 1260 return (OPEN_DELEGATE_NONE); 1261 1262 /* 1263 * Has this file/delegation ever been recalled? If not then 1264 * no furhter checks for a delegation race need to be done. 1265 * However if a recall has occurred, then check to see if a 1266 * client has caused its own delegation recall to occur. If 1267 * not, then has a delegation for this file been returned 1268 * recently? If so, then do not assign a new delegation to 1269 * avoid a "delegation race" between the original client and 1270 * the new/conflicting client. 1271 */ 1272 if (dinfo->ever_recalled == TRUE) { 1273 if (dinfo->conflicted_client != cid) { 1274 elapsed = gethrestime_sec() - dinfo->time_returned; 1275 if (elapsed < rfs4_lease_time) 1276 return (OPEN_DELEGATE_NONE); 1277 } 1278 } 1279 1280 /* Limit the number of read grants */ 1281 if (dtype == OPEN_DELEGATE_READ && 1282 dinfo->rdgrants > MAX_READ_DELEGATIONS) 1283 return (OPEN_DELEGATE_NONE); 1284 1285 /* 1286 * Should consider limiting total number of read/write 1287 * delegations the server will permit. 1288 */ 1289 1290 return (dtype); 1291 } 1292 1293 /* 1294 * Try and grant a delegation for an open give the state. The routine 1295 * returns the delegation type granted. This could be OPEN_DELEGATE_NONE. 1296 * 1297 * The state and associate file entry must be locked 1298 */ 1299 rfs4_deleg_state_t * 1300 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall) 1301 { 1302 rfs4_file_t *fp = sp->finfo; 1303 open_delegation_type4 dtype; 1304 int no_delegation; 1305 1306 ASSERT(rfs4_dbe_islocked(sp->dbe)); 1307 ASSERT(rfs4_dbe_islocked(fp->dbe)); 1308 1309 /* Is the server even providing delegations? */ 1310 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE) 1311 return (NULL); 1312 1313 /* Check to see if delegations have been temporarily disabled */ 1314 mutex_enter(&rfs4_deleg_lock); 1315 no_delegation = rfs4_deleg_disabled; 1316 mutex_exit(&rfs4_deleg_lock); 1317 1318 if (no_delegation) 1319 return (NULL); 1320 1321 /* Don't grant a delegation if a deletion is impending. */ 1322 if (fp->dinfo->hold_grant > 0) { 1323 NFS4_DEBUG(rfs4_deleg_debug, 1324 (CE_NOTE, "rfs4_grant_delegation: hold_grant is set")); 1325 return (NULL); 1326 } 1327 1328 /* 1329 * Don't grant a delegation if there are any lock manager 1330 * (NFSv2/v3) locks for the file. This is a bit of a hack (e.g., 1331 * if there are only read locks we should be able to grant a 1332 * read-only delegation), but it's good enough for now. 1333 * 1334 * MT safety: the lock manager checks for conflicting delegations 1335 * before processing a lock request. That check will block until 1336 * we are done here. So if the lock manager acquires a lock after 1337 * we decide to grant the delegation, the delegation will get 1338 * immediately recalled (if there's a conflict), so we're safe. 1339 */ 1340 if (lm_vp_active(fp->vp)) { 1341 NFS4_DEBUG(rfs4_deleg_debug, 1342 (CE_NOTE, "rfs4_grant_delegation: NLM lock")); 1343 return (NULL); 1344 } 1345 1346 /* 1347 * Based on the type of delegation request passed in, take the 1348 * appropriate action (DELEG_NONE is handled above) 1349 */ 1350 switch (dreq) { 1351 1352 case DELEG_READ: 1353 case DELEG_WRITE: 1354 /* 1355 * The server "must" grant the delegation in this case. 1356 * Client is using open previous 1357 */ 1358 dtype = (open_delegation_type4)dreq; 1359 *recall = 1; 1360 break; 1361 case DELEG_ANY: 1362 /* 1363 * If a valid callback path does not exist, no delegation may 1364 * be granted. 1365 */ 1366 if (sp->owner->client->cbinfo.cb_state != CB_OK) 1367 return (NULL); 1368 1369 /* 1370 * If the original operation which caused time_rm_delayed 1371 * to be set hasn't been retried and completed for one 1372 * full lease period, clear it and allow delegations to 1373 * get granted again. 1374 */ 1375 if (fp->dinfo->time_rm_delayed > 0 && 1376 gethrestime_sec() > 1377 fp->dinfo->time_rm_delayed + rfs4_lease_time) 1378 fp->dinfo->time_rm_delayed = 0; 1379 1380 /* 1381 * If we are waiting for a delegation to be returned then 1382 * don't delegate this file. We do this for correctness as 1383 * well as if the file is being recalled we would likely 1384 * recall this file again. 1385 */ 1386 1387 if (fp->dinfo->time_recalled != 0 || 1388 fp->dinfo->time_rm_delayed != 0) 1389 return (NULL); 1390 1391 /* Get the "best" delegation candidate */ 1392 dtype = rfs4_check_delegation(sp, fp); 1393 1394 if (dtype == OPEN_DELEGATE_NONE) 1395 return (NULL); 1396 1397 /* 1398 * Based on policy and the history of the file get the 1399 * actual delegation. 1400 */ 1401 dtype = rfs4_delegation_policy(dtype, fp->dinfo, 1402 sp->owner->client->clientid); 1403 1404 NFS4_DEBUG(rfs4_deleg_debug, 1405 (CE_NOTE, "Grant policy dtype = %d", dtype)); 1406 if (dtype == OPEN_DELEGATE_NONE) 1407 return (NULL); 1408 break; 1409 default: 1410 return (NULL); 1411 } 1412 1413 /* set the delegation for the state */ 1414 return (rfs4_deleg_state(sp, dtype, recall)); 1415 } 1416 1417 void 1418 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp, 1419 nfsace4 *ace, int recall) 1420 { 1421 open_write_delegation4 *wp; 1422 open_read_delegation4 *rp; 1423 nfs_space_limit4 *spl; 1424 nfsace4 nace; 1425 1426 /* 1427 * We need to allocate a new copy of the who string. 1428 * this string will be freed by the rfs4_op_open dis_resfree 1429 * routine. We need to do this allocation since replays will 1430 * be allocated and rfs4_compound can't tell the difference from 1431 * a replay and an inital open. N.B. if an ace is passed in, it 1432 * the caller's responsibility to free it. 1433 */ 1434 1435 if (ace == NULL) { 1436 /* 1437 * Default is to deny all access, the client will have 1438 * to contact the server. XXX Do we want to actually 1439 * set a deny for every one, or do we simply want to 1440 * construct an entity that will match no one? 1441 */ 1442 nace.type = ACE4_ACCESS_DENIED_ACE_TYPE; 1443 nace.flag = 0; 1444 nace.access_mask = ACE4_VALID_MASK_BITS; 1445 (void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who); 1446 } else { 1447 nace.type = ace->type; 1448 nace.flag = ace->flag; 1449 nace.access_mask = ace->access_mask; 1450 (void) utf8_copy(&ace->who, &nace.who); 1451 } 1452 1453 dp->delegation_type = dsp->dtype; 1454 1455 switch (dsp->dtype) { 1456 case OPEN_DELEGATE_NONE: 1457 break; 1458 case OPEN_DELEGATE_READ: 1459 rp = &dp->open_delegation4_u.read; 1460 rp->stateid = dsp->delegid.stateid; 1461 rp->recall = (bool_t)recall; 1462 rp->permissions = nace; 1463 break; 1464 case OPEN_DELEGATE_WRITE: 1465 wp = &dp->open_delegation4_u.write; 1466 wp->stateid = dsp->delegid.stateid; 1467 wp->recall = (bool_t)recall; 1468 spl = &wp->space_limit; 1469 spl->limitby = NFS_LIMIT_SIZE; 1470 spl->nfs_space_limit4_u.filesize = 0; 1471 wp->permissions = nace; 1472 break; 1473 } 1474 } 1475 1476 /* 1477 * Check if the file is delegated via the provided file struct. 1478 * Return TRUE if it is delegated. This is intended for use by 1479 * the v4 server. The v2/v3 server code should use rfs4_check_delegated(). 1480 * 1481 * Note that if the file is found to have a delegation, it is 1482 * recalled, unless the clientid of the caller matches the clientid of the 1483 * delegation. If the caller has specified, there is a slight delay 1484 * inserted in the hopes that the delegation will be returned quickly. 1485 */ 1486 bool_t 1487 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp, 1488 bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp) 1489 { 1490 rfs4_deleg_state_t *dsp; 1491 1492 /* Is delegation enabled? */ 1493 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE) 1494 return (FALSE); 1495 1496 /* do we have a delegation on this file? */ 1497 rfs4_dbe_lock(fp->dbe); 1498 if (fp->dinfo->dtype == OPEN_DELEGATE_NONE) { 1499 if (is_rm) 1500 fp->dinfo->hold_grant++; 1501 rfs4_dbe_unlock(fp->dbe); 1502 return (FALSE); 1503 } 1504 /* 1505 * do we have a write delegation on this file or are we 1506 * requesting write access to a file with any type of existing 1507 * delegation? 1508 */ 1509 if (mode == FWRITE || fp->dinfo->dtype == OPEN_DELEGATE_WRITE) { 1510 if (cp != NULL) { 1511 dsp = fp->delegationlist.next->dsp; 1512 if (dsp == NULL) { 1513 rfs4_dbe_unlock(fp->dbe); 1514 return (FALSE); 1515 } 1516 /* 1517 * Does the requestor already own the delegation? 1518 */ 1519 if (dsp->client->clientid == *(cp)) { 1520 rfs4_dbe_unlock(fp->dbe); 1521 return (FALSE); 1522 } 1523 } 1524 1525 rfs4_dbe_unlock(fp->dbe); 1526 rfs4_recall_deleg(fp, trunc, NULL); 1527 1528 if (!do_delay) { 1529 rfs4_dbe_lock(fp->dbe); 1530 fp->dinfo->time_rm_delayed = gethrestime_sec(); 1531 rfs4_dbe_unlock(fp->dbe); 1532 return (TRUE); 1533 } 1534 1535 delay(NFS4_DELEGATION_CONFLICT_DELAY); 1536 1537 rfs4_dbe_lock(fp->dbe); 1538 if (fp->dinfo->dtype != OPEN_DELEGATE_NONE) { 1539 fp->dinfo->time_rm_delayed = gethrestime_sec(); 1540 rfs4_dbe_unlock(fp->dbe); 1541 return (TRUE); 1542 } 1543 } 1544 if (is_rm) 1545 fp->dinfo->hold_grant++; 1546 rfs4_dbe_unlock(fp->dbe); 1547 return (FALSE); 1548 } 1549 1550 /* 1551 * Check if the file is delegated in the case of a v2 or v3 access. 1552 * Return TRUE if it is delegated which in turn means that v2 should 1553 * drop the request and in the case of v3 JUKEBOX should be returned. 1554 */ 1555 bool_t 1556 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc) 1557 { 1558 rfs4_file_t *fp; 1559 bool_t create = FALSE; 1560 bool_t rc = FALSE; 1561 1562 rfs4_hold_deleg_policy(); 1563 1564 /* Is delegation enabled? */ 1565 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) { 1566 fp = rfs4_findfile(vp, NULL, &create); 1567 if (fp != NULL) { 1568 if (rfs4_check_delegated_byfp(mode, fp, trunc, 1569 TRUE, FALSE, NULL)) { 1570 rc = TRUE; 1571 } 1572 rfs4_file_rele(fp); 1573 } 1574 } 1575 rfs4_rele_deleg_policy(); 1576 return (rc); 1577 } 1578 1579 /* 1580 * Release a hold on the hold_grant counter which 1581 * prevents delegation from being granted while a remove 1582 * or a rename is in progress. 1583 */ 1584 void 1585 rfs4_clear_dont_grant(rfs4_file_t *fp) 1586 { 1587 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE) 1588 return; 1589 rfs4_dbe_lock(fp->dbe); 1590 ASSERT(fp->dinfo->hold_grant > 0); 1591 fp->dinfo->hold_grant--; 1592 fp->dinfo->time_rm_delayed = 0; 1593 rfs4_dbe_unlock(fp->dbe); 1594 } 1595 1596 /* 1597 * State support for delegation. 1598 * Set the state delegation type for this state; 1599 * This routine is called from open via rfs4_grant_delegation and the entry 1600 * locks on sp and sp->finfo are assumed. 1601 */ 1602 static rfs4_deleg_state_t * 1603 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall) 1604 { 1605 rfs4_file_t *fp = sp->finfo; 1606 bool_t create = TRUE; 1607 rfs4_deleg_state_t *dsp; 1608 vnode_t *vp; 1609 int open_prev = *recall; 1610 1611 ASSERT(rfs4_dbe_islocked(fp->dbe)); 1612 1613 /* Shouldn't happen */ 1614 if (fp->dinfo->recall_count != 0 || 1615 (fp->dinfo->dtype == OPEN_DELEGATE_READ && 1616 dtype != OPEN_DELEGATE_READ)) { 1617 return (NULL); 1618 } 1619 1620 /* Unlock to avoid deadlock */ 1621 rfs4_dbe_unlock(fp->dbe); 1622 1623 dsp = rfs4_finddeleg(sp, &create); 1624 1625 rfs4_dbe_lock(fp->dbe); 1626 1627 if (dsp == NULL) 1628 return (NULL); 1629 1630 if (create == FALSE) { 1631 if (sp->owner->client == dsp->client && 1632 dsp->dtype == dtype) { 1633 return (dsp); 1634 } else { 1635 rfs4_deleg_state_rele(dsp); 1636 return (NULL); 1637 } 1638 } 1639 1640 /* 1641 * Check that this file has not been delegated to another 1642 * client 1643 */ 1644 if (fp->dinfo->recall_count != 0 || 1645 fp->dinfo->dtype == OPEN_DELEGATE_WRITE || 1646 (fp->dinfo->dtype == OPEN_DELEGATE_READ && 1647 dtype != OPEN_DELEGATE_READ)) { 1648 rfs4_deleg_state_rele(dsp); 1649 return (NULL); 1650 } 1651 1652 vp = fp->vp; 1653 /* vnevent_support returns 0 if file system supports vnevents */ 1654 if (vnevent_support(vp)) { 1655 rfs4_deleg_state_rele(dsp); 1656 return (NULL); 1657 } 1658 1659 *recall = 0; 1660 if (dtype == OPEN_DELEGATE_READ) { 1661 if (vn_is_opened(vp, V_WRITE) || vn_is_mapped(vp, V_WRITE)) { 1662 if (open_prev) { 1663 *recall = 1; 1664 } else { 1665 rfs4_deleg_state_rele(dsp); 1666 return (NULL); 1667 } 1668 } 1669 (void) fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ, 1670 rfs4_mon_hold, rfs4_mon_rele); 1671 if (vn_is_opened(vp, V_WRITE) || vn_is_mapped(vp, V_WRITE)) { 1672 if (open_prev) { 1673 *recall = 1; 1674 } else { 1675 (void) fem_uninstall(vp, deleg_rdops, 1676 (void *)fp); 1677 rfs4_deleg_state_rele(dsp); 1678 return (NULL); 1679 } 1680 } 1681 } else { /* WRITE */ 1682 if (vn_is_opened(vp, V_RDORWR) || vn_is_mapped(vp, V_RDORWR)) { 1683 if (open_prev) { 1684 *recall = 1; 1685 } else { 1686 rfs4_deleg_state_rele(dsp); 1687 return (NULL); 1688 } 1689 } 1690 (void) fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ, 1691 rfs4_mon_hold, rfs4_mon_rele); 1692 if (vn_is_opened(vp, V_RDORWR) || vn_is_mapped(vp, V_RDORWR)) { 1693 if (open_prev) { 1694 *recall = 1; 1695 } else { 1696 (void) fem_uninstall(vp, deleg_wrops, 1697 (void *)fp); 1698 rfs4_deleg_state_rele(dsp); 1699 return (NULL); 1700 } 1701 } 1702 } 1703 /* Place on delegation list for file */ 1704 insque(&dsp->delegationlist, fp->delegationlist.prev); 1705 1706 dsp->dtype = fp->dinfo->dtype = dtype; 1707 1708 /* Update delegation stats for this file */ 1709 fp->dinfo->time_lastgrant = gethrestime_sec(); 1710 1711 /* reset since this is a new delegation */ 1712 fp->dinfo->conflicted_client = 0; 1713 fp->dinfo->ever_recalled = FALSE; 1714 1715 if (dtype == OPEN_DELEGATE_READ) 1716 fp->dinfo->rdgrants++; 1717 else 1718 fp->dinfo->wrgrants++; 1719 1720 return (dsp); 1721 } 1722 1723 /* 1724 * State routine for the server when a delegation is returned. 1725 */ 1726 void 1727 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked) 1728 { 1729 rfs4_file_t *fp = dsp->finfo; 1730 open_delegation_type4 dtypewas; 1731 1732 rfs4_dbe_lock(fp->dbe); 1733 /* Remove state from recall list */ 1734 1735 remque(&dsp->delegationlist); 1736 dsp->delegationlist.next = dsp->delegationlist.prev = 1737 &dsp->delegationlist; 1738 1739 if (&fp->delegationlist == fp->delegationlist.next) { 1740 dtypewas = fp->dinfo->dtype; 1741 fp->dinfo->dtype = OPEN_DELEGATE_NONE; 1742 rfs4_dbe_cv_broadcast(fp->dbe); 1743 1744 /* if file system was unshared, the vp will be NULL */ 1745 if (fp->vp != NULL) { 1746 if (dtypewas == OPEN_DELEGATE_READ) 1747 (void) fem_uninstall(fp->vp, deleg_rdops, 1748 (void *)fp); 1749 else 1750 (void) fem_uninstall(fp->vp, deleg_wrops, 1751 (void *)fp); 1752 } 1753 } 1754 1755 switch (dsp->dtype) { 1756 case OPEN_DELEGATE_READ: 1757 fp->dinfo->rdgrants--; 1758 break; 1759 case OPEN_DELEGATE_WRITE: 1760 fp->dinfo->wrgrants--; 1761 break; 1762 default: 1763 break; 1764 } 1765 1766 /* used in the policy decision */ 1767 fp->dinfo->time_returned = gethrestime_sec(); 1768 1769 /* 1770 * reset the time_recalled field so future delegations are not 1771 * accidentally revoked 1772 */ 1773 if ((fp->dinfo->rdgrants + fp->dinfo->wrgrants) == 0) 1774 fp->dinfo->time_recalled = 0; 1775 1776 rfs4_dbe_unlock(fp->dbe); 1777 1778 rfs4_dbe_lock(dsp->dbe); 1779 1780 dsp->dtype = OPEN_DELEGATE_NONE; 1781 1782 if (revoked == TRUE) 1783 dsp->time_revoked = gethrestime_sec(); 1784 1785 rfs4_dbe_invalidate(dsp->dbe); 1786 1787 rfs4_dbe_unlock(dsp->dbe); 1788 1789 if (revoked == TRUE) { 1790 rfs4_dbe_lock(dsp->client->dbe); 1791 dsp->client->deleg_revoked++; /* observability */ 1792 rfs4_dbe_unlock(dsp->client->dbe); 1793 } 1794 } 1795 1796 static void 1797 rfs4_revoke_deleg(rfs4_deleg_state_t *dsp) 1798 { 1799 rfs4_return_deleg(dsp, TRUE); 1800 } 1801 1802 static void 1803 rfs4_revoke_file(rfs4_file_t *fp) 1804 { 1805 rfs4_deleg_state_t *dsp; 1806 1807 /* 1808 * The lock for rfs4_file_t must be held when traversing the 1809 * delegation list but that lock needs to be released to call 1810 * rfs4_revoke_deleg() 1811 * This for loop is set up to check the list for being empty, 1812 * and locking the rfs4_file_t struct on init and end 1813 */ 1814 for (rfs4_dbe_lock(fp->dbe); 1815 &fp->delegationlist != fp->delegationlist.next; 1816 rfs4_dbe_lock(fp->dbe)) { 1817 1818 dsp = fp->delegationlist.next->dsp; 1819 rfs4_dbe_hold(dsp->dbe); 1820 rfs4_dbe_unlock(fp->dbe); 1821 rfs4_revoke_deleg(dsp); 1822 rfs4_deleg_state_rele(dsp); 1823 } 1824 rfs4_dbe_unlock(fp->dbe); 1825 } 1826 1827 /* 1828 * A delegation is assumed to be present on the file associated with 1829 * "state". Check to see if the delegation matches is associated with 1830 * the same client as referenced by "state". If it is not, TRUE is 1831 * returned. If the delegation DOES match the client (or no 1832 * delegation is present), return FALSE. 1833 * Assume the state entry and file entry are locked. 1834 */ 1835 bool_t 1836 rfs4_is_deleg(rfs4_state_t *state) 1837 { 1838 rfs4_deleg_state_t *dsp; 1839 rfs4_file_t *fp = state->finfo; 1840 rfs4_client_t *cp = state->owner->client; 1841 1842 NFS4_DEBUG(rfs4_deleg_debug, 1843 (CE_NOTE, "rfs4_is_deleg enter: cp = %p", (void*)cp)); 1844 1845 ASSERT(rfs4_dbe_islocked(fp->dbe)); 1846 for (dsp = fp->delegationlist.next->dsp; dsp != NULL; 1847 dsp = dsp->delegationlist.next->dsp) { 1848 NFS4_DEBUG(rfs4_deleg_debug, 1849 (CE_NOTE, "rfs4_is_deleg: client = %p", 1850 (void*)dsp->client)); 1851 if (cp != dsp->client) { 1852 NFS4_DEBUG(rfs4_deleg_debug, 1853 (CE_NOTE, "rfs4_is_deleg is true")); 1854 return (TRUE); 1855 } 1856 } 1857 return (FALSE); 1858 } 1859 1860 void 1861 rfs4_disable_delegation(void) 1862 { 1863 mutex_enter(&rfs4_deleg_lock); 1864 rfs4_deleg_disabled++; 1865 mutex_exit(&rfs4_deleg_lock); 1866 } 1867 1868 void 1869 rfs4_enable_delegation(void) 1870 { 1871 mutex_enter(&rfs4_deleg_lock); 1872 ASSERT(rfs4_deleg_disabled > 0); 1873 rfs4_deleg_disabled--; 1874 mutex_exit(&rfs4_deleg_lock); 1875 } 1876 1877 void 1878 rfs4_mon_hold(void *arg) 1879 { 1880 rfs4_file_t *fp = arg; 1881 1882 rfs4_dbe_hold(fp->dbe); 1883 } 1884 1885 void 1886 rfs4_mon_rele(void *arg) 1887 { 1888 rfs4_file_t *fp = arg; 1889 1890 rfs4_dbe_rele_nolock(fp->dbe); 1891 } 1892