1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/systm.h> 27 #include <rpc/auth.h> 28 #include <rpc/clnt.h> 29 #include <nfs/nfs4_kprot.h> 30 #include <nfs/nfs4.h> 31 #include <nfs/lm.h> 32 #include <sys/cmn_err.h> 33 #include <sys/disp.h> 34 #include <sys/sdt.h> 35 36 #include <sys/pathname.h> 37 38 #include <sys/strsubr.h> 39 #include <sys/ddi.h> 40 41 #include <sys/vnode.h> 42 #include <sys/sdt.h> 43 #include <inet/common.h> 44 #include <inet/ip.h> 45 #include <inet/ip6.h> 46 47 #define MAX_READ_DELEGATIONS 5 48 49 krwlock_t rfs4_deleg_policy_lock; 50 srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE; 51 static int rfs4_deleg_wlp = 5; 52 kmutex_t rfs4_deleg_lock; 53 static int rfs4_deleg_disabled; 54 static int rfs4_max_setup_cb_tries = 5; 55 56 #ifdef DEBUG 57 58 static int rfs4_test_cbgetattr_fail = 0; 59 int rfs4_cb_null; 60 int rfs4_cb_debug; 61 int rfs4_deleg_debug; 62 63 #endif 64 65 static void rfs4_recall_file(rfs4_file_t *, 66 void (*recall)(rfs4_deleg_state_t *, bool_t), 67 bool_t, rfs4_client_t *); 68 static void rfs4_revoke_file(rfs4_file_t *); 69 static void rfs4_cb_chflush(rfs4_cbinfo_t *); 70 static CLIENT *rfs4_cb_getch(rfs4_cbinfo_t *); 71 static void rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t); 72 static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *, 73 open_delegation_type4, int *); 74 75 /* 76 * Convert a universal address to an transport specific 77 * address using inet_pton. 78 */ 79 static int 80 uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp) 81 { 82 int dots = 0, i, j, len, k; 83 unsigned char c; 84 in_port_t port = 0; 85 86 len = strlen(ua); 87 88 for (i = len-1; i >= 0; i--) { 89 90 if (ua[i] == '.') 91 dots++; 92 93 if (dots == 2) { 94 95 ua[i] = '\0'; 96 /* 97 * We use k to remember were to stick '.' back, since 98 * ua was kmem_allocateded from the pool len+1. 99 */ 100 k = i; 101 if (inet_pton(af, ua, ap) == 1) { 102 103 c = 0; 104 105 for (j = i+1; j < len; j++) { 106 if (ua[j] == '.') { 107 port = c << 8; 108 c = 0; 109 } else if (ua[j] >= '0' && 110 ua[j] <= '9') { 111 c *= 10; 112 c += ua[j] - '0'; 113 } else { 114 ua[k] = '.'; 115 return (EINVAL); 116 } 117 } 118 port += c; 119 120 121 /* reset to network order */ 122 if (af == AF_INET) { 123 *(uint32_t *)ap = 124 htonl(*(uint32_t *)ap); 125 *pp = htons(port); 126 } else { 127 int ix; 128 uint16_t *sap; 129 130 for (sap = ap, ix = 0; ix < 131 sizeof (struct in6_addr) / 132 sizeof (uint16_t); ix++) 133 sap[ix] = htons(sap[ix]); 134 135 *pp = htons(port); 136 } 137 138 ua[k] = '.'; 139 return (0); 140 } else { 141 ua[k] = '.'; 142 return (EINVAL); 143 } 144 } 145 } 146 147 return (EINVAL); 148 } 149 150 /* 151 * Update the delegation policy with the 152 * value of "new_policy" 153 */ 154 void 155 rfs4_set_deleg_policy(srv_deleg_policy_t new_policy) 156 { 157 rw_enter(&rfs4_deleg_policy_lock, RW_WRITER); 158 rfs4_deleg_policy = new_policy; 159 rw_exit(&rfs4_deleg_policy_lock); 160 } 161 162 void 163 rfs4_hold_deleg_policy(void) 164 { 165 rw_enter(&rfs4_deleg_policy_lock, RW_READER); 166 } 167 168 void 169 rfs4_rele_deleg_policy(void) 170 { 171 rw_exit(&rfs4_deleg_policy_lock); 172 } 173 174 175 /* 176 * This free function is to be used when the client struct is being 177 * released and nothing at all is needed of the callback info any 178 * longer. 179 */ 180 void 181 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp) 182 { 183 char *addr = cbp->cb_callback.cb_location.r_addr; 184 char *netid = cbp->cb_callback.cb_location.r_netid; 185 186 /* Free old address if any */ 187 188 if (addr) 189 kmem_free(addr, strlen(addr) + 1); 190 if (netid) 191 kmem_free(netid, strlen(netid) + 1); 192 193 addr = cbp->cb_newer.cb_callback.cb_location.r_addr; 194 netid = cbp->cb_newer.cb_callback.cb_location.r_netid; 195 196 if (addr) 197 kmem_free(addr, strlen(addr) + 1); 198 if (netid) 199 kmem_free(netid, strlen(netid) + 1); 200 201 if (cbp->cb_chc_free) { 202 rfs4_cb_chflush(cbp); 203 } 204 } 205 206 /* 207 * The server uses this to check the callback path supplied by the 208 * client. The callback connection is marked "in progress" while this 209 * work is going on and then eventually marked either OK or FAILED. 210 * This work can be done as part of a separate thread and at the end 211 * of this the thread will exit or it may be done such that the caller 212 * will continue with other work. 213 */ 214 static void 215 rfs4_do_cb_null(rfs4_client_t *cp) 216 { 217 struct timeval tv; 218 CLIENT *ch; 219 rfs4_cbstate_t newstate; 220 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo; 221 222 mutex_enter(cbp->cb_lock); 223 /* If another thread is doing CB_NULL RPC then return */ 224 if (cbp->cb_nullcaller == TRUE) { 225 mutex_exit(cbp->cb_lock); 226 rfs4_client_rele(cp); 227 return; 228 } 229 230 /* Mark the cbinfo as having a thread in the NULL callback */ 231 cbp->cb_nullcaller = TRUE; 232 233 /* 234 * Are there other threads still using the cbinfo client 235 * handles? If so, this thread must wait before going and 236 * mucking aroiund with the callback information 237 */ 238 while (cbp->cb_refcnt != 0) 239 cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock); 240 241 /* 242 * This thread itself may find that new callback info has 243 * arrived and is set up to handle this case and redrive the 244 * call to the client's callback server. 245 */ 246 retry: 247 if (cbp->cb_newer.cb_new == TRUE && 248 cbp->cb_newer.cb_confirmed == TRUE) { 249 char *addr = cbp->cb_callback.cb_location.r_addr; 250 char *netid = cbp->cb_callback.cb_location.r_netid; 251 252 /* 253 * Free the old stuff if it exists; may be the first 254 * time through this path 255 */ 256 if (addr) 257 kmem_free(addr, strlen(addr) + 1); 258 if (netid) 259 kmem_free(netid, strlen(netid) + 1); 260 261 /* Move over the addr/netid */ 262 cbp->cb_callback.cb_location.r_addr = 263 cbp->cb_newer.cb_callback.cb_location.r_addr; 264 cbp->cb_newer.cb_callback.cb_location.r_addr = NULL; 265 cbp->cb_callback.cb_location.r_netid = 266 cbp->cb_newer.cb_callback.cb_location.r_netid; 267 cbp->cb_newer.cb_callback.cb_location.r_netid = NULL; 268 269 /* Get the program number */ 270 cbp->cb_callback.cb_program = 271 cbp->cb_newer.cb_callback.cb_program; 272 cbp->cb_newer.cb_callback.cb_program = 0; 273 274 /* Don't forget the protocol's "cb_ident" field */ 275 cbp->cb_ident = cbp->cb_newer.cb_ident; 276 cbp->cb_newer.cb_ident = 0; 277 278 /* no longer new */ 279 cbp->cb_newer.cb_new = FALSE; 280 cbp->cb_newer.cb_confirmed = FALSE; 281 282 /* get rid of the old client handles that may exist */ 283 rfs4_cb_chflush(cbp); 284 285 cbp->cb_state = CB_NONE; 286 cbp->cb_timefailed = 0; /* reset the clock */ 287 cbp->cb_notified_of_cb_path_down = TRUE; 288 } 289 290 if (cbp->cb_state != CB_NONE) { 291 cv_broadcast(cbp->cb_cv); /* let the others know */ 292 cbp->cb_nullcaller = FALSE; 293 mutex_exit(cbp->cb_lock); 294 rfs4_client_rele(cp); 295 return; 296 } 297 298 /* mark rfs4_client_t as CALLBACK NULL in progress */ 299 cbp->cb_state = CB_INPROG; 300 mutex_exit(cbp->cb_lock); 301 302 /* get/generate a client handle */ 303 if ((ch = rfs4_cb_getch(cbp)) == NULL) { 304 mutex_enter(cbp->cb_lock); 305 cbp->cb_state = CB_BAD; 306 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 307 goto retry; 308 } 309 310 311 tv.tv_sec = 30; 312 tv.tv_usec = 0; 313 if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) { 314 newstate = CB_BAD; 315 } else { 316 newstate = CB_OK; 317 #ifdef DEBUG 318 rfs4_cb_null++; 319 #endif 320 } 321 322 /* Check to see if the client has specified new callback info */ 323 mutex_enter(cbp->cb_lock); 324 rfs4_cb_freech(cbp, ch, TRUE); 325 if (cbp->cb_newer.cb_new == TRUE && 326 cbp->cb_newer.cb_confirmed == TRUE) { 327 goto retry; /* give the CB_NULL another chance */ 328 } 329 330 cbp->cb_state = newstate; 331 if (cbp->cb_state == CB_BAD) 332 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 333 334 cv_broadcast(cbp->cb_cv); /* start up the other threads */ 335 cbp->cb_nullcaller = FALSE; 336 mutex_exit(cbp->cb_lock); 337 338 rfs4_client_rele(cp); 339 } 340 341 /* 342 * Given a client struct, inspect the callback info to see if the 343 * callback path is up and available. 344 * 345 * If new callback path is available and no one has set it up then 346 * try to set it up. If setup is not successful after 5 tries (5 secs) 347 * then gives up and returns NULL. 348 * 349 * If callback path is being initialized, then wait for the CB_NULL RPC 350 * call to occur. 351 */ 352 static rfs4_cbinfo_t * 353 rfs4_cbinfo_hold(rfs4_client_t *cp) 354 { 355 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo; 356 int retries = 0; 357 358 mutex_enter(cbp->cb_lock); 359 360 while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) { 361 /* 362 * Looks like a new callback path may be available and 363 * noone has set it up. 364 */ 365 mutex_exit(cbp->cb_lock); 366 rfs4_dbe_hold(cp->rc_dbe); 367 rfs4_do_cb_null(cp); /* caller will release client hold */ 368 369 mutex_enter(cbp->cb_lock); 370 /* 371 * If callback path is no longer new, or it's being setup 372 * then stop and wait for it to be done. 373 */ 374 if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE) 375 break; 376 mutex_exit(cbp->cb_lock); 377 378 if (++retries >= rfs4_max_setup_cb_tries) 379 return (NULL); 380 delay(hz); 381 mutex_enter(cbp->cb_lock); 382 } 383 384 /* Is there a thread working on doing the CB_NULL RPC? */ 385 if (cbp->cb_nullcaller == TRUE) 386 cv_wait(cbp->cb_cv, cbp->cb_lock); /* if so, wait on it */ 387 388 /* If the callback path is not okay (up and running), just quit */ 389 if (cbp->cb_state != CB_OK) { 390 mutex_exit(cbp->cb_lock); 391 return (NULL); 392 } 393 394 /* Let someone know we are using the current callback info */ 395 cbp->cb_refcnt++; 396 mutex_exit(cbp->cb_lock); 397 return (cbp); 398 } 399 400 /* 401 * The caller is done with the callback info. It may be that the 402 * caller's RPC failed and the NFSv4 client has actually provided new 403 * callback information. If so, let the caller know so they can 404 * advantage of this and maybe retry the RPC that originally failed. 405 */ 406 static int 407 rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate) 408 { 409 int cb_new = FALSE; 410 411 mutex_enter(cbp->cb_lock); 412 413 /* The caller gets a chance to mark the callback info as bad */ 414 if (newstate != CB_NOCHANGE) 415 cbp->cb_state = newstate; 416 if (newstate == CB_FAILED) { 417 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 418 cbp->cb_notified_of_cb_path_down = FALSE; 419 } 420 421 cbp->cb_refcnt--; /* no longer using the information */ 422 423 /* 424 * A thread may be waiting on this one to finish and if so, 425 * let it know that it is okay to do the CB_NULL to the 426 * client's callback server. 427 */ 428 if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller) 429 cv_broadcast(cbp->cb_cv_nullcaller); 430 431 /* 432 * If this is the last thread to use the callback info and 433 * there is new callback information to try and no thread is 434 * there ready to do the CB_NULL, then return true to teh 435 * caller so they can do the CB_NULL 436 */ 437 if (cbp->cb_refcnt == 0 && 438 cbp->cb_nullcaller == FALSE && 439 cbp->cb_newer.cb_new == TRUE && 440 cbp->cb_newer.cb_confirmed == TRUE) 441 cb_new = TRUE; 442 443 mutex_exit(cbp->cb_lock); 444 445 return (cb_new); 446 } 447 448 /* 449 * Given the information in the callback info struct, create a client 450 * handle that can be used by the server for its callback path. 451 */ 452 static CLIENT * 453 rfs4_cbch_init(rfs4_cbinfo_t *cbp) 454 { 455 struct knetconfig knc; 456 vnode_t *vp; 457 struct sockaddr_in addr4; 458 struct sockaddr_in6 addr6; 459 void *addr, *taddr; 460 in_port_t *pp; 461 int af; 462 char *devnam; 463 struct netbuf nb; 464 int size; 465 CLIENT *ch = NULL; 466 int useresvport = 0; 467 468 mutex_enter(cbp->cb_lock); 469 470 if (cbp->cb_callback.cb_location.r_netid == NULL || 471 cbp->cb_callback.cb_location.r_addr == NULL) { 472 goto cb_init_out; 473 } 474 475 if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) { 476 knc.knc_semantics = NC_TPI_COTS; 477 knc.knc_protofmly = "inet"; 478 knc.knc_proto = "tcp"; 479 devnam = "/dev/tcp"; 480 af = AF_INET; 481 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp") 482 == 0) { 483 knc.knc_semantics = NC_TPI_CLTS; 484 knc.knc_protofmly = "inet"; 485 knc.knc_proto = "udp"; 486 devnam = "/dev/udp"; 487 af = AF_INET; 488 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6") 489 == 0) { 490 knc.knc_semantics = NC_TPI_COTS; 491 knc.knc_protofmly = "inet6"; 492 knc.knc_proto = "tcp"; 493 devnam = "/dev/tcp6"; 494 af = AF_INET6; 495 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6") 496 == 0) { 497 knc.knc_semantics = NC_TPI_CLTS; 498 knc.knc_protofmly = "inet6"; 499 knc.knc_proto = "udp"; 500 devnam = "/dev/udp6"; 501 af = AF_INET6; 502 } else { 503 goto cb_init_out; 504 } 505 506 if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) { 507 508 goto cb_init_out; 509 } 510 511 if (vp->v_type != VCHR) { 512 VN_RELE(vp); 513 goto cb_init_out; 514 } 515 516 knc.knc_rdev = vp->v_rdev; 517 518 VN_RELE(vp); 519 520 if (af == AF_INET) { 521 size = sizeof (addr4); 522 bzero(&addr4, size); 523 addr4.sin_family = (sa_family_t)af; 524 addr = &addr4.sin_addr; 525 pp = &addr4.sin_port; 526 taddr = &addr4; 527 } else /* AF_INET6 */ { 528 size = sizeof (addr6); 529 bzero(&addr6, size); 530 addr6.sin6_family = (sa_family_t)af; 531 addr = &addr6.sin6_addr; 532 pp = &addr6.sin6_port; 533 taddr = &addr6; 534 } 535 536 if (uaddr2sockaddr(af, 537 cbp->cb_callback.cb_location.r_addr, addr, pp)) { 538 539 goto cb_init_out; 540 } 541 542 543 nb.maxlen = nb.len = size; 544 nb.buf = (char *)taddr; 545 546 if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program, 547 NFS_CB, 0, 0, curthread->t_cred, &ch)) { 548 549 ch = NULL; 550 } 551 552 /* turn off reserved port usage */ 553 (void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport); 554 555 cb_init_out: 556 mutex_exit(cbp->cb_lock); 557 return (ch); 558 } 559 560 /* 561 * Iterate over the client handle cache and 562 * destroy it. 563 */ 564 static void 565 rfs4_cb_chflush(rfs4_cbinfo_t *cbp) 566 { 567 CLIENT *ch; 568 569 while (cbp->cb_chc_free) { 570 cbp->cb_chc_free--; 571 ch = cbp->cb_chc[cbp->cb_chc_free]; 572 cbp->cb_chc[cbp->cb_chc_free] = NULL; 573 if (ch) { 574 if (ch->cl_auth) 575 auth_destroy(ch->cl_auth); 576 clnt_destroy(ch); 577 } 578 } 579 } 580 581 /* 582 * Return a client handle, either from a the small 583 * rfs4_client_t cache or one that we just created. 584 */ 585 static CLIENT * 586 rfs4_cb_getch(rfs4_cbinfo_t *cbp) 587 { 588 CLIENT *cbch = NULL; 589 uint32_t zilch = 0; 590 591 mutex_enter(cbp->cb_lock); 592 593 if (cbp->cb_chc_free) { 594 cbp->cb_chc_free--; 595 cbch = cbp->cb_chc[ cbp->cb_chc_free ]; 596 mutex_exit(cbp->cb_lock); 597 (void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch); 598 return (cbch); 599 } 600 601 mutex_exit(cbp->cb_lock); 602 603 /* none free so make it now */ 604 cbch = rfs4_cbch_init(cbp); 605 606 return (cbch); 607 } 608 609 /* 610 * Return the client handle to the small cache or 611 * destroy it. 612 */ 613 static void 614 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld) 615 { 616 if (lockheld == FALSE) 617 mutex_enter(cbp->cb_lock); 618 619 if (cbp->cb_chc_free < RFS4_CBCH_MAX) { 620 cbp->cb_chc[ cbp->cb_chc_free++ ] = ch; 621 if (lockheld == FALSE) 622 mutex_exit(cbp->cb_lock); 623 return; 624 } 625 if (lockheld == FALSE) 626 mutex_exit(cbp->cb_lock); 627 628 /* 629 * cache maxed out of free entries, obliterate 630 * this client handle, destroy it, throw it away. 631 */ 632 if (ch->cl_auth) 633 auth_destroy(ch->cl_auth); 634 clnt_destroy(ch); 635 } 636 637 /* 638 * With the supplied callback information - initialize the client 639 * callback data. If there is a callback in progress, save the 640 * callback info so that a thread can pick it up in the future. 641 */ 642 void 643 rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident) 644 { 645 char *addr = NULL; 646 char *netid = NULL; 647 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo; 648 size_t len; 649 650 /* Set the call back for the client */ 651 if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' && 652 cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') { 653 len = strlen(cb->cb_location.r_addr) + 1; 654 addr = kmem_alloc(len, KM_SLEEP); 655 bcopy(cb->cb_location.r_addr, addr, len); 656 len = strlen(cb->cb_location.r_netid) + 1; 657 netid = kmem_alloc(len, KM_SLEEP); 658 bcopy(cb->cb_location.r_netid, netid, len); 659 } 660 /* ready to save the new information but first free old, if exists */ 661 mutex_enter(cbp->cb_lock); 662 663 cbp->cb_newer.cb_callback.cb_program = cb->cb_program; 664 665 if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL) 666 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr, 667 strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1); 668 cbp->cb_newer.cb_callback.cb_location.r_addr = addr; 669 670 if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL) 671 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid, 672 strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1); 673 cbp->cb_newer.cb_callback.cb_location.r_netid = netid; 674 675 cbp->cb_newer.cb_ident = cb_ident; 676 677 if (addr && *addr && netid && *netid) { 678 cbp->cb_newer.cb_new = TRUE; 679 cbp->cb_newer.cb_confirmed = FALSE; 680 } else { 681 cbp->cb_newer.cb_new = FALSE; 682 cbp->cb_newer.cb_confirmed = FALSE; 683 } 684 685 mutex_exit(cbp->cb_lock); 686 } 687 688 /* 689 * The server uses this when processing SETCLIENTID_CONFIRM. Callback 690 * information may have been provided on SETCLIENTID and this call 691 * marks that information as confirmed and then starts a thread to 692 * test the callback path. 693 */ 694 void 695 rfs4_deleg_cb_check(rfs4_client_t *cp) 696 { 697 if (cp->rc_cbinfo.cb_newer.cb_new == FALSE) 698 return; 699 700 cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE; 701 702 rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */ 703 704 (void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN, 705 minclsyspri); 706 } 707 708 static void 709 rfs4args_cb_recall_free(nfs_cb_argop4 *argop) 710 { 711 CB_RECALL4args *rec_argp; 712 713 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 714 if (rec_argp->fh.nfs_fh4_val) 715 kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len); 716 } 717 718 /* ARGSUSED */ 719 static void 720 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop) 721 { 722 CB_GETATTR4args *argp; 723 724 argp = &argop->nfs_cb_argop4_u.opcbgetattr; 725 if (argp->fh.nfs_fh4_val) 726 kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len); 727 } 728 729 static void 730 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp) 731 { 732 int i, arglen; 733 nfs_cb_argop4 *argop; 734 735 /* 736 * First free any special args alloc'd for specific ops. 737 */ 738 arglen = args->array_len; 739 argop = args->array; 740 for (i = 0; i < arglen; i++, argop++) { 741 742 switch (argop->argop) { 743 case OP_CB_RECALL: 744 rfs4args_cb_recall_free(argop); 745 break; 746 747 case OP_CB_GETATTR: 748 rfs4args_cb_getattr_free(argop); 749 break; 750 751 default: 752 return; 753 } 754 } 755 756 if (args->tag.utf8string_len > 0) 757 UTF8STRING_FREE(args->tag) 758 759 kmem_free(args->array, arglen * sizeof (nfs_cb_argop4)); 760 if (resp) 761 (void) xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp); 762 } 763 764 /* 765 * General callback routine for the server to the client. 766 */ 767 static enum clnt_stat 768 rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args, 769 CB_COMPOUND4res *res, struct timeval timeout) 770 { 771 rfs4_cbinfo_t *cbp; 772 CLIENT *ch; 773 /* start with this in case cb_getch() fails */ 774 enum clnt_stat stat = RPC_FAILED; 775 776 res->tag.utf8string_val = NULL; 777 res->array = NULL; 778 779 retry: 780 cbp = rfs4_cbinfo_hold(cp); 781 if (cbp == NULL) 782 return (stat); 783 784 /* get a client handle */ 785 if ((ch = rfs4_cb_getch(cbp)) != NULL) { 786 /* 787 * reset the cb_ident since it may have changed in 788 * rfs4_cbinfo_hold() 789 */ 790 args->callback_ident = cbp->cb_ident; 791 792 stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv, 793 (caddr_t)args, xdr_CB_COMPOUND4res, 794 (caddr_t)res, timeout); 795 796 /* free client handle */ 797 rfs4_cb_freech(cbp, ch, FALSE); 798 } 799 800 /* 801 * If the rele says that there may be new callback info then 802 * retry this sequence and it may succeed as a result of the 803 * new callback path 804 */ 805 if (rfs4_cbinfo_rele(cbp, 806 (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE) 807 goto retry; 808 809 return (stat); 810 } 811 812 /* 813 * Used by the NFSv4 server to get attributes for a file while 814 * handling the case where a file has been write delegated. For the 815 * time being, VOP_GETATTR() is called and CB_GETATTR processing is 816 * not undertaken. This call site is maintained in case the server is 817 * updated in the future to handle write delegation space guarantees. 818 */ 819 nfsstat4 820 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 821 { 822 823 int error; 824 825 error = VOP_GETATTR(vp, vap, flag, cr, NULL); 826 return (puterrno4(error)); 827 } 828 829 /* 830 * This is used everywhere in the v2/v3 server to allow the 831 * integration of all NFS versions and the support of delegation. For 832 * now, just call the VOP_GETATTR(). If the NFSv4 server is enhanced 833 * in the future to provide space guarantees for write delegations 834 * then this call site should be expanded to interact with the client. 835 */ 836 int 837 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 838 { 839 return (VOP_GETATTR(vp, vap, flag, cr, NULL)); 840 } 841 842 /* 843 * Place the actual cb_recall otw call to client. 844 */ 845 static void 846 rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc) 847 { 848 CB_COMPOUND4args cb4_args; 849 CB_COMPOUND4res cb4_res; 850 CB_RECALL4args *rec_argp; 851 CB_RECALL4res *rec_resp; 852 nfs_cb_argop4 *argop; 853 int numops; 854 int argoplist_size; 855 struct timeval timeout; 856 nfs_fh4 *fhp; 857 enum clnt_stat call_stat; 858 859 /* 860 * set up the compound args 861 */ 862 numops = 1; /* CB_RECALL only */ 863 864 argoplist_size = numops * sizeof (nfs_cb_argop4); 865 argop = kmem_zalloc(argoplist_size, KM_SLEEP); 866 argop->argop = OP_CB_RECALL; 867 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 868 869 (void) str_to_utf8("cb_recall", &cb4_args.tag); 870 cb4_args.minorversion = CB4_MINORVERSION; 871 /* cb4_args.callback_ident is set in rfs4_do_callback() */ 872 cb4_args.array_len = numops; 873 cb4_args.array = argop; 874 875 /* 876 * fill in the args struct 877 */ 878 bcopy(&dsp->rds_delegid.stateid, &rec_argp->stateid, sizeof (stateid4)); 879 rec_argp->truncate = trunc; 880 881 fhp = &dsp->rds_finfo->rf_filehandle; 882 rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) * 883 fhp->nfs_fh4_len, KM_SLEEP); 884 nfs_fh4_copy(fhp, &rec_argp->fh); 885 886 /* Keep track of when we did this for observability */ 887 dsp->rds_time_recalled = gethrestime_sec(); 888 889 /* 890 * Set up the timeout for the callback and make the actual call. 891 * Timeout will be 80% of the lease period for this server. 892 */ 893 timeout.tv_sec = (rfs4_lease_time * 80) / 100; 894 timeout.tv_usec = 0; 895 896 DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->rds_client, 897 rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp); 898 899 call_stat = rfs4_do_callback(dsp->rds_client, &cb4_args, &cb4_res, 900 timeout); 901 902 rec_resp = (cb4_res.array_len == 0) ? NULL : 903 &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall; 904 905 DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->rds_client, 906 rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp); 907 908 if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) { 909 rfs4_return_deleg(dsp, TRUE); 910 } 911 912 rfs4freeargres(&cb4_args, &cb4_res); 913 } 914 915 struct recall_arg { 916 rfs4_deleg_state_t *dsp; 917 void (*recall)(rfs4_deleg_state_t *, bool_t trunc); 918 bool_t trunc; 919 }; 920 921 static void 922 do_recall(struct recall_arg *arg) 923 { 924 rfs4_deleg_state_t *dsp = arg->dsp; 925 rfs4_file_t *fp = dsp->rds_finfo; 926 callb_cpr_t cpr_info; 927 kmutex_t cpr_lock; 928 929 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 930 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall"); 931 932 /* 933 * It is possible that before this thread starts 934 * the client has send us a return_delegation, and 935 * if that is the case we do not need to send the 936 * recall callback. 937 */ 938 if (dsp->rds_dtype != OPEN_DELEGATE_NONE) { 939 DTRACE_PROBE3(nfss__i__recall, 940 struct recall_arg *, arg, 941 struct rfs4_deleg_state_t *, dsp, 942 struct rfs4_file_t *, fp); 943 944 if (arg->recall) 945 (void) (*arg->recall)(dsp, arg->trunc); 946 } 947 948 mutex_enter(fp->rf_dinfo.rd_recall_lock); 949 /* 950 * Recall count may go negative if the parent thread that is 951 * creating the individual callback threads does not modify 952 * the recall_count field before the callback thread actually 953 * gets a response from the CB_RECALL 954 */ 955 fp->rf_dinfo.rd_recall_count--; 956 if (fp->rf_dinfo.rd_recall_count == 0) 957 cv_signal(fp->rf_dinfo.rd_recall_cv); 958 mutex_exit(fp->rf_dinfo.rd_recall_lock); 959 960 mutex_enter(&cpr_lock); 961 CALLB_CPR_EXIT(&cpr_info); 962 mutex_destroy(&cpr_lock); 963 964 rfs4_deleg_state_rele(dsp); /* release the hold for this thread */ 965 966 kmem_free(arg, sizeof (struct recall_arg)); 967 } 968 969 struct master_recall_args { 970 rfs4_file_t *fp; 971 void (*recall)(rfs4_deleg_state_t *, bool_t); 972 bool_t trunc; 973 }; 974 975 static void 976 do_recall_file(struct master_recall_args *map) 977 { 978 rfs4_file_t *fp = map->fp; 979 rfs4_deleg_state_t *dsp; 980 struct recall_arg *arg; 981 callb_cpr_t cpr_info; 982 kmutex_t cpr_lock; 983 int32_t recall_count; 984 985 rfs4_dbe_lock(fp->rf_dbe); 986 987 /* Recall already in progress ? */ 988 mutex_enter(fp->rf_dinfo.rd_recall_lock); 989 if (fp->rf_dinfo.rd_recall_count != 0) { 990 mutex_exit(fp->rf_dinfo.rd_recall_lock); 991 rfs4_dbe_rele_nolock(fp->rf_dbe); 992 rfs4_dbe_unlock(fp->rf_dbe); 993 kmem_free(map, sizeof (struct master_recall_args)); 994 return; 995 } 996 997 mutex_exit(fp->rf_dinfo.rd_recall_lock); 998 999 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 1000 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "v4RecallFile"); 1001 1002 recall_count = 0; 1003 for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL; 1004 dsp = list_next(&fp->rf_delegstatelist, dsp)) { 1005 1006 rfs4_dbe_lock(dsp->rds_dbe); 1007 /* 1008 * if this delegation state 1009 * is being reaped skip it 1010 */ 1011 if (rfs4_dbe_is_invalid(dsp->rds_dbe)) { 1012 rfs4_dbe_unlock(dsp->rds_dbe); 1013 continue; 1014 } 1015 1016 /* hold for receiving thread */ 1017 rfs4_dbe_hold(dsp->rds_dbe); 1018 rfs4_dbe_unlock(dsp->rds_dbe); 1019 1020 arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP); 1021 arg->recall = map->recall; 1022 arg->trunc = map->trunc; 1023 arg->dsp = dsp; 1024 1025 recall_count++; 1026 1027 (void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN, 1028 minclsyspri); 1029 } 1030 1031 rfs4_dbe_unlock(fp->rf_dbe); 1032 1033 mutex_enter(fp->rf_dinfo.rd_recall_lock); 1034 /* 1035 * Recall count may go negative if the parent thread that is 1036 * creating the individual callback threads does not modify 1037 * the recall_count field before the callback thread actually 1038 * gets a response from the CB_RECALL 1039 */ 1040 fp->rf_dinfo.rd_recall_count += recall_count; 1041 while (fp->rf_dinfo.rd_recall_count) 1042 cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock); 1043 1044 mutex_exit(fp->rf_dinfo.rd_recall_lock); 1045 1046 DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp); 1047 rfs4_file_rele(fp); 1048 kmem_free(map, sizeof (struct master_recall_args)); 1049 mutex_enter(&cpr_lock); 1050 CALLB_CPR_EXIT(&cpr_info); 1051 mutex_destroy(&cpr_lock); 1052 } 1053 1054 static void 1055 rfs4_recall_file(rfs4_file_t *fp, 1056 void (*recall)(rfs4_deleg_state_t *, bool_t trunc), 1057 bool_t trunc, rfs4_client_t *cp) 1058 { 1059 struct master_recall_args *args; 1060 1061 rfs4_dbe_lock(fp->rf_dbe); 1062 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) { 1063 rfs4_dbe_unlock(fp->rf_dbe); 1064 return; 1065 } 1066 rfs4_dbe_hold(fp->rf_dbe); /* hold for new thread */ 1067 1068 /* 1069 * Mark the time we started the recall processing. 1070 * If it has been previously recalled, do not reset the 1071 * timer since this is used for the revocation decision. 1072 */ 1073 if (fp->rf_dinfo.rd_time_recalled == 0) 1074 fp->rf_dinfo.rd_time_recalled = gethrestime_sec(); 1075 fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */ 1076 /* Client causing recall not always available */ 1077 if (cp) 1078 fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid; 1079 1080 rfs4_dbe_unlock(fp->rf_dbe); 1081 1082 args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP); 1083 args->fp = fp; 1084 args->recall = recall; 1085 args->trunc = trunc; 1086 1087 (void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN, 1088 minclsyspri); 1089 } 1090 1091 void 1092 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp) 1093 { 1094 time_t elapsed1, elapsed2; 1095 1096 if (fp->rf_dinfo.rd_time_recalled != 0) { 1097 elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled; 1098 elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite; 1099 /* First check to see if a revocation should occur */ 1100 if (elapsed1 > rfs4_lease_time && 1101 elapsed2 > rfs4_lease_time) { 1102 rfs4_revoke_file(fp); 1103 return; 1104 } 1105 /* 1106 * Next check to see if a recall should be done again 1107 * so quickly. 1108 */ 1109 if (elapsed1 <= ((rfs4_lease_time * 20) / 100)) 1110 return; 1111 } 1112 rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp); 1113 } 1114 1115 /* 1116 * rfs4_check_recall is called from rfs4_do_open to determine if the current 1117 * open conflicts with the delegation. 1118 * Return true if we need recall otherwise false. 1119 * Assumes entry locks for sp and sp->rs_finfo are held. 1120 */ 1121 bool_t 1122 rfs4_check_recall(rfs4_state_t *sp, uint32_t access) 1123 { 1124 open_delegation_type4 dtype = sp->rs_finfo->rf_dinfo.rd_dtype; 1125 1126 switch (dtype) { 1127 case OPEN_DELEGATE_NONE: 1128 /* Not currently delegated so there is nothing to do */ 1129 return (FALSE); 1130 case OPEN_DELEGATE_READ: 1131 /* 1132 * If the access is only asking for READ then there is 1133 * no conflict and nothing to do. If it is asking 1134 * for write, then there will be conflict and the read 1135 * delegation should be recalled. 1136 */ 1137 if (access == OPEN4_SHARE_ACCESS_READ) 1138 return (FALSE); 1139 else 1140 return (TRUE); 1141 case OPEN_DELEGATE_WRITE: 1142 /* Check to see if this client has the delegation */ 1143 return (rfs4_is_deleg(sp)); 1144 } 1145 1146 return (FALSE); 1147 } 1148 1149 /* 1150 * Return the "best" allowable delegation available given the current 1151 * delegation type and the desired access and deny modes on the file. 1152 * At the point that this routine is called we know that the access and 1153 * deny modes are consistent with the file modes. 1154 */ 1155 static open_delegation_type4 1156 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp) 1157 { 1158 open_delegation_type4 dtype = fp->rf_dinfo.rd_dtype; 1159 uint32_t access = sp->rs_share_access; 1160 uint32_t deny = sp->rs_share_deny; 1161 int readcnt = 0; 1162 int writecnt = 0; 1163 1164 switch (dtype) { 1165 case OPEN_DELEGATE_NONE: 1166 /* 1167 * Determine if more than just this OPEN have the file 1168 * open and if so, no delegation may be provided to 1169 * the client. 1170 */ 1171 if (access & OPEN4_SHARE_ACCESS_WRITE) 1172 writecnt++; 1173 if (access & OPEN4_SHARE_ACCESS_READ) 1174 readcnt++; 1175 1176 if (fp->rf_access_read > readcnt || 1177 fp->rf_access_write > writecnt) 1178 return (OPEN_DELEGATE_NONE); 1179 1180 /* 1181 * If the client is going to write, or if the client 1182 * has exclusive access, return a write delegation. 1183 */ 1184 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1185 (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) 1186 return (OPEN_DELEGATE_WRITE); 1187 /* 1188 * If we don't want to write or we've haven't denied read 1189 * access to others, return a read delegation. 1190 */ 1191 if ((access & ~OPEN4_SHARE_ACCESS_WRITE) || 1192 (deny & ~OPEN4_SHARE_DENY_READ)) 1193 return (OPEN_DELEGATE_READ); 1194 1195 /* Shouldn't get here */ 1196 return (OPEN_DELEGATE_NONE); 1197 1198 case OPEN_DELEGATE_READ: 1199 /* 1200 * If the file is delegated for read but we wan't to 1201 * write or deny others to read then we can't delegate 1202 * the file. We shouldn't get here since the delegation should 1203 * have been recalled already. 1204 */ 1205 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1206 (deny & OPEN4_SHARE_DENY_READ)) 1207 return (OPEN_DELEGATE_NONE); 1208 return (OPEN_DELEGATE_READ); 1209 1210 case OPEN_DELEGATE_WRITE: 1211 return (OPEN_DELEGATE_WRITE); 1212 } 1213 1214 /* Shouldn't get here */ 1215 return (OPEN_DELEGATE_NONE); 1216 } 1217 1218 /* 1219 * Given the desired delegation type and the "history" of the file 1220 * determine the actual delegation type to return. 1221 */ 1222 static open_delegation_type4 1223 rfs4_delegation_policy(open_delegation_type4 dtype, 1224 rfs4_dinfo_t *dinfo, clientid4 cid) 1225 { 1226 time_t elapsed; 1227 1228 if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE) 1229 return (OPEN_DELEGATE_NONE); 1230 1231 /* 1232 * Has this file/delegation ever been recalled? If not then 1233 * no further checks for a delegation race need to be done. 1234 * However if a recall has occurred, then check to see if a 1235 * client has caused its own delegation recall to occur. If 1236 * not, then has a delegation for this file been returned 1237 * recently? If so, then do not assign a new delegation to 1238 * avoid a "delegation race" between the original client and 1239 * the new/conflicting client. 1240 */ 1241 if (dinfo->rd_ever_recalled == TRUE) { 1242 if (dinfo->rd_conflicted_client != cid) { 1243 elapsed = gethrestime_sec() - dinfo->rd_time_returned; 1244 if (elapsed < rfs4_lease_time) 1245 return (OPEN_DELEGATE_NONE); 1246 } 1247 } 1248 1249 /* Limit the number of read grants */ 1250 if (dtype == OPEN_DELEGATE_READ && 1251 dinfo->rd_rdgrants > MAX_READ_DELEGATIONS) 1252 return (OPEN_DELEGATE_NONE); 1253 1254 /* 1255 * Should consider limiting total number of read/write 1256 * delegations the server will permit. 1257 */ 1258 1259 return (dtype); 1260 } 1261 1262 /* 1263 * Try and grant a delegation for an open give the state. The routine 1264 * returns the delegation type granted. This could be OPEN_DELEGATE_NONE. 1265 * 1266 * The state and associate file entry must be locked 1267 */ 1268 rfs4_deleg_state_t * 1269 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall) 1270 { 1271 rfs4_file_t *fp = sp->rs_finfo; 1272 open_delegation_type4 dtype; 1273 int no_delegation; 1274 1275 ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); 1276 ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); 1277 1278 /* Is the server even providing delegations? */ 1279 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE) 1280 return (NULL); 1281 1282 /* Check to see if delegations have been temporarily disabled */ 1283 mutex_enter(&rfs4_deleg_lock); 1284 no_delegation = rfs4_deleg_disabled; 1285 mutex_exit(&rfs4_deleg_lock); 1286 1287 if (no_delegation) 1288 return (NULL); 1289 1290 /* Don't grant a delegation if a deletion is impending. */ 1291 if (fp->rf_dinfo.rd_hold_grant > 0) { 1292 return (NULL); 1293 } 1294 1295 /* 1296 * Don't grant a delegation if there are any lock manager 1297 * (NFSv2/v3) locks for the file. This is a bit of a hack (e.g., 1298 * if there are only read locks we should be able to grant a 1299 * read-only delegation), but it's good enough for now. 1300 * 1301 * MT safety: the lock manager checks for conflicting delegations 1302 * before processing a lock request. That check will block until 1303 * we are done here. So if the lock manager acquires a lock after 1304 * we decide to grant the delegation, the delegation will get 1305 * immediately recalled (if there's a conflict), so we're safe. 1306 */ 1307 if (lm_vp_active(fp->rf_vp)) { 1308 return (NULL); 1309 } 1310 1311 /* 1312 * Based on the type of delegation request passed in, take the 1313 * appropriate action (DELEG_NONE is handled above) 1314 */ 1315 switch (dreq) { 1316 1317 case DELEG_READ: 1318 case DELEG_WRITE: 1319 /* 1320 * The server "must" grant the delegation in this case. 1321 * Client is using open previous 1322 */ 1323 dtype = (open_delegation_type4)dreq; 1324 *recall = 1; 1325 break; 1326 case DELEG_ANY: 1327 /* 1328 * If a valid callback path does not exist, no delegation may 1329 * be granted. 1330 */ 1331 if (sp->rs_owner->ro_client->rc_cbinfo.cb_state != CB_OK) 1332 return (NULL); 1333 1334 /* 1335 * If the original operation which caused time_rm_delayed 1336 * to be set hasn't been retried and completed for one 1337 * full lease period, clear it and allow delegations to 1338 * get granted again. 1339 */ 1340 if (fp->rf_dinfo.rd_time_rm_delayed > 0 && 1341 gethrestime_sec() > 1342 fp->rf_dinfo.rd_time_rm_delayed + rfs4_lease_time) 1343 fp->rf_dinfo.rd_time_rm_delayed = 0; 1344 1345 /* 1346 * If we are waiting for a delegation to be returned then 1347 * don't delegate this file. We do this for correctness as 1348 * well as if the file is being recalled we would likely 1349 * recall this file again. 1350 */ 1351 1352 if (fp->rf_dinfo.rd_time_recalled != 0 || 1353 fp->rf_dinfo.rd_time_rm_delayed != 0) 1354 return (NULL); 1355 1356 /* Get the "best" delegation candidate */ 1357 dtype = rfs4_check_delegation(sp, fp); 1358 1359 if (dtype == OPEN_DELEGATE_NONE) 1360 return (NULL); 1361 1362 /* 1363 * Based on policy and the history of the file get the 1364 * actual delegation. 1365 */ 1366 dtype = rfs4_delegation_policy(dtype, &fp->rf_dinfo, 1367 sp->rs_owner->ro_client->rc_clientid); 1368 1369 if (dtype == OPEN_DELEGATE_NONE) 1370 return (NULL); 1371 break; 1372 default: 1373 return (NULL); 1374 } 1375 1376 /* set the delegation for the state */ 1377 return (rfs4_deleg_state(sp, dtype, recall)); 1378 } 1379 1380 void 1381 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp, 1382 nfsace4 *ace, int recall) 1383 { 1384 open_write_delegation4 *wp; 1385 open_read_delegation4 *rp; 1386 nfs_space_limit4 *spl; 1387 nfsace4 nace; 1388 1389 /* 1390 * We need to allocate a new copy of the who string. 1391 * this string will be freed by the rfs4_op_open dis_resfree 1392 * routine. We need to do this allocation since replays will 1393 * be allocated and rfs4_compound can't tell the difference from 1394 * a replay and an inital open. N.B. if an ace is passed in, it 1395 * the caller's responsibility to free it. 1396 */ 1397 1398 if (ace == NULL) { 1399 /* 1400 * Default is to deny all access, the client will have 1401 * to contact the server. XXX Do we want to actually 1402 * set a deny for every one, or do we simply want to 1403 * construct an entity that will match no one? 1404 */ 1405 nace.type = ACE4_ACCESS_DENIED_ACE_TYPE; 1406 nace.flag = 0; 1407 nace.access_mask = ACE4_VALID_MASK_BITS; 1408 (void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who); 1409 } else { 1410 nace.type = ace->type; 1411 nace.flag = ace->flag; 1412 nace.access_mask = ace->access_mask; 1413 (void) utf8_copy(&ace->who, &nace.who); 1414 } 1415 1416 dp->delegation_type = dsp->rds_dtype; 1417 1418 switch (dsp->rds_dtype) { 1419 case OPEN_DELEGATE_NONE: 1420 break; 1421 case OPEN_DELEGATE_READ: 1422 rp = &dp->open_delegation4_u.read; 1423 rp->stateid = dsp->rds_delegid.stateid; 1424 rp->recall = (bool_t)recall; 1425 rp->permissions = nace; 1426 break; 1427 case OPEN_DELEGATE_WRITE: 1428 wp = &dp->open_delegation4_u.write; 1429 wp->stateid = dsp->rds_delegid.stateid; 1430 wp->recall = (bool_t)recall; 1431 spl = &wp->space_limit; 1432 spl->limitby = NFS_LIMIT_SIZE; 1433 spl->nfs_space_limit4_u.filesize = 0; 1434 wp->permissions = nace; 1435 break; 1436 } 1437 } 1438 1439 /* 1440 * Check if the file is delegated via the provided file struct. 1441 * Return TRUE if it is delegated. This is intended for use by 1442 * the v4 server. The v2/v3 server code should use rfs4_check_delegated(). 1443 * 1444 * Note that if the file is found to have a delegation, it is 1445 * recalled, unless the clientid of the caller matches the clientid of the 1446 * delegation. If the caller has specified, there is a slight delay 1447 * inserted in the hopes that the delegation will be returned quickly. 1448 */ 1449 bool_t 1450 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp, 1451 bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp) 1452 { 1453 rfs4_deleg_state_t *dsp; 1454 1455 /* Is delegation enabled? */ 1456 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE) 1457 return (FALSE); 1458 1459 /* do we have a delegation on this file? */ 1460 rfs4_dbe_lock(fp->rf_dbe); 1461 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) { 1462 if (is_rm) 1463 fp->rf_dinfo.rd_hold_grant++; 1464 rfs4_dbe_unlock(fp->rf_dbe); 1465 return (FALSE); 1466 } 1467 /* 1468 * do we have a write delegation on this file or are we 1469 * requesting write access to a file with any type of existing 1470 * delegation? 1471 */ 1472 if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) { 1473 if (cp != NULL) { 1474 dsp = list_head(&fp->rf_delegstatelist); 1475 if (dsp == NULL) { 1476 rfs4_dbe_unlock(fp->rf_dbe); 1477 return (FALSE); 1478 } 1479 /* 1480 * Does the requestor already own the delegation? 1481 */ 1482 if (dsp->rds_client->rc_clientid == *(cp)) { 1483 rfs4_dbe_unlock(fp->rf_dbe); 1484 return (FALSE); 1485 } 1486 } 1487 1488 rfs4_dbe_unlock(fp->rf_dbe); 1489 rfs4_recall_deleg(fp, trunc, NULL); 1490 1491 if (!do_delay) { 1492 rfs4_dbe_lock(fp->rf_dbe); 1493 fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec(); 1494 rfs4_dbe_unlock(fp->rf_dbe); 1495 return (TRUE); 1496 } 1497 1498 delay(NFS4_DELEGATION_CONFLICT_DELAY); 1499 1500 rfs4_dbe_lock(fp->rf_dbe); 1501 if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) { 1502 fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec(); 1503 rfs4_dbe_unlock(fp->rf_dbe); 1504 return (TRUE); 1505 } 1506 } 1507 if (is_rm) 1508 fp->rf_dinfo.rd_hold_grant++; 1509 rfs4_dbe_unlock(fp->rf_dbe); 1510 return (FALSE); 1511 } 1512 1513 /* 1514 * Check if the file is delegated in the case of a v2 or v3 access. 1515 * Return TRUE if it is delegated which in turn means that v2 should 1516 * drop the request and in the case of v3 JUKEBOX should be returned. 1517 */ 1518 bool_t 1519 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc) 1520 { 1521 rfs4_file_t *fp; 1522 bool_t create = FALSE; 1523 bool_t rc = FALSE; 1524 1525 rfs4_hold_deleg_policy(); 1526 1527 /* Is delegation enabled? */ 1528 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) { 1529 fp = rfs4_findfile(vp, NULL, &create); 1530 if (fp != NULL) { 1531 if (rfs4_check_delegated_byfp(mode, fp, trunc, 1532 TRUE, FALSE, NULL)) { 1533 rc = TRUE; 1534 } 1535 rfs4_file_rele(fp); 1536 } 1537 } 1538 rfs4_rele_deleg_policy(); 1539 return (rc); 1540 } 1541 1542 /* 1543 * Release a hold on the hold_grant counter which 1544 * prevents delegation from being granted while a remove 1545 * or a rename is in progress. 1546 */ 1547 void 1548 rfs4_clear_dont_grant(rfs4_file_t *fp) 1549 { 1550 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE) 1551 return; 1552 rfs4_dbe_lock(fp->rf_dbe); 1553 ASSERT(fp->rf_dinfo.rd_hold_grant > 0); 1554 fp->rf_dinfo.rd_hold_grant--; 1555 fp->rf_dinfo.rd_time_rm_delayed = 0; 1556 rfs4_dbe_unlock(fp->rf_dbe); 1557 } 1558 1559 /* 1560 * State support for delegation. 1561 * Set the state delegation type for this state; 1562 * This routine is called from open via rfs4_grant_delegation and the entry 1563 * locks on sp and sp->rs_finfo are assumed. 1564 */ 1565 static rfs4_deleg_state_t * 1566 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall) 1567 { 1568 rfs4_file_t *fp = sp->rs_finfo; 1569 bool_t create = TRUE; 1570 rfs4_deleg_state_t *dsp; 1571 vnode_t *vp; 1572 int open_prev = *recall; 1573 int ret; 1574 int fflags = 0; 1575 1576 ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); 1577 ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); 1578 1579 /* Shouldn't happen */ 1580 if (fp->rf_dinfo.rd_recall_count != 0 || 1581 (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ && 1582 dtype != OPEN_DELEGATE_READ)) { 1583 return (NULL); 1584 } 1585 1586 /* Unlock to avoid deadlock */ 1587 rfs4_dbe_unlock(fp->rf_dbe); 1588 rfs4_dbe_unlock(sp->rs_dbe); 1589 1590 dsp = rfs4_finddeleg(sp, &create); 1591 1592 rfs4_dbe_lock(sp->rs_dbe); 1593 rfs4_dbe_lock(fp->rf_dbe); 1594 1595 if (dsp == NULL) 1596 return (NULL); 1597 1598 /* 1599 * It is possible that since we dropped the lock 1600 * in order to call finddeleg, the rfs4_file_t 1601 * was marked such that we should not grant a 1602 * delegation, if so bail out. 1603 */ 1604 if (fp->rf_dinfo.rd_hold_grant > 0) { 1605 rfs4_deleg_state_rele(dsp); 1606 return (NULL); 1607 } 1608 1609 if (create == FALSE) { 1610 if (sp->rs_owner->ro_client == dsp->rds_client && 1611 dsp->rds_dtype == dtype) { 1612 return (dsp); 1613 } else { 1614 rfs4_deleg_state_rele(dsp); 1615 return (NULL); 1616 } 1617 } 1618 1619 /* 1620 * Check that this file has not been delegated to another 1621 * client 1622 */ 1623 if (fp->rf_dinfo.rd_recall_count != 0 || 1624 fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE || 1625 (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ && 1626 dtype != OPEN_DELEGATE_READ)) { 1627 rfs4_deleg_state_rele(dsp); 1628 return (NULL); 1629 } 1630 1631 vp = fp->rf_vp; 1632 /* vnevent_support returns 0 if file system supports vnevents */ 1633 if (vnevent_support(vp, NULL)) { 1634 rfs4_deleg_state_rele(dsp); 1635 return (NULL); 1636 } 1637 1638 /* Calculate the fflags for this OPEN. */ 1639 if (sp->rs_share_access & OPEN4_SHARE_ACCESS_READ) 1640 fflags |= FREAD; 1641 if (sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE) 1642 fflags |= FWRITE; 1643 1644 *recall = 0; 1645 /* 1646 * Before granting a delegation we need to know if anyone else has 1647 * opened the file in a conflicting mode. However, first we need to 1648 * know how we opened the file to check the counts properly. 1649 */ 1650 if (dtype == OPEN_DELEGATE_READ) { 1651 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1652 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1653 vn_is_mapped(vp, V_WRITE)) { 1654 if (open_prev) { 1655 *recall = 1; 1656 } else { 1657 rfs4_deleg_state_rele(dsp); 1658 return (NULL); 1659 } 1660 } 1661 ret = fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ, 1662 rfs4_mon_hold, rfs4_mon_rele); 1663 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1664 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1665 vn_is_mapped(vp, V_WRITE)) { 1666 if (open_prev) { 1667 *recall = 1; 1668 } else { 1669 (void) fem_uninstall(vp, deleg_rdops, 1670 (void *)fp); 1671 rfs4_deleg_state_rele(dsp); 1672 return (NULL); 1673 } 1674 } 1675 /* 1676 * Because a client can hold onto a delegation after the 1677 * file has been closed, we need to keep track of the 1678 * access to this file. Otherwise the CIFS server would 1679 * not know about the client accessing the file and could 1680 * inappropriately grant an OPLOCK. 1681 * fem_install() returns EBUSY when asked to install a 1682 * OPUNIQ monitor more than once. Therefore, check the 1683 * return code because we only want this done once. 1684 */ 1685 if (ret == 0) 1686 vn_open_upgrade(vp, FREAD); 1687 } else { /* WRITE */ 1688 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1689 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1690 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) || 1691 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) || 1692 vn_is_mapped(vp, V_RDORWR)) { 1693 if (open_prev) { 1694 *recall = 1; 1695 } else { 1696 rfs4_deleg_state_rele(dsp); 1697 return (NULL); 1698 } 1699 } 1700 ret = fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ, 1701 rfs4_mon_hold, rfs4_mon_rele); 1702 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1703 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1704 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) || 1705 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) || 1706 vn_is_mapped(vp, V_RDORWR)) { 1707 if (open_prev) { 1708 *recall = 1; 1709 } else { 1710 (void) fem_uninstall(vp, deleg_wrops, 1711 (void *)fp); 1712 rfs4_deleg_state_rele(dsp); 1713 return (NULL); 1714 } 1715 } 1716 /* 1717 * Because a client can hold onto a delegation after the 1718 * file has been closed, we need to keep track of the 1719 * access to this file. Otherwise the CIFS server would 1720 * not know about the client accessing the file and could 1721 * inappropriately grant an OPLOCK. 1722 * fem_install() returns EBUSY when asked to install a 1723 * OPUNIQ monitor more than once. Therefore, check the 1724 * return code because we only want this done once. 1725 */ 1726 if (ret == 0) 1727 vn_open_upgrade(vp, FREAD|FWRITE); 1728 } 1729 /* Place on delegation list for file */ 1730 ASSERT(!list_link_active(&dsp->rds_node)); 1731 list_insert_tail(&fp->rf_delegstatelist, dsp); 1732 1733 dsp->rds_dtype = fp->rf_dinfo.rd_dtype = dtype; 1734 1735 /* Update delegation stats for this file */ 1736 fp->rf_dinfo.rd_time_lastgrant = gethrestime_sec(); 1737 1738 /* reset since this is a new delegation */ 1739 fp->rf_dinfo.rd_conflicted_client = 0; 1740 fp->rf_dinfo.rd_ever_recalled = FALSE; 1741 1742 if (dtype == OPEN_DELEGATE_READ) 1743 fp->rf_dinfo.rd_rdgrants++; 1744 else 1745 fp->rf_dinfo.rd_wrgrants++; 1746 1747 return (dsp); 1748 } 1749 1750 /* 1751 * State routine for the server when a delegation is returned. 1752 */ 1753 void 1754 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked) 1755 { 1756 rfs4_file_t *fp = dsp->rds_finfo; 1757 open_delegation_type4 dtypewas; 1758 1759 rfs4_dbe_lock(fp->rf_dbe); 1760 1761 /* nothing to do if no longer on list */ 1762 if (!list_link_active(&dsp->rds_node)) { 1763 rfs4_dbe_unlock(fp->rf_dbe); 1764 return; 1765 } 1766 1767 /* Remove state from recall list */ 1768 list_remove(&fp->rf_delegstatelist, dsp); 1769 1770 if (list_is_empty(&fp->rf_delegstatelist)) { 1771 dtypewas = fp->rf_dinfo.rd_dtype; 1772 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE; 1773 rfs4_dbe_cv_broadcast(fp->rf_dbe); 1774 1775 /* if file system was unshared, the vp will be NULL */ 1776 if (fp->rf_vp != NULL) { 1777 /* 1778 * Once a delegation is no longer held by any client, 1779 * the monitor is uninstalled. At this point, the 1780 * client must send OPEN otw, so we don't need the 1781 * reference on the vnode anymore. The open 1782 * downgrade removes the reference put on earlier. 1783 */ 1784 if (dtypewas == OPEN_DELEGATE_READ) { 1785 (void) fem_uninstall(fp->rf_vp, deleg_rdops, 1786 (void *)fp); 1787 vn_open_downgrade(fp->rf_vp, FREAD); 1788 } else if (dtypewas == OPEN_DELEGATE_WRITE) { 1789 (void) fem_uninstall(fp->rf_vp, deleg_wrops, 1790 (void *)fp); 1791 vn_open_downgrade(fp->rf_vp, FREAD|FWRITE); 1792 } 1793 } 1794 } 1795 1796 switch (dsp->rds_dtype) { 1797 case OPEN_DELEGATE_READ: 1798 fp->rf_dinfo.rd_rdgrants--; 1799 break; 1800 case OPEN_DELEGATE_WRITE: 1801 fp->rf_dinfo.rd_wrgrants--; 1802 break; 1803 default: 1804 break; 1805 } 1806 1807 /* used in the policy decision */ 1808 fp->rf_dinfo.rd_time_returned = gethrestime_sec(); 1809 1810 /* 1811 * reset the time_recalled field so future delegations are not 1812 * accidentally revoked 1813 */ 1814 if ((fp->rf_dinfo.rd_rdgrants + fp->rf_dinfo.rd_wrgrants) == 0) 1815 fp->rf_dinfo.rd_time_recalled = 0; 1816 1817 rfs4_dbe_unlock(fp->rf_dbe); 1818 1819 rfs4_dbe_lock(dsp->rds_dbe); 1820 1821 dsp->rds_dtype = OPEN_DELEGATE_NONE; 1822 1823 if (revoked == TRUE) 1824 dsp->rds_time_revoked = gethrestime_sec(); 1825 1826 rfs4_dbe_invalidate(dsp->rds_dbe); 1827 1828 rfs4_dbe_unlock(dsp->rds_dbe); 1829 1830 if (revoked == TRUE) { 1831 rfs4_dbe_lock(dsp->rds_client->rc_dbe); 1832 dsp->rds_client->rc_deleg_revoked++; /* observability */ 1833 rfs4_dbe_unlock(dsp->rds_client->rc_dbe); 1834 } 1835 } 1836 1837 static void 1838 rfs4_revoke_file(rfs4_file_t *fp) 1839 { 1840 rfs4_deleg_state_t *dsp; 1841 1842 /* 1843 * The lock for rfs4_file_t must be held when traversing the 1844 * delegation list but that lock needs to be released to call 1845 * rfs4_return_deleg() 1846 */ 1847 rfs4_dbe_lock(fp->rf_dbe); 1848 while (dsp = list_head(&fp->rf_delegstatelist)) { 1849 rfs4_dbe_hold(dsp->rds_dbe); 1850 rfs4_dbe_unlock(fp->rf_dbe); 1851 rfs4_return_deleg(dsp, TRUE); 1852 rfs4_deleg_state_rele(dsp); 1853 rfs4_dbe_lock(fp->rf_dbe); 1854 } 1855 rfs4_dbe_unlock(fp->rf_dbe); 1856 } 1857 1858 /* 1859 * A delegation is assumed to be present on the file associated with 1860 * "sp". Check to see if the delegation matches is associated with 1861 * the same client as referenced by "sp". If it is not, TRUE is 1862 * returned. If the delegation DOES match the client (or no 1863 * delegation is present), return FALSE. 1864 * Assume the state entry and file entry are locked. 1865 */ 1866 bool_t 1867 rfs4_is_deleg(rfs4_state_t *sp) 1868 { 1869 rfs4_deleg_state_t *dsp; 1870 rfs4_file_t *fp = sp->rs_finfo; 1871 rfs4_client_t *cp = sp->rs_owner->ro_client; 1872 1873 ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); 1874 for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL; 1875 dsp = list_next(&fp->rf_delegstatelist, dsp)) { 1876 if (cp != dsp->rds_client) { 1877 return (TRUE); 1878 } 1879 } 1880 return (FALSE); 1881 } 1882 1883 void 1884 rfs4_disable_delegation(void) 1885 { 1886 mutex_enter(&rfs4_deleg_lock); 1887 rfs4_deleg_disabled++; 1888 mutex_exit(&rfs4_deleg_lock); 1889 } 1890 1891 void 1892 rfs4_enable_delegation(void) 1893 { 1894 mutex_enter(&rfs4_deleg_lock); 1895 ASSERT(rfs4_deleg_disabled > 0); 1896 rfs4_deleg_disabled--; 1897 mutex_exit(&rfs4_deleg_lock); 1898 } 1899 1900 void 1901 rfs4_mon_hold(void *arg) 1902 { 1903 rfs4_file_t *fp = arg; 1904 1905 rfs4_dbe_hold(fp->rf_dbe); 1906 } 1907 1908 void 1909 rfs4_mon_rele(void *arg) 1910 { 1911 rfs4_file_t *fp = arg; 1912 1913 rfs4_dbe_rele_nolock(fp->rf_dbe); 1914 } 1915