1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/systm.h> 27 #include <rpc/auth.h> 28 #include <rpc/clnt.h> 29 #include <nfs/nfs4_kprot.h> 30 #include <nfs/nfs4.h> 31 #include <nfs/lm.h> 32 #include <sys/cmn_err.h> 33 #include <sys/disp.h> 34 #include <sys/sdt.h> 35 36 #include <sys/pathname.h> 37 38 #include <sys/strsubr.h> 39 #include <sys/ddi.h> 40 41 #include <sys/vnode.h> 42 #include <sys/sdt.h> 43 #include <inet/common.h> 44 #include <inet/ip.h> 45 #include <inet/ip6.h> 46 47 #define MAX_READ_DELEGATIONS 5 48 49 krwlock_t rfs4_deleg_policy_lock; 50 srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE; 51 static int rfs4_deleg_wlp = 5; 52 kmutex_t rfs4_deleg_lock; 53 static int rfs4_deleg_disabled; 54 static int rfs4_max_setup_cb_tries = 5; 55 56 #ifdef DEBUG 57 58 static int rfs4_test_cbgetattr_fail = 0; 59 int rfs4_cb_null; 60 int rfs4_cb_debug; 61 int rfs4_deleg_debug; 62 63 #endif 64 65 static void rfs4_recall_file(rfs4_file_t *, 66 void (*recall)(rfs4_deleg_state_t *, bool_t), 67 bool_t, rfs4_client_t *); 68 static void rfs4_revoke_deleg(rfs4_deleg_state_t *); 69 static void rfs4_revoke_file(rfs4_file_t *); 70 static void rfs4_cb_chflush(rfs4_cbinfo_t *); 71 static CLIENT *rfs4_cb_getch(rfs4_cbinfo_t *); 72 static void rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t); 73 static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *, 74 open_delegation_type4, int *); 75 76 /* 77 * Convert a universal address to an transport specific 78 * address using inet_pton. 79 */ 80 static int 81 uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp) 82 { 83 int dots = 0, i, j, len, k; 84 unsigned char c; 85 in_port_t port = 0; 86 87 len = strlen(ua); 88 89 for (i = len-1; i >= 0; i--) { 90 91 if (ua[i] == '.') 92 dots++; 93 94 if (dots == 2) { 95 96 ua[i] = '\0'; 97 /* 98 * We use k to remember were to stick '.' back, since 99 * ua was kmem_allocateded from the pool len+1. 100 */ 101 k = i; 102 if (inet_pton(af, ua, ap) == 1) { 103 104 c = 0; 105 106 for (j = i+1; j < len; j++) { 107 if (ua[j] == '.') { 108 port = c << 8; 109 c = 0; 110 } else if (ua[j] >= '0' && 111 ua[j] <= '9') { 112 c *= 10; 113 c += ua[j] - '0'; 114 } else { 115 ua[k] = '.'; 116 return (EINVAL); 117 } 118 } 119 port += c; 120 121 122 /* reset to network order */ 123 if (af == AF_INET) { 124 *(uint32_t *)ap = 125 htonl(*(uint32_t *)ap); 126 *pp = htons(port); 127 } else { 128 int ix; 129 uint16_t *sap; 130 131 for (sap = ap, ix = 0; ix < 132 sizeof (struct in6_addr) / 133 sizeof (uint16_t); ix++) 134 sap[ix] = htons(sap[ix]); 135 136 *pp = htons(port); 137 } 138 139 ua[k] = '.'; 140 return (0); 141 } else { 142 ua[k] = '.'; 143 return (EINVAL); 144 } 145 } 146 } 147 148 return (EINVAL); 149 } 150 151 /* 152 * Update the delegation policy with the 153 * value of "new_policy" 154 */ 155 void 156 rfs4_set_deleg_policy(srv_deleg_policy_t new_policy) 157 { 158 rw_enter(&rfs4_deleg_policy_lock, RW_WRITER); 159 rfs4_deleg_policy = new_policy; 160 rw_exit(&rfs4_deleg_policy_lock); 161 } 162 163 void 164 rfs4_hold_deleg_policy(void) 165 { 166 rw_enter(&rfs4_deleg_policy_lock, RW_READER); 167 } 168 169 void 170 rfs4_rele_deleg_policy(void) 171 { 172 rw_exit(&rfs4_deleg_policy_lock); 173 } 174 175 176 /* 177 * This free function is to be used when the client struct is being 178 * released and nothing at all is needed of the callback info any 179 * longer. 180 */ 181 void 182 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp) 183 { 184 char *addr = cbp->cb_callback.cb_location.r_addr; 185 char *netid = cbp->cb_callback.cb_location.r_netid; 186 187 /* Free old address if any */ 188 189 if (addr) 190 kmem_free(addr, strlen(addr) + 1); 191 if (netid) 192 kmem_free(netid, strlen(netid) + 1); 193 194 addr = cbp->cb_newer.cb_callback.cb_location.r_addr; 195 netid = cbp->cb_newer.cb_callback.cb_location.r_netid; 196 197 if (addr) 198 kmem_free(addr, strlen(addr) + 1); 199 if (netid) 200 kmem_free(netid, strlen(netid) + 1); 201 202 if (cbp->cb_chc_free) { 203 rfs4_cb_chflush(cbp); 204 } 205 } 206 207 /* 208 * The server uses this to check the callback path supplied by the 209 * client. The callback connection is marked "in progress" while this 210 * work is going on and then eventually marked either OK or FAILED. 211 * This work can be done as part of a separate thread and at the end 212 * of this the thread will exit or it may be done such that the caller 213 * will continue with other work. 214 */ 215 static void 216 rfs4_do_cb_null(rfs4_client_t *cp) 217 { 218 struct timeval tv; 219 CLIENT *ch; 220 rfs4_cbstate_t newstate; 221 rfs4_cbinfo_t *cbp = &cp->cbinfo; 222 223 mutex_enter(cbp->cb_lock); 224 /* If another thread is doing CB_NULL RPC then return */ 225 if (cbp->cb_nullcaller == TRUE) { 226 mutex_exit(cbp->cb_lock); 227 rfs4_client_rele(cp); 228 return; 229 } 230 231 /* Mark the cbinfo as having a thread in the NULL callback */ 232 cbp->cb_nullcaller = TRUE; 233 234 /* 235 * Are there other threads still using the cbinfo client 236 * handles? If so, this thread must wait before going and 237 * mucking aroiund with the callback information 238 */ 239 while (cbp->cb_refcnt != 0) 240 cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock); 241 242 /* 243 * This thread itself may find that new callback info has 244 * arrived and is set up to handle this case and redrive the 245 * call to the client's callback server. 246 */ 247 retry: 248 if (cbp->cb_newer.cb_new == TRUE && 249 cbp->cb_newer.cb_confirmed == TRUE) { 250 char *addr = cbp->cb_callback.cb_location.r_addr; 251 char *netid = cbp->cb_callback.cb_location.r_netid; 252 253 /* 254 * Free the old stuff if it exists; may be the first 255 * time through this path 256 */ 257 if (addr) 258 kmem_free(addr, strlen(addr) + 1); 259 if (netid) 260 kmem_free(netid, strlen(netid) + 1); 261 262 /* Move over the addr/netid */ 263 cbp->cb_callback.cb_location.r_addr = 264 cbp->cb_newer.cb_callback.cb_location.r_addr; 265 cbp->cb_newer.cb_callback.cb_location.r_addr = NULL; 266 cbp->cb_callback.cb_location.r_netid = 267 cbp->cb_newer.cb_callback.cb_location.r_netid; 268 cbp->cb_newer.cb_callback.cb_location.r_netid = NULL; 269 270 /* Get the program number */ 271 cbp->cb_callback.cb_program = 272 cbp->cb_newer.cb_callback.cb_program; 273 cbp->cb_newer.cb_callback.cb_program = 0; 274 275 /* Don't forget the protocol's "cb_ident" field */ 276 cbp->cb_ident = cbp->cb_newer.cb_ident; 277 cbp->cb_newer.cb_ident = 0; 278 279 /* no longer new */ 280 cbp->cb_newer.cb_new = FALSE; 281 cbp->cb_newer.cb_confirmed = FALSE; 282 283 /* get rid of the old client handles that may exist */ 284 rfs4_cb_chflush(cbp); 285 286 cbp->cb_state = CB_NONE; 287 cbp->cb_timefailed = 0; /* reset the clock */ 288 cbp->cb_notified_of_cb_path_down = TRUE; 289 } 290 291 if (cbp->cb_state != CB_NONE) { 292 cv_broadcast(cbp->cb_cv); /* let the others know */ 293 cbp->cb_nullcaller = FALSE; 294 mutex_exit(cbp->cb_lock); 295 rfs4_client_rele(cp); 296 return; 297 } 298 299 /* mark rfs4_client_t as CALLBACK NULL in progress */ 300 cbp->cb_state = CB_INPROG; 301 mutex_exit(cbp->cb_lock); 302 303 /* get/generate a client handle */ 304 if ((ch = rfs4_cb_getch(cbp)) == NULL) { 305 mutex_enter(cbp->cb_lock); 306 cbp->cb_state = CB_BAD; 307 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 308 goto retry; 309 } 310 311 312 tv.tv_sec = 30; 313 tv.tv_usec = 0; 314 if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) { 315 newstate = CB_BAD; 316 } else { 317 newstate = CB_OK; 318 #ifdef DEBUG 319 rfs4_cb_null++; 320 #endif 321 } 322 323 /* Check to see if the client has specified new callback info */ 324 mutex_enter(cbp->cb_lock); 325 rfs4_cb_freech(cbp, ch, TRUE); 326 if (cbp->cb_newer.cb_new == TRUE && 327 cbp->cb_newer.cb_confirmed == TRUE) { 328 goto retry; /* give the CB_NULL another chance */ 329 } 330 331 cbp->cb_state = newstate; 332 if (cbp->cb_state == CB_BAD) 333 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 334 335 cv_broadcast(cbp->cb_cv); /* start up the other threads */ 336 cbp->cb_nullcaller = FALSE; 337 mutex_exit(cbp->cb_lock); 338 339 rfs4_client_rele(cp); 340 } 341 342 /* 343 * Given a client struct, inspect the callback info to see if the 344 * callback path is up and available. 345 * 346 * If new callback path is available and no one has set it up then 347 * try to set it up. If setup is not successful after 5 tries (5 secs) 348 * then gives up and returns NULL. 349 * 350 * If callback path is being initialized, then wait for the CB_NULL RPC 351 * call to occur. 352 */ 353 static rfs4_cbinfo_t * 354 rfs4_cbinfo_hold(rfs4_client_t *cp) 355 { 356 rfs4_cbinfo_t *cbp = &cp->cbinfo; 357 int retries = 0; 358 359 mutex_enter(cbp->cb_lock); 360 361 while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) { 362 /* 363 * Looks like a new callback path may be available and 364 * noone has set it up. 365 */ 366 mutex_exit(cbp->cb_lock); 367 rfs4_dbe_hold(cp->dbe); 368 rfs4_do_cb_null(cp); /* caller will release client hold */ 369 370 mutex_enter(cbp->cb_lock); 371 /* 372 * If callback path is no longer new, or it's being setup 373 * then stop and wait for it to be done. 374 */ 375 if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE) 376 break; 377 mutex_exit(cbp->cb_lock); 378 379 if (++retries >= rfs4_max_setup_cb_tries) 380 return (NULL); 381 delay(hz); 382 mutex_enter(cbp->cb_lock); 383 } 384 385 /* Is there a thread working on doing the CB_NULL RPC? */ 386 if (cbp->cb_nullcaller == TRUE) 387 cv_wait(cbp->cb_cv, cbp->cb_lock); /* if so, wait on it */ 388 389 /* If the callback path is not okay (up and running), just quit */ 390 if (cbp->cb_state != CB_OK) { 391 mutex_exit(cbp->cb_lock); 392 return (NULL); 393 } 394 395 /* Let someone know we are using the current callback info */ 396 cbp->cb_refcnt++; 397 mutex_exit(cbp->cb_lock); 398 return (cbp); 399 } 400 401 /* 402 * The caller is done with the callback info. It may be that the 403 * caller's RPC failed and the NFSv4 client has actually provided new 404 * callback information. If so, let the caller know so they can 405 * advantage of this and maybe retry the RPC that originally failed. 406 */ 407 static int 408 rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate) 409 { 410 int cb_new = FALSE; 411 412 mutex_enter(cbp->cb_lock); 413 414 /* The caller gets a chance to mark the callback info as bad */ 415 if (newstate != CB_NOCHANGE) 416 cbp->cb_state = newstate; 417 if (newstate == CB_FAILED) { 418 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 419 cbp->cb_notified_of_cb_path_down = FALSE; 420 } 421 422 cbp->cb_refcnt--; /* no longer using the information */ 423 424 /* 425 * A thread may be waiting on this one to finish and if so, 426 * let it know that it is okay to do the CB_NULL to the 427 * client's callback server. 428 */ 429 if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller) 430 cv_broadcast(cbp->cb_cv_nullcaller); 431 432 /* 433 * If this is the last thread to use the callback info and 434 * there is new callback information to try and no thread is 435 * there ready to do the CB_NULL, then return true to teh 436 * caller so they can do the CB_NULL 437 */ 438 if (cbp->cb_refcnt == 0 && 439 cbp->cb_nullcaller == FALSE && 440 cbp->cb_newer.cb_new == TRUE && 441 cbp->cb_newer.cb_confirmed == TRUE) 442 cb_new = TRUE; 443 444 mutex_exit(cbp->cb_lock); 445 446 return (cb_new); 447 } 448 449 /* 450 * Given the information in the callback info struct, create a client 451 * handle that can be used by the server for its callback path. 452 */ 453 static CLIENT * 454 rfs4_cbch_init(rfs4_cbinfo_t *cbp) 455 { 456 struct knetconfig knc; 457 vnode_t *vp; 458 struct sockaddr_in addr4; 459 struct sockaddr_in6 addr6; 460 void *addr, *taddr; 461 in_port_t *pp; 462 int af; 463 char *devnam; 464 struct netbuf nb; 465 int size; 466 CLIENT *ch = NULL; 467 int useresvport = 0; 468 469 mutex_enter(cbp->cb_lock); 470 471 if (cbp->cb_callback.cb_location.r_netid == NULL || 472 cbp->cb_callback.cb_location.r_addr == NULL) { 473 goto cb_init_out; 474 } 475 476 if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) { 477 knc.knc_semantics = NC_TPI_COTS; 478 knc.knc_protofmly = "inet"; 479 knc.knc_proto = "tcp"; 480 devnam = "/dev/tcp"; 481 af = AF_INET; 482 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp") 483 == 0) { 484 knc.knc_semantics = NC_TPI_CLTS; 485 knc.knc_protofmly = "inet"; 486 knc.knc_proto = "udp"; 487 devnam = "/dev/udp"; 488 af = AF_INET; 489 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6") 490 == 0) { 491 knc.knc_semantics = NC_TPI_COTS; 492 knc.knc_protofmly = "inet6"; 493 knc.knc_proto = "tcp"; 494 devnam = "/dev/tcp6"; 495 af = AF_INET6; 496 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6") 497 == 0) { 498 knc.knc_semantics = NC_TPI_CLTS; 499 knc.knc_protofmly = "inet6"; 500 knc.knc_proto = "udp"; 501 devnam = "/dev/udp6"; 502 af = AF_INET6; 503 } else { 504 goto cb_init_out; 505 } 506 507 if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) { 508 509 goto cb_init_out; 510 } 511 512 if (vp->v_type != VCHR) { 513 VN_RELE(vp); 514 goto cb_init_out; 515 } 516 517 knc.knc_rdev = vp->v_rdev; 518 519 VN_RELE(vp); 520 521 if (af == AF_INET) { 522 size = sizeof (addr4); 523 bzero(&addr4, size); 524 addr4.sin_family = (sa_family_t)af; 525 addr = &addr4.sin_addr; 526 pp = &addr4.sin_port; 527 taddr = &addr4; 528 } else /* AF_INET6 */ { 529 size = sizeof (addr6); 530 bzero(&addr6, size); 531 addr6.sin6_family = (sa_family_t)af; 532 addr = &addr6.sin6_addr; 533 pp = &addr6.sin6_port; 534 taddr = &addr6; 535 } 536 537 if (uaddr2sockaddr(af, 538 cbp->cb_callback.cb_location.r_addr, addr, pp)) { 539 540 goto cb_init_out; 541 } 542 543 544 nb.maxlen = nb.len = size; 545 nb.buf = (char *)taddr; 546 547 if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program, 548 NFS_CB, 0, 0, curthread->t_cred, &ch)) { 549 550 ch = NULL; 551 } 552 553 /* turn off reserved port usage */ 554 (void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport); 555 556 cb_init_out: 557 mutex_exit(cbp->cb_lock); 558 return (ch); 559 } 560 561 /* 562 * Iterate over the client handle cache and 563 * destroy it. 564 */ 565 static void 566 rfs4_cb_chflush(rfs4_cbinfo_t *cbp) 567 { 568 CLIENT *ch; 569 570 while (cbp->cb_chc_free) { 571 cbp->cb_chc_free--; 572 ch = cbp->cb_chc[cbp->cb_chc_free]; 573 cbp->cb_chc[cbp->cb_chc_free] = NULL; 574 if (ch) { 575 if (ch->cl_auth) 576 auth_destroy(ch->cl_auth); 577 clnt_destroy(ch); 578 } 579 } 580 } 581 582 /* 583 * Return a client handle, either from a the small 584 * rfs4_client_t cache or one that we just created. 585 */ 586 static CLIENT * 587 rfs4_cb_getch(rfs4_cbinfo_t *cbp) 588 { 589 CLIENT *cbch = NULL; 590 uint32_t zilch = 0; 591 592 mutex_enter(cbp->cb_lock); 593 594 if (cbp->cb_chc_free) { 595 cbp->cb_chc_free--; 596 cbch = cbp->cb_chc[ cbp->cb_chc_free ]; 597 mutex_exit(cbp->cb_lock); 598 (void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch); 599 return (cbch); 600 } 601 602 mutex_exit(cbp->cb_lock); 603 604 /* none free so make it now */ 605 cbch = rfs4_cbch_init(cbp); 606 607 return (cbch); 608 } 609 610 /* 611 * Return the client handle to the small cache or 612 * destroy it. 613 */ 614 static void 615 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld) 616 { 617 if (lockheld == FALSE) 618 mutex_enter(cbp->cb_lock); 619 620 if (cbp->cb_chc_free < RFS4_CBCH_MAX) { 621 cbp->cb_chc[ cbp->cb_chc_free++ ] = ch; 622 if (lockheld == FALSE) 623 mutex_exit(cbp->cb_lock); 624 return; 625 } 626 if (lockheld == FALSE) 627 mutex_exit(cbp->cb_lock); 628 629 /* 630 * cache maxed out of free entries, obliterate 631 * this client handle, destroy it, throw it away. 632 */ 633 if (ch->cl_auth) 634 auth_destroy(ch->cl_auth); 635 clnt_destroy(ch); 636 } 637 638 /* 639 * With the supplied callback information - initialize the client 640 * callback data. If there is a callback in progress, save the 641 * callback info so that a thread can pick it up in the future. 642 */ 643 void 644 rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident) 645 { 646 char *addr = NULL; 647 char *netid = NULL; 648 rfs4_cbinfo_t *cbp = &cp->cbinfo; 649 size_t len; 650 651 /* Set the call back for the client */ 652 if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' && 653 cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') { 654 len = strlen(cb->cb_location.r_addr) + 1; 655 addr = kmem_alloc(len, KM_SLEEP); 656 bcopy(cb->cb_location.r_addr, addr, len); 657 len = strlen(cb->cb_location.r_netid) + 1; 658 netid = kmem_alloc(len, KM_SLEEP); 659 bcopy(cb->cb_location.r_netid, netid, len); 660 } 661 /* ready to save the new information but first free old, if exists */ 662 mutex_enter(cbp->cb_lock); 663 664 cbp->cb_newer.cb_callback.cb_program = cb->cb_program; 665 666 if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL) 667 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr, 668 strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1); 669 cbp->cb_newer.cb_callback.cb_location.r_addr = addr; 670 671 if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL) 672 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid, 673 strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1); 674 cbp->cb_newer.cb_callback.cb_location.r_netid = netid; 675 676 cbp->cb_newer.cb_ident = cb_ident; 677 678 if (addr && *addr && netid && *netid) { 679 cbp->cb_newer.cb_new = TRUE; 680 cbp->cb_newer.cb_confirmed = FALSE; 681 } else { 682 cbp->cb_newer.cb_new = FALSE; 683 cbp->cb_newer.cb_confirmed = FALSE; 684 } 685 686 mutex_exit(cbp->cb_lock); 687 } 688 689 /* 690 * The server uses this when processing SETCLIENTID_CONFIRM. Callback 691 * information may have been provided on SETCLIENTID and this call 692 * marks that information as confirmed and then starts a thread to 693 * test the callback path. 694 */ 695 void 696 rfs4_deleg_cb_check(rfs4_client_t *cp) 697 { 698 if (cp->cbinfo.cb_newer.cb_new == FALSE) 699 return; 700 701 cp->cbinfo.cb_newer.cb_confirmed = TRUE; 702 703 rfs4_dbe_hold(cp->dbe); /* hold the client struct for thread */ 704 705 (void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN, 706 minclsyspri); 707 } 708 709 static void 710 rfs4args_cb_recall_free(nfs_cb_argop4 *argop) 711 { 712 CB_RECALL4args *rec_argp; 713 714 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 715 if (rec_argp->fh.nfs_fh4_val) 716 kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len); 717 } 718 719 /* ARGSUSED */ 720 static void 721 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop) 722 { 723 CB_GETATTR4args *argp; 724 725 argp = &argop->nfs_cb_argop4_u.opcbgetattr; 726 if (argp->fh.nfs_fh4_val) 727 kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len); 728 } 729 730 static void 731 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp) 732 { 733 int i, arglen; 734 nfs_cb_argop4 *argop; 735 736 /* 737 * First free any special args alloc'd for specific ops. 738 */ 739 arglen = args->array_len; 740 argop = args->array; 741 for (i = 0; i < arglen; i++, argop++) { 742 743 switch (argop->argop) { 744 case OP_CB_RECALL: 745 rfs4args_cb_recall_free(argop); 746 break; 747 748 case OP_CB_GETATTR: 749 rfs4args_cb_getattr_free(argop); 750 break; 751 752 default: 753 return; 754 } 755 } 756 757 if (args->tag.utf8string_len > 0) 758 UTF8STRING_FREE(args->tag) 759 760 kmem_free(args->array, arglen * sizeof (nfs_cb_argop4)); 761 if (resp) 762 (void) xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp); 763 } 764 765 /* 766 * General callback routine for the server to the client. 767 */ 768 static enum clnt_stat 769 rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args, 770 CB_COMPOUND4res *res, struct timeval timeout) 771 { 772 rfs4_cbinfo_t *cbp; 773 CLIENT *ch; 774 /* start with this in case cb_getch() fails */ 775 enum clnt_stat stat = RPC_FAILED; 776 777 res->tag.utf8string_val = NULL; 778 res->array = NULL; 779 780 retry: 781 cbp = rfs4_cbinfo_hold(cp); 782 if (cbp == NULL) 783 return (stat); 784 785 /* get a client handle */ 786 if ((ch = rfs4_cb_getch(cbp)) != NULL) { 787 /* 788 * reset the cb_ident since it may have changed in 789 * rfs4_cbinfo_hold() 790 */ 791 args->callback_ident = cbp->cb_ident; 792 793 stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv, 794 (caddr_t)args, xdr_CB_COMPOUND4res, 795 (caddr_t)res, timeout); 796 797 /* free client handle */ 798 rfs4_cb_freech(cbp, ch, FALSE); 799 } 800 801 /* 802 * If the rele says that there may be new callback info then 803 * retry this sequence and it may succeed as a result of the 804 * new callback path 805 */ 806 if (rfs4_cbinfo_rele(cbp, 807 (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE) 808 goto retry; 809 810 return (stat); 811 } 812 813 /* 814 * Used by the NFSv4 server to get attributes for a file while 815 * handling the case where a file has been write delegated. For the 816 * time being, VOP_GETATTR() is called and CB_GETATTR processing is 817 * not undertaken. This call site is maintained in case the server is 818 * updated in the future to handle write delegation space guarantees. 819 */ 820 nfsstat4 821 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 822 { 823 uint_t mask; 824 int error; 825 826 mask = vap->va_mask; 827 error = VOP_GETATTR(vp, vap, flag, cr, NULL); 828 /* 829 * Some file systems clobber va_mask. it is probably wrong of 830 * them to do so, nonethless we practice defensive coding. 831 * See bug id 4276830. 832 */ 833 vap->va_mask = mask; 834 return (puterrno4(error)); 835 } 836 837 /* 838 * This is used everywhere in the v2/v3 server to allow the 839 * integration of all NFS versions and the support of delegation. For 840 * now, just call the VOP_GETATTR(). If the NFSv4 server is enhanced 841 * in the future to provide space guarantees for write delegations 842 * then this call site should be expanded to interact with the client. 843 */ 844 int 845 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 846 { 847 return (VOP_GETATTR(vp, vap, flag, cr, NULL)); 848 } 849 850 /* 851 * Place the actual cb_recall otw call to client. 852 */ 853 static void 854 rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc) 855 { 856 CB_COMPOUND4args cb4_args; 857 CB_COMPOUND4res cb4_res; 858 CB_RECALL4args *rec_argp; 859 CB_RECALL4res *rec_resp; 860 nfs_cb_argop4 *argop; 861 int numops; 862 int argoplist_size; 863 struct timeval timeout; 864 nfs_fh4 *fhp; 865 enum clnt_stat call_stat; 866 867 /* 868 * set up the compound args 869 */ 870 numops = 1; /* CB_RECALL only */ 871 872 argoplist_size = numops * sizeof (nfs_cb_argop4); 873 argop = kmem_zalloc(argoplist_size, KM_SLEEP); 874 argop->argop = OP_CB_RECALL; 875 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 876 877 (void) str_to_utf8("cb_recall", &cb4_args.tag); 878 cb4_args.minorversion = CB4_MINORVERSION; 879 /* cb4_args.callback_ident is set in rfs4_do_callback() */ 880 cb4_args.array_len = numops; 881 cb4_args.array = argop; 882 883 /* 884 * fill in the args struct 885 */ 886 bcopy(&dsp->delegid.stateid, &rec_argp->stateid, sizeof (stateid4)); 887 rec_argp->truncate = trunc; 888 889 fhp = &dsp->finfo->filehandle; 890 rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) * 891 fhp->nfs_fh4_len, KM_SLEEP); 892 nfs_fh4_copy(fhp, &rec_argp->fh); 893 894 /* Keep track of when we did this for observability */ 895 dsp->time_recalled = gethrestime_sec(); 896 897 /* 898 * Set up the timeout for the callback and make the actual call. 899 * Timeout will be 80% of the lease period for this server. 900 */ 901 timeout.tv_sec = (rfs4_lease_time * 80) / 100; 902 timeout.tv_usec = 0; 903 904 DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->client, 905 rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp); 906 907 call_stat = rfs4_do_callback(dsp->client, &cb4_args, &cb4_res, timeout); 908 909 rec_resp = (cb4_res.array_len == 0) ? NULL : 910 &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall; 911 912 DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->client, 913 rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp); 914 915 if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) { 916 rfs4_revoke_deleg(dsp); 917 } 918 919 rfs4freeargres(&cb4_args, &cb4_res); 920 } 921 922 struct recall_arg { 923 rfs4_deleg_state_t *dsp; 924 void (*recall)(rfs4_deleg_state_t *, bool_t trunc); 925 bool_t trunc; 926 }; 927 928 static void 929 do_recall(struct recall_arg *arg) 930 { 931 rfs4_deleg_state_t *dsp = arg->dsp; 932 rfs4_file_t *fp = dsp->finfo; 933 callb_cpr_t cpr_info; 934 kmutex_t cpr_lock; 935 936 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 937 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall"); 938 939 /* 940 * It is possible that before this thread starts 941 * the client has send us a return_delegation, and 942 * if that is the case we do not need to send the 943 * recall callback. 944 */ 945 if (dsp->dtype != OPEN_DELEGATE_NONE) { 946 DTRACE_PROBE3(nfss__i__recall, 947 struct recall_arg *, arg, 948 struct rfs4_deleg_state_t *, dsp, 949 struct rfs4_file_t *, fp); 950 951 if (arg->recall) 952 (void) (*arg->recall)(dsp, arg->trunc); 953 } 954 955 mutex_enter(fp->dinfo->recall_lock); 956 /* 957 * Recall count may go negative if the parent thread that is 958 * creating the individual callback threads does not modify 959 * the recall_count field before the callback thread actually 960 * gets a response from the CB_RECALL 961 */ 962 fp->dinfo->recall_count--; 963 if (fp->dinfo->recall_count == 0) 964 cv_signal(fp->dinfo->recall_cv); 965 mutex_exit(fp->dinfo->recall_lock); 966 967 mutex_enter(&cpr_lock); 968 CALLB_CPR_EXIT(&cpr_info); 969 mutex_destroy(&cpr_lock); 970 971 rfs4_deleg_state_rele(dsp); /* release the hold for this thread */ 972 973 kmem_free(arg, sizeof (struct recall_arg)); 974 } 975 976 struct master_recall_args { 977 rfs4_file_t *fp; 978 void (*recall)(rfs4_deleg_state_t *, bool_t); 979 bool_t trunc; 980 }; 981 982 static void 983 do_recall_file(struct master_recall_args *map) 984 { 985 rfs4_file_t *fp = map->fp; 986 rfs4_deleg_state_t *dsp; 987 struct recall_arg *arg; 988 callb_cpr_t cpr_info; 989 kmutex_t cpr_lock; 990 int32_t recall_count; 991 992 rfs4_dbe_lock(fp->dbe); 993 994 /* Recall already in progress ? */ 995 mutex_enter(fp->dinfo->recall_lock); 996 if (fp->dinfo->recall_count != 0) { 997 mutex_exit(fp->dinfo->recall_lock); 998 rfs4_dbe_rele_nolock(fp->dbe); 999 rfs4_dbe_unlock(fp->dbe); 1000 kmem_free(map, sizeof (struct master_recall_args)); 1001 return; 1002 } 1003 1004 mutex_exit(fp->dinfo->recall_lock); 1005 1006 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 1007 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "v4RecallFile"); 1008 1009 recall_count = 0; 1010 for (dsp = fp->delegationlist.next->dsp; dsp != NULL; 1011 dsp = dsp->delegationlist.next->dsp) { 1012 1013 rfs4_dbe_lock(dsp->dbe); 1014 /* 1015 * if this delegation state 1016 * is being reaped skip it 1017 */ 1018 if (rfs4_dbe_is_invalid(dsp->dbe)) { 1019 rfs4_dbe_unlock(dsp->dbe); 1020 continue; 1021 } 1022 1023 /* hold for receiving thread */ 1024 rfs4_dbe_hold(dsp->dbe); 1025 rfs4_dbe_unlock(dsp->dbe); 1026 1027 arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP); 1028 arg->recall = map->recall; 1029 arg->trunc = map->trunc; 1030 arg->dsp = dsp; 1031 1032 recall_count++; 1033 1034 (void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN, 1035 minclsyspri); 1036 } 1037 1038 rfs4_dbe_unlock(fp->dbe); 1039 1040 mutex_enter(fp->dinfo->recall_lock); 1041 /* 1042 * Recall count may go negative if the parent thread that is 1043 * creating the individual callback threads does not modify 1044 * the recall_count field before the callback thread actually 1045 * gets a response from the CB_RECALL 1046 */ 1047 fp->dinfo->recall_count += recall_count; 1048 while (fp->dinfo->recall_count) 1049 cv_wait(fp->dinfo->recall_cv, fp->dinfo->recall_lock); 1050 1051 mutex_exit(fp->dinfo->recall_lock); 1052 1053 DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp); 1054 rfs4_file_rele(fp); 1055 kmem_free(map, sizeof (struct master_recall_args)); 1056 mutex_enter(&cpr_lock); 1057 CALLB_CPR_EXIT(&cpr_info); 1058 mutex_destroy(&cpr_lock); 1059 } 1060 1061 static void 1062 rfs4_recall_file(rfs4_file_t *fp, 1063 void (*recall)(rfs4_deleg_state_t *, bool_t trunc), 1064 bool_t trunc, rfs4_client_t *cp) 1065 { 1066 struct master_recall_args *args; 1067 1068 rfs4_dbe_lock(fp->dbe); 1069 if (fp->dinfo->dtype == OPEN_DELEGATE_NONE) { 1070 rfs4_dbe_unlock(fp->dbe); 1071 return; 1072 } 1073 rfs4_dbe_hold(fp->dbe); /* hold for new thread */ 1074 1075 /* 1076 * Mark the time we started the recall processing. 1077 * If it has been previously recalled, do not reset the 1078 * timer since this is used for the revocation decision. 1079 */ 1080 if (fp->dinfo->time_recalled == 0) 1081 fp->dinfo->time_recalled = gethrestime_sec(); 1082 fp->dinfo->ever_recalled = TRUE; /* used for policy decision */ 1083 /* Client causing recall not always available */ 1084 if (cp) 1085 fp->dinfo->conflicted_client = cp->clientid; 1086 1087 rfs4_dbe_unlock(fp->dbe); 1088 1089 args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP); 1090 args->fp = fp; 1091 args->recall = recall; 1092 args->trunc = trunc; 1093 1094 (void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN, 1095 minclsyspri); 1096 } 1097 1098 void 1099 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp) 1100 { 1101 time_t elapsed1, elapsed2; 1102 1103 if (fp->dinfo->time_recalled != 0) { 1104 elapsed1 = gethrestime_sec() - fp->dinfo->time_recalled; 1105 elapsed2 = gethrestime_sec() - fp->dinfo->time_lastwrite; 1106 /* First check to see if a revocation should occur */ 1107 if (elapsed1 > rfs4_lease_time && 1108 elapsed2 > rfs4_lease_time) { 1109 rfs4_revoke_file(fp); 1110 return; 1111 } 1112 /* 1113 * Next check to see if a recall should be done again 1114 * so quickly. 1115 */ 1116 if (elapsed1 <= ((rfs4_lease_time * 20) / 100)) 1117 return; 1118 } 1119 rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp); 1120 } 1121 1122 /* 1123 * rfs4_check_recall is called from rfs4_do_open to determine if the current 1124 * open conflicts with the delegation. 1125 * Return true if we need recall otherwise false. 1126 * Assumes entry locks for sp and sp->finfo are held. 1127 */ 1128 bool_t 1129 rfs4_check_recall(rfs4_state_t *sp, uint32_t access) 1130 { 1131 open_delegation_type4 dtype = sp->finfo->dinfo->dtype; 1132 1133 switch (dtype) { 1134 case OPEN_DELEGATE_NONE: 1135 /* Not currently delegated so there is nothing to do */ 1136 return (FALSE); 1137 case OPEN_DELEGATE_READ: 1138 /* 1139 * If the access is only asking for READ then there is 1140 * no conflict and nothing to do. If it is asking 1141 * for write, then there will be conflict and the read 1142 * delegation should be recalled. 1143 */ 1144 if (access == OPEN4_SHARE_ACCESS_READ) 1145 return (FALSE); 1146 else 1147 return (TRUE); 1148 case OPEN_DELEGATE_WRITE: 1149 /* Check to see if this client has the delegation */ 1150 return (rfs4_is_deleg(sp)); 1151 } 1152 1153 return (FALSE); 1154 } 1155 1156 /* 1157 * Return the "best" allowable delegation available given the current 1158 * delegation type and the desired access and deny modes on the file. 1159 * At the point that this routine is called we know that the access and 1160 * deny modes are consistent with the file modes. 1161 */ 1162 static open_delegation_type4 1163 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp) 1164 { 1165 open_delegation_type4 dtype = fp->dinfo->dtype; 1166 uint32_t access = sp->share_access; 1167 uint32_t deny = sp->share_deny; 1168 int readcnt = 0; 1169 int writecnt = 0; 1170 1171 switch (dtype) { 1172 case OPEN_DELEGATE_NONE: 1173 /* 1174 * Determine if more than just this OPEN have the file 1175 * open and if so, no delegation may be provided to 1176 * the client. 1177 */ 1178 if (access & OPEN4_SHARE_ACCESS_WRITE) 1179 writecnt++; 1180 if (access & OPEN4_SHARE_ACCESS_READ) 1181 readcnt++; 1182 1183 if (fp->access_read > readcnt || fp->access_write > writecnt) 1184 return (OPEN_DELEGATE_NONE); 1185 1186 /* 1187 * If the client is going to write, or if the client 1188 * has exclusive access, return a write delegation. 1189 */ 1190 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1191 (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) 1192 return (OPEN_DELEGATE_WRITE); 1193 /* 1194 * If we don't want to write or we've haven't denied read 1195 * access to others, return a read delegation. 1196 */ 1197 if ((access & ~OPEN4_SHARE_ACCESS_WRITE) || 1198 (deny & ~OPEN4_SHARE_DENY_READ)) 1199 return (OPEN_DELEGATE_READ); 1200 1201 /* Shouldn't get here */ 1202 return (OPEN_DELEGATE_NONE); 1203 1204 case OPEN_DELEGATE_READ: 1205 /* 1206 * If the file is delegated for read but we wan't to 1207 * write or deny others to read then we can't delegate 1208 * the file. We shouldn't get here since the delegation should 1209 * have been recalled already. 1210 */ 1211 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1212 (deny & OPEN4_SHARE_DENY_READ)) 1213 return (OPEN_DELEGATE_NONE); 1214 return (OPEN_DELEGATE_READ); 1215 1216 case OPEN_DELEGATE_WRITE: 1217 return (OPEN_DELEGATE_WRITE); 1218 } 1219 1220 /* Shouldn't get here */ 1221 return (OPEN_DELEGATE_NONE); 1222 } 1223 1224 /* 1225 * Given the desired delegation type and the "history" of the file 1226 * determine the actual delegation type to return. 1227 */ 1228 static open_delegation_type4 1229 rfs4_delegation_policy(open_delegation_type4 dtype, 1230 rfs4_dinfo_t *dinfo, clientid4 cid) 1231 { 1232 time_t elapsed; 1233 1234 if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE) 1235 return (OPEN_DELEGATE_NONE); 1236 1237 /* 1238 * Has this file/delegation ever been recalled? If not then 1239 * no furhter checks for a delegation race need to be done. 1240 * However if a recall has occurred, then check to see if a 1241 * client has caused its own delegation recall to occur. If 1242 * not, then has a delegation for this file been returned 1243 * recently? If so, then do not assign a new delegation to 1244 * avoid a "delegation race" between the original client and 1245 * the new/conflicting client. 1246 */ 1247 if (dinfo->ever_recalled == TRUE) { 1248 if (dinfo->conflicted_client != cid) { 1249 elapsed = gethrestime_sec() - dinfo->time_returned; 1250 if (elapsed < rfs4_lease_time) 1251 return (OPEN_DELEGATE_NONE); 1252 } 1253 } 1254 1255 /* Limit the number of read grants */ 1256 if (dtype == OPEN_DELEGATE_READ && 1257 dinfo->rdgrants > MAX_READ_DELEGATIONS) 1258 return (OPEN_DELEGATE_NONE); 1259 1260 /* 1261 * Should consider limiting total number of read/write 1262 * delegations the server will permit. 1263 */ 1264 1265 return (dtype); 1266 } 1267 1268 /* 1269 * Try and grant a delegation for an open give the state. The routine 1270 * returns the delegation type granted. This could be OPEN_DELEGATE_NONE. 1271 * 1272 * The state and associate file entry must be locked 1273 */ 1274 rfs4_deleg_state_t * 1275 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall) 1276 { 1277 rfs4_file_t *fp = sp->finfo; 1278 open_delegation_type4 dtype; 1279 int no_delegation; 1280 1281 ASSERT(rfs4_dbe_islocked(sp->dbe)); 1282 ASSERT(rfs4_dbe_islocked(fp->dbe)); 1283 1284 /* Is the server even providing delegations? */ 1285 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE) 1286 return (NULL); 1287 1288 /* Check to see if delegations have been temporarily disabled */ 1289 mutex_enter(&rfs4_deleg_lock); 1290 no_delegation = rfs4_deleg_disabled; 1291 mutex_exit(&rfs4_deleg_lock); 1292 1293 if (no_delegation) 1294 return (NULL); 1295 1296 /* Don't grant a delegation if a deletion is impending. */ 1297 if (fp->dinfo->hold_grant > 0) { 1298 return (NULL); 1299 } 1300 1301 /* 1302 * Don't grant a delegation if there are any lock manager 1303 * (NFSv2/v3) locks for the file. This is a bit of a hack (e.g., 1304 * if there are only read locks we should be able to grant a 1305 * read-only delegation), but it's good enough for now. 1306 * 1307 * MT safety: the lock manager checks for conflicting delegations 1308 * before processing a lock request. That check will block until 1309 * we are done here. So if the lock manager acquires a lock after 1310 * we decide to grant the delegation, the delegation will get 1311 * immediately recalled (if there's a conflict), so we're safe. 1312 */ 1313 if (lm_vp_active(fp->vp)) { 1314 return (NULL); 1315 } 1316 1317 /* 1318 * Based on the type of delegation request passed in, take the 1319 * appropriate action (DELEG_NONE is handled above) 1320 */ 1321 switch (dreq) { 1322 1323 case DELEG_READ: 1324 case DELEG_WRITE: 1325 /* 1326 * The server "must" grant the delegation in this case. 1327 * Client is using open previous 1328 */ 1329 dtype = (open_delegation_type4)dreq; 1330 *recall = 1; 1331 break; 1332 case DELEG_ANY: 1333 /* 1334 * If a valid callback path does not exist, no delegation may 1335 * be granted. 1336 */ 1337 if (sp->owner->client->cbinfo.cb_state != CB_OK) 1338 return (NULL); 1339 1340 /* 1341 * If the original operation which caused time_rm_delayed 1342 * to be set hasn't been retried and completed for one 1343 * full lease period, clear it and allow delegations to 1344 * get granted again. 1345 */ 1346 if (fp->dinfo->time_rm_delayed > 0 && 1347 gethrestime_sec() > 1348 fp->dinfo->time_rm_delayed + rfs4_lease_time) 1349 fp->dinfo->time_rm_delayed = 0; 1350 1351 /* 1352 * If we are waiting for a delegation to be returned then 1353 * don't delegate this file. We do this for correctness as 1354 * well as if the file is being recalled we would likely 1355 * recall this file again. 1356 */ 1357 1358 if (fp->dinfo->time_recalled != 0 || 1359 fp->dinfo->time_rm_delayed != 0) 1360 return (NULL); 1361 1362 /* Get the "best" delegation candidate */ 1363 dtype = rfs4_check_delegation(sp, fp); 1364 1365 if (dtype == OPEN_DELEGATE_NONE) 1366 return (NULL); 1367 1368 /* 1369 * Based on policy and the history of the file get the 1370 * actual delegation. 1371 */ 1372 dtype = rfs4_delegation_policy(dtype, fp->dinfo, 1373 sp->owner->client->clientid); 1374 1375 if (dtype == OPEN_DELEGATE_NONE) 1376 return (NULL); 1377 break; 1378 default: 1379 return (NULL); 1380 } 1381 1382 /* set the delegation for the state */ 1383 return (rfs4_deleg_state(sp, dtype, recall)); 1384 } 1385 1386 void 1387 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp, 1388 nfsace4 *ace, int recall) 1389 { 1390 open_write_delegation4 *wp; 1391 open_read_delegation4 *rp; 1392 nfs_space_limit4 *spl; 1393 nfsace4 nace; 1394 1395 /* 1396 * We need to allocate a new copy of the who string. 1397 * this string will be freed by the rfs4_op_open dis_resfree 1398 * routine. We need to do this allocation since replays will 1399 * be allocated and rfs4_compound can't tell the difference from 1400 * a replay and an inital open. N.B. if an ace is passed in, it 1401 * the caller's responsibility to free it. 1402 */ 1403 1404 if (ace == NULL) { 1405 /* 1406 * Default is to deny all access, the client will have 1407 * to contact the server. XXX Do we want to actually 1408 * set a deny for every one, or do we simply want to 1409 * construct an entity that will match no one? 1410 */ 1411 nace.type = ACE4_ACCESS_DENIED_ACE_TYPE; 1412 nace.flag = 0; 1413 nace.access_mask = ACE4_VALID_MASK_BITS; 1414 (void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who); 1415 } else { 1416 nace.type = ace->type; 1417 nace.flag = ace->flag; 1418 nace.access_mask = ace->access_mask; 1419 (void) utf8_copy(&ace->who, &nace.who); 1420 } 1421 1422 dp->delegation_type = dsp->dtype; 1423 1424 switch (dsp->dtype) { 1425 case OPEN_DELEGATE_NONE: 1426 break; 1427 case OPEN_DELEGATE_READ: 1428 rp = &dp->open_delegation4_u.read; 1429 rp->stateid = dsp->delegid.stateid; 1430 rp->recall = (bool_t)recall; 1431 rp->permissions = nace; 1432 break; 1433 case OPEN_DELEGATE_WRITE: 1434 wp = &dp->open_delegation4_u.write; 1435 wp->stateid = dsp->delegid.stateid; 1436 wp->recall = (bool_t)recall; 1437 spl = &wp->space_limit; 1438 spl->limitby = NFS_LIMIT_SIZE; 1439 spl->nfs_space_limit4_u.filesize = 0; 1440 wp->permissions = nace; 1441 break; 1442 } 1443 } 1444 1445 /* 1446 * Check if the file is delegated via the provided file struct. 1447 * Return TRUE if it is delegated. This is intended for use by 1448 * the v4 server. The v2/v3 server code should use rfs4_check_delegated(). 1449 * 1450 * Note that if the file is found to have a delegation, it is 1451 * recalled, unless the clientid of the caller matches the clientid of the 1452 * delegation. If the caller has specified, there is a slight delay 1453 * inserted in the hopes that the delegation will be returned quickly. 1454 */ 1455 bool_t 1456 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp, 1457 bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp) 1458 { 1459 rfs4_deleg_state_t *dsp; 1460 1461 /* Is delegation enabled? */ 1462 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE) 1463 return (FALSE); 1464 1465 /* do we have a delegation on this file? */ 1466 rfs4_dbe_lock(fp->dbe); 1467 if (fp->dinfo->dtype == OPEN_DELEGATE_NONE) { 1468 if (is_rm) 1469 fp->dinfo->hold_grant++; 1470 rfs4_dbe_unlock(fp->dbe); 1471 return (FALSE); 1472 } 1473 /* 1474 * do we have a write delegation on this file or are we 1475 * requesting write access to a file with any type of existing 1476 * delegation? 1477 */ 1478 if (mode == FWRITE || fp->dinfo->dtype == OPEN_DELEGATE_WRITE) { 1479 if (cp != NULL) { 1480 dsp = fp->delegationlist.next->dsp; 1481 if (dsp == NULL) { 1482 rfs4_dbe_unlock(fp->dbe); 1483 return (FALSE); 1484 } 1485 /* 1486 * Does the requestor already own the delegation? 1487 */ 1488 if (dsp->client->clientid == *(cp)) { 1489 rfs4_dbe_unlock(fp->dbe); 1490 return (FALSE); 1491 } 1492 } 1493 1494 rfs4_dbe_unlock(fp->dbe); 1495 rfs4_recall_deleg(fp, trunc, NULL); 1496 1497 if (!do_delay) { 1498 rfs4_dbe_lock(fp->dbe); 1499 fp->dinfo->time_rm_delayed = gethrestime_sec(); 1500 rfs4_dbe_unlock(fp->dbe); 1501 return (TRUE); 1502 } 1503 1504 delay(NFS4_DELEGATION_CONFLICT_DELAY); 1505 1506 rfs4_dbe_lock(fp->dbe); 1507 if (fp->dinfo->dtype != OPEN_DELEGATE_NONE) { 1508 fp->dinfo->time_rm_delayed = gethrestime_sec(); 1509 rfs4_dbe_unlock(fp->dbe); 1510 return (TRUE); 1511 } 1512 } 1513 if (is_rm) 1514 fp->dinfo->hold_grant++; 1515 rfs4_dbe_unlock(fp->dbe); 1516 return (FALSE); 1517 } 1518 1519 /* 1520 * Check if the file is delegated in the case of a v2 or v3 access. 1521 * Return TRUE if it is delegated which in turn means that v2 should 1522 * drop the request and in the case of v3 JUKEBOX should be returned. 1523 */ 1524 bool_t 1525 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc) 1526 { 1527 rfs4_file_t *fp; 1528 bool_t create = FALSE; 1529 bool_t rc = FALSE; 1530 1531 rfs4_hold_deleg_policy(); 1532 1533 /* Is delegation enabled? */ 1534 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) { 1535 fp = rfs4_findfile(vp, NULL, &create); 1536 if (fp != NULL) { 1537 if (rfs4_check_delegated_byfp(mode, fp, trunc, 1538 TRUE, FALSE, NULL)) { 1539 rc = TRUE; 1540 } 1541 rfs4_file_rele(fp); 1542 } 1543 } 1544 rfs4_rele_deleg_policy(); 1545 return (rc); 1546 } 1547 1548 /* 1549 * Release a hold on the hold_grant counter which 1550 * prevents delegation from being granted while a remove 1551 * or a rename is in progress. 1552 */ 1553 void 1554 rfs4_clear_dont_grant(rfs4_file_t *fp) 1555 { 1556 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE) 1557 return; 1558 rfs4_dbe_lock(fp->dbe); 1559 ASSERT(fp->dinfo->hold_grant > 0); 1560 fp->dinfo->hold_grant--; 1561 fp->dinfo->time_rm_delayed = 0; 1562 rfs4_dbe_unlock(fp->dbe); 1563 } 1564 1565 /* 1566 * State support for delegation. 1567 * Set the state delegation type for this state; 1568 * This routine is called from open via rfs4_grant_delegation and the entry 1569 * locks on sp and sp->finfo are assumed. 1570 */ 1571 static rfs4_deleg_state_t * 1572 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall) 1573 { 1574 rfs4_file_t *fp = sp->finfo; 1575 bool_t create = TRUE; 1576 rfs4_deleg_state_t *dsp; 1577 vnode_t *vp; 1578 int open_prev = *recall; 1579 int ret; 1580 int fflags = 0; 1581 1582 ASSERT(rfs4_dbe_islocked(sp->dbe)); 1583 ASSERT(rfs4_dbe_islocked(fp->dbe)); 1584 1585 /* Shouldn't happen */ 1586 if (fp->dinfo->recall_count != 0 || 1587 (fp->dinfo->dtype == OPEN_DELEGATE_READ && 1588 dtype != OPEN_DELEGATE_READ)) { 1589 return (NULL); 1590 } 1591 1592 /* Unlock to avoid deadlock */ 1593 rfs4_dbe_unlock(fp->dbe); 1594 rfs4_dbe_unlock(sp->dbe); 1595 1596 dsp = rfs4_finddeleg(sp, &create); 1597 1598 rfs4_dbe_lock(sp->dbe); 1599 rfs4_dbe_lock(fp->dbe); 1600 1601 if (dsp == NULL) 1602 return (NULL); 1603 1604 /* 1605 * It is possible that since we dropped the lock 1606 * in order to call finddeleg, the rfs4_file_t 1607 * was marked such that we should not grant a 1608 * delegation, if so bail out. 1609 */ 1610 if (fp->dinfo->hold_grant > 0) { 1611 rfs4_deleg_state_rele(dsp); 1612 return (NULL); 1613 } 1614 1615 if (create == FALSE) { 1616 if (sp->owner->client == dsp->client && 1617 dsp->dtype == dtype) { 1618 return (dsp); 1619 } else { 1620 rfs4_deleg_state_rele(dsp); 1621 return (NULL); 1622 } 1623 } 1624 1625 /* 1626 * Check that this file has not been delegated to another 1627 * client 1628 */ 1629 if (fp->dinfo->recall_count != 0 || 1630 fp->dinfo->dtype == OPEN_DELEGATE_WRITE || 1631 (fp->dinfo->dtype == OPEN_DELEGATE_READ && 1632 dtype != OPEN_DELEGATE_READ)) { 1633 rfs4_deleg_state_rele(dsp); 1634 return (NULL); 1635 } 1636 1637 vp = fp->vp; 1638 /* vnevent_support returns 0 if file system supports vnevents */ 1639 if (vnevent_support(vp, NULL)) { 1640 rfs4_deleg_state_rele(dsp); 1641 return (NULL); 1642 } 1643 1644 /* Calculate the fflags for this OPEN. */ 1645 if (sp->share_access & OPEN4_SHARE_ACCESS_READ) 1646 fflags |= FREAD; 1647 if (sp->share_access & OPEN4_SHARE_ACCESS_WRITE) 1648 fflags |= FWRITE; 1649 1650 *recall = 0; 1651 /* 1652 * Before granting a delegation we need to know if anyone else has 1653 * opened the file in a conflicting mode. However, first we need to 1654 * know how we opened the file to check the counts properly. 1655 */ 1656 if (dtype == OPEN_DELEGATE_READ) { 1657 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1658 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1659 vn_is_mapped(vp, V_WRITE)) { 1660 if (open_prev) { 1661 *recall = 1; 1662 } else { 1663 rfs4_deleg_state_rele(dsp); 1664 return (NULL); 1665 } 1666 } 1667 ret = fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ, 1668 rfs4_mon_hold, rfs4_mon_rele); 1669 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1670 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1671 vn_is_mapped(vp, V_WRITE)) { 1672 if (open_prev) { 1673 *recall = 1; 1674 } else { 1675 (void) fem_uninstall(vp, deleg_rdops, 1676 (void *)fp); 1677 rfs4_deleg_state_rele(dsp); 1678 return (NULL); 1679 } 1680 } 1681 /* 1682 * Because a client can hold onto a delegation after the 1683 * file has been closed, we need to keep track of the 1684 * access to this file. Otherwise the CIFS server would 1685 * not know about the client accessing the file and could 1686 * inappropriately grant an OPLOCK. 1687 * fem_install() returns EBUSY when asked to install a 1688 * OPUNIQ monitor more than once. Therefore, check the 1689 * return code because we only want this done once. 1690 */ 1691 if (ret == 0) 1692 vn_open_upgrade(vp, FREAD); 1693 } else { /* WRITE */ 1694 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1695 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1696 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) || 1697 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) || 1698 vn_is_mapped(vp, V_RDORWR)) { 1699 if (open_prev) { 1700 *recall = 1; 1701 } else { 1702 rfs4_deleg_state_rele(dsp); 1703 return (NULL); 1704 } 1705 } 1706 ret = fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ, 1707 rfs4_mon_hold, rfs4_mon_rele); 1708 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1709 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1710 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) || 1711 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) || 1712 vn_is_mapped(vp, V_RDORWR)) { 1713 if (open_prev) { 1714 *recall = 1; 1715 } else { 1716 (void) fem_uninstall(vp, deleg_wrops, 1717 (void *)fp); 1718 rfs4_deleg_state_rele(dsp); 1719 return (NULL); 1720 } 1721 } 1722 /* 1723 * Because a client can hold onto a delegation after the 1724 * file has been closed, we need to keep track of the 1725 * access to this file. Otherwise the CIFS server would 1726 * not know about the client accessing the file and could 1727 * inappropriately grant an OPLOCK. 1728 * fem_install() returns EBUSY when asked to install a 1729 * OPUNIQ monitor more than once. Therefore, check the 1730 * return code because we only want this done once. 1731 */ 1732 if (ret == 0) 1733 vn_open_upgrade(vp, FREAD|FWRITE); 1734 } 1735 /* Place on delegation list for file */ 1736 insque(&dsp->delegationlist, fp->delegationlist.prev); 1737 1738 dsp->dtype = fp->dinfo->dtype = dtype; 1739 1740 /* Update delegation stats for this file */ 1741 fp->dinfo->time_lastgrant = gethrestime_sec(); 1742 1743 /* reset since this is a new delegation */ 1744 fp->dinfo->conflicted_client = 0; 1745 fp->dinfo->ever_recalled = FALSE; 1746 1747 if (dtype == OPEN_DELEGATE_READ) 1748 fp->dinfo->rdgrants++; 1749 else 1750 fp->dinfo->wrgrants++; 1751 1752 return (dsp); 1753 } 1754 1755 /* 1756 * State routine for the server when a delegation is returned. 1757 */ 1758 void 1759 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked) 1760 { 1761 rfs4_file_t *fp = dsp->finfo; 1762 open_delegation_type4 dtypewas; 1763 1764 rfs4_dbe_lock(fp->dbe); 1765 /* Remove state from recall list */ 1766 1767 remque(&dsp->delegationlist); 1768 dsp->delegationlist.next = dsp->delegationlist.prev = 1769 &dsp->delegationlist; 1770 1771 if (&fp->delegationlist == fp->delegationlist.next) { 1772 dtypewas = fp->dinfo->dtype; 1773 fp->dinfo->dtype = OPEN_DELEGATE_NONE; 1774 rfs4_dbe_cv_broadcast(fp->dbe); 1775 1776 /* if file system was unshared, the vp will be NULL */ 1777 if (fp->vp != NULL) { 1778 /* 1779 * Once a delegation is no longer held by any client, 1780 * the monitor is uninstalled. At this point, the 1781 * client must send OPEN otw, so we don't need the 1782 * reference on the vnode anymore. The open 1783 * downgrade removes the reference put on earlier. 1784 */ 1785 if (dtypewas == OPEN_DELEGATE_READ) { 1786 (void) fem_uninstall(fp->vp, deleg_rdops, 1787 (void *)fp); 1788 vn_open_downgrade(fp->vp, FREAD); 1789 } else if (dtypewas == OPEN_DELEGATE_WRITE) { 1790 (void) fem_uninstall(fp->vp, deleg_wrops, 1791 (void *)fp); 1792 vn_open_downgrade(fp->vp, FREAD|FWRITE); 1793 } 1794 } 1795 } 1796 1797 switch (dsp->dtype) { 1798 case OPEN_DELEGATE_READ: 1799 fp->dinfo->rdgrants--; 1800 break; 1801 case OPEN_DELEGATE_WRITE: 1802 fp->dinfo->wrgrants--; 1803 break; 1804 default: 1805 break; 1806 } 1807 1808 /* used in the policy decision */ 1809 fp->dinfo->time_returned = gethrestime_sec(); 1810 1811 /* 1812 * reset the time_recalled field so future delegations are not 1813 * accidentally revoked 1814 */ 1815 if ((fp->dinfo->rdgrants + fp->dinfo->wrgrants) == 0) 1816 fp->dinfo->time_recalled = 0; 1817 1818 rfs4_dbe_unlock(fp->dbe); 1819 1820 rfs4_dbe_lock(dsp->dbe); 1821 1822 dsp->dtype = OPEN_DELEGATE_NONE; 1823 1824 if (revoked == TRUE) 1825 dsp->time_revoked = gethrestime_sec(); 1826 1827 rfs4_dbe_invalidate(dsp->dbe); 1828 1829 rfs4_dbe_unlock(dsp->dbe); 1830 1831 if (revoked == TRUE) { 1832 rfs4_dbe_lock(dsp->client->dbe); 1833 dsp->client->deleg_revoked++; /* observability */ 1834 rfs4_dbe_unlock(dsp->client->dbe); 1835 } 1836 } 1837 1838 static void 1839 rfs4_revoke_deleg(rfs4_deleg_state_t *dsp) 1840 { 1841 rfs4_return_deleg(dsp, TRUE); 1842 } 1843 1844 static void 1845 rfs4_revoke_file(rfs4_file_t *fp) 1846 { 1847 rfs4_deleg_state_t *dsp; 1848 1849 /* 1850 * The lock for rfs4_file_t must be held when traversing the 1851 * delegation list but that lock needs to be released to call 1852 * rfs4_revoke_deleg() 1853 * This for loop is set up to check the list for being empty, 1854 * and locking the rfs4_file_t struct on init and end 1855 */ 1856 for (rfs4_dbe_lock(fp->dbe); 1857 &fp->delegationlist != fp->delegationlist.next; 1858 rfs4_dbe_lock(fp->dbe)) { 1859 1860 dsp = fp->delegationlist.next->dsp; 1861 rfs4_dbe_hold(dsp->dbe); 1862 rfs4_dbe_unlock(fp->dbe); 1863 rfs4_revoke_deleg(dsp); 1864 rfs4_deleg_state_rele(dsp); 1865 } 1866 rfs4_dbe_unlock(fp->dbe); 1867 } 1868 1869 /* 1870 * A delegation is assumed to be present on the file associated with 1871 * "state". Check to see if the delegation matches is associated with 1872 * the same client as referenced by "state". If it is not, TRUE is 1873 * returned. If the delegation DOES match the client (or no 1874 * delegation is present), return FALSE. 1875 * Assume the state entry and file entry are locked. 1876 */ 1877 bool_t 1878 rfs4_is_deleg(rfs4_state_t *state) 1879 { 1880 rfs4_deleg_state_t *dsp; 1881 rfs4_file_t *fp = state->finfo; 1882 rfs4_client_t *cp = state->owner->client; 1883 1884 ASSERT(rfs4_dbe_islocked(fp->dbe)); 1885 for (dsp = fp->delegationlist.next->dsp; dsp != NULL; 1886 dsp = dsp->delegationlist.next->dsp) { 1887 if (cp != dsp->client) { 1888 return (TRUE); 1889 } 1890 } 1891 return (FALSE); 1892 } 1893 1894 void 1895 rfs4_disable_delegation(void) 1896 { 1897 mutex_enter(&rfs4_deleg_lock); 1898 rfs4_deleg_disabled++; 1899 mutex_exit(&rfs4_deleg_lock); 1900 } 1901 1902 void 1903 rfs4_enable_delegation(void) 1904 { 1905 mutex_enter(&rfs4_deleg_lock); 1906 ASSERT(rfs4_deleg_disabled > 0); 1907 rfs4_deleg_disabled--; 1908 mutex_exit(&rfs4_deleg_lock); 1909 } 1910 1911 void 1912 rfs4_mon_hold(void *arg) 1913 { 1914 rfs4_file_t *fp = arg; 1915 1916 rfs4_dbe_hold(fp->dbe); 1917 } 1918 1919 void 1920 rfs4_mon_rele(void *arg) 1921 { 1922 rfs4_file_t *fp = arg; 1923 1924 rfs4_dbe_rele_nolock(fp->dbe); 1925 } 1926