1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright 2018 Nexenta Systems, Inc. 29 */ 30 31 #include <sys/systm.h> 32 #include <rpc/auth.h> 33 #include <rpc/clnt.h> 34 #include <nfs/nfs4_kprot.h> 35 #include <nfs/nfs4.h> 36 #include <nfs/lm.h> 37 #include <sys/cmn_err.h> 38 #include <sys/disp.h> 39 #include <sys/sdt.h> 40 41 #include <sys/pathname.h> 42 43 #include <sys/strsubr.h> 44 #include <sys/ddi.h> 45 46 #include <sys/vnode.h> 47 #include <sys/sdt.h> 48 #include <inet/common.h> 49 #include <inet/ip.h> 50 #include <inet/ip6.h> 51 52 #define MAX_READ_DELEGATIONS 5 53 54 static int rfs4_deleg_disabled; 55 static int rfs4_max_setup_cb_tries = 5; 56 57 #ifdef DEBUG 58 59 int rfs4_cb_null; 60 int rfs4_cb_debug; 61 int rfs4_deleg_debug; 62 63 #endif 64 65 static void rfs4_recall_file(rfs4_file_t *, 66 void (*recall)(rfs4_deleg_state_t *, bool_t), 67 bool_t, rfs4_client_t *); 68 static void rfs4_revoke_file(rfs4_file_t *); 69 static void rfs4_cb_chflush(rfs4_cbinfo_t *); 70 static CLIENT *rfs4_cb_getch(rfs4_cbinfo_t *); 71 static void rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t); 72 static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *, 73 open_delegation_type4, int *); 74 75 /* 76 * Convert a universal address to an transport specific 77 * address using inet_pton. 78 */ 79 static int 80 uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp) 81 { 82 int dots = 0, i, j, len, k; 83 unsigned char c; 84 in_port_t port = 0; 85 86 len = strlen(ua); 87 88 for (i = len-1; i >= 0; i--) { 89 90 if (ua[i] == '.') 91 dots++; 92 93 if (dots == 2) { 94 95 ua[i] = '\0'; 96 /* 97 * We use k to remember were to stick '.' back, since 98 * ua was kmem_allocateded from the pool len+1. 99 */ 100 k = i; 101 if (inet_pton(af, ua, ap) == 1) { 102 103 c = 0; 104 105 for (j = i+1; j < len; j++) { 106 if (ua[j] == '.') { 107 port = c << 8; 108 c = 0; 109 } else if (ua[j] >= '0' && 110 ua[j] <= '9') { 111 c *= 10; 112 c += ua[j] - '0'; 113 } else { 114 ua[k] = '.'; 115 return (EINVAL); 116 } 117 } 118 port += c; 119 120 *pp = htons(port); 121 122 ua[k] = '.'; 123 return (0); 124 } else { 125 ua[k] = '.'; 126 return (EINVAL); 127 } 128 } 129 } 130 131 return (EINVAL); 132 } 133 134 /* 135 * Update the delegation policy with the 136 * value of "new_policy" 137 */ 138 void 139 rfs4_set_deleg_policy(nfs4_srv_t *nsrv4, srv_deleg_policy_t new_policy) 140 { 141 rw_enter(&nsrv4->deleg_policy_lock, RW_WRITER); 142 nsrv4->nfs4_deleg_policy = new_policy; 143 rw_exit(&nsrv4->deleg_policy_lock); 144 } 145 146 void 147 rfs4_hold_deleg_policy(nfs4_srv_t *nsrv4) 148 { 149 rw_enter(&nsrv4->deleg_policy_lock, RW_READER); 150 } 151 152 void 153 rfs4_rele_deleg_policy(nfs4_srv_t *nsrv4) 154 { 155 rw_exit(&nsrv4->deleg_policy_lock); 156 } 157 158 srv_deleg_policy_t 159 nfs4_get_deleg_policy() 160 { 161 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 162 return (nsrv4->nfs4_deleg_policy); 163 } 164 165 166 /* 167 * This free function is to be used when the client struct is being 168 * released and nothing at all is needed of the callback info any 169 * longer. 170 */ 171 void 172 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp) 173 { 174 char *addr = cbp->cb_callback.cb_location.r_addr; 175 char *netid = cbp->cb_callback.cb_location.r_netid; 176 177 /* Free old address if any */ 178 179 if (addr) 180 kmem_free(addr, strlen(addr) + 1); 181 if (netid) 182 kmem_free(netid, strlen(netid) + 1); 183 184 addr = cbp->cb_newer.cb_callback.cb_location.r_addr; 185 netid = cbp->cb_newer.cb_callback.cb_location.r_netid; 186 187 if (addr) 188 kmem_free(addr, strlen(addr) + 1); 189 if (netid) 190 kmem_free(netid, strlen(netid) + 1); 191 192 if (cbp->cb_chc_free) { 193 rfs4_cb_chflush(cbp); 194 } 195 } 196 197 /* 198 * The server uses this to check the callback path supplied by the 199 * client. The callback connection is marked "in progress" while this 200 * work is going on and then eventually marked either OK or FAILED. 201 * This work can be done as part of a separate thread and at the end 202 * of this the thread will exit or it may be done such that the caller 203 * will continue with other work. 204 */ 205 static void 206 rfs4_do_cb_null(rfs4_client_t *cp) 207 { 208 struct timeval tv; 209 CLIENT *ch; 210 rfs4_cbstate_t newstate; 211 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo; 212 213 mutex_enter(cbp->cb_lock); 214 /* If another thread is doing CB_NULL RPC then return */ 215 if (cbp->cb_nullcaller == TRUE) { 216 mutex_exit(cbp->cb_lock); 217 rfs4_client_rele(cp); 218 zthread_exit(); 219 } 220 221 /* Mark the cbinfo as having a thread in the NULL callback */ 222 cbp->cb_nullcaller = TRUE; 223 224 /* 225 * Are there other threads still using the cbinfo client 226 * handles? If so, this thread must wait before going and 227 * mucking aroiund with the callback information 228 */ 229 while (cbp->cb_refcnt != 0) 230 cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock); 231 232 /* 233 * This thread itself may find that new callback info has 234 * arrived and is set up to handle this case and redrive the 235 * call to the client's callback server. 236 */ 237 retry: 238 if (cbp->cb_newer.cb_new == TRUE && 239 cbp->cb_newer.cb_confirmed == TRUE) { 240 char *addr = cbp->cb_callback.cb_location.r_addr; 241 char *netid = cbp->cb_callback.cb_location.r_netid; 242 243 /* 244 * Free the old stuff if it exists; may be the first 245 * time through this path 246 */ 247 if (addr) 248 kmem_free(addr, strlen(addr) + 1); 249 if (netid) 250 kmem_free(netid, strlen(netid) + 1); 251 252 /* Move over the addr/netid */ 253 cbp->cb_callback.cb_location.r_addr = 254 cbp->cb_newer.cb_callback.cb_location.r_addr; 255 cbp->cb_newer.cb_callback.cb_location.r_addr = NULL; 256 cbp->cb_callback.cb_location.r_netid = 257 cbp->cb_newer.cb_callback.cb_location.r_netid; 258 cbp->cb_newer.cb_callback.cb_location.r_netid = NULL; 259 260 /* Get the program number */ 261 cbp->cb_callback.cb_program = 262 cbp->cb_newer.cb_callback.cb_program; 263 cbp->cb_newer.cb_callback.cb_program = 0; 264 265 /* Don't forget the protocol's "cb_ident" field */ 266 cbp->cb_ident = cbp->cb_newer.cb_ident; 267 cbp->cb_newer.cb_ident = 0; 268 269 /* no longer new */ 270 cbp->cb_newer.cb_new = FALSE; 271 cbp->cb_newer.cb_confirmed = FALSE; 272 273 /* get rid of the old client handles that may exist */ 274 rfs4_cb_chflush(cbp); 275 276 cbp->cb_state = CB_NONE; 277 cbp->cb_timefailed = 0; /* reset the clock */ 278 cbp->cb_notified_of_cb_path_down = TRUE; 279 } 280 281 if (cbp->cb_state != CB_NONE) { 282 cv_broadcast(cbp->cb_cv); /* let the others know */ 283 cbp->cb_nullcaller = FALSE; 284 mutex_exit(cbp->cb_lock); 285 rfs4_client_rele(cp); 286 zthread_exit(); 287 } 288 289 /* mark rfs4_client_t as CALLBACK NULL in progress */ 290 cbp->cb_state = CB_INPROG; 291 mutex_exit(cbp->cb_lock); 292 293 /* get/generate a client handle */ 294 if ((ch = rfs4_cb_getch(cbp)) == NULL) { 295 mutex_enter(cbp->cb_lock); 296 cbp->cb_state = CB_BAD; 297 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 298 goto retry; 299 } 300 301 302 tv.tv_sec = 30; 303 tv.tv_usec = 0; 304 if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) { 305 newstate = CB_BAD; 306 } else { 307 newstate = CB_OK; 308 #ifdef DEBUG 309 rfs4_cb_null++; 310 #endif 311 } 312 313 /* Check to see if the client has specified new callback info */ 314 mutex_enter(cbp->cb_lock); 315 rfs4_cb_freech(cbp, ch, TRUE); 316 if (cbp->cb_newer.cb_new == TRUE && 317 cbp->cb_newer.cb_confirmed == TRUE) { 318 goto retry; /* give the CB_NULL another chance */ 319 } 320 321 cbp->cb_state = newstate; 322 if (cbp->cb_state == CB_BAD) 323 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 324 325 cv_broadcast(cbp->cb_cv); /* start up the other threads */ 326 cbp->cb_nullcaller = FALSE; 327 mutex_exit(cbp->cb_lock); 328 rfs4_client_rele(cp); 329 zthread_exit(); 330 } 331 332 /* 333 * Given a client struct, inspect the callback info to see if the 334 * callback path is up and available. 335 * 336 * If new callback path is available and no one has set it up then 337 * try to set it up. If setup is not successful after 5 tries (5 secs) 338 * then gives up and returns NULL. 339 * 340 * If callback path is being initialized, then wait for the CB_NULL RPC 341 * call to occur. 342 */ 343 static rfs4_cbinfo_t * 344 rfs4_cbinfo_hold(rfs4_client_t *cp) 345 { 346 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo; 347 int retries = 0; 348 349 mutex_enter(cbp->cb_lock); 350 351 while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) { 352 /* 353 * Looks like a new callback path may be available and 354 * noone has set it up. 355 */ 356 mutex_exit(cbp->cb_lock); 357 rfs4_dbe_hold(cp->rc_dbe); 358 rfs4_do_cb_null(cp); /* caller will release client hold */ 359 360 mutex_enter(cbp->cb_lock); 361 /* 362 * If callback path is no longer new, or it's being setup 363 * then stop and wait for it to be done. 364 */ 365 if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE) 366 break; 367 mutex_exit(cbp->cb_lock); 368 369 if (++retries >= rfs4_max_setup_cb_tries) 370 return (NULL); 371 delay(hz); 372 mutex_enter(cbp->cb_lock); 373 } 374 375 /* Is there a thread working on doing the CB_NULL RPC? */ 376 if (cbp->cb_nullcaller == TRUE) 377 cv_wait(cbp->cb_cv, cbp->cb_lock); /* if so, wait on it */ 378 379 /* If the callback path is not okay (up and running), just quit */ 380 if (cbp->cb_state != CB_OK) { 381 mutex_exit(cbp->cb_lock); 382 return (NULL); 383 } 384 385 /* Let someone know we are using the current callback info */ 386 cbp->cb_refcnt++; 387 mutex_exit(cbp->cb_lock); 388 return (cbp); 389 } 390 391 /* 392 * The caller is done with the callback info. It may be that the 393 * caller's RPC failed and the NFSv4 client has actually provided new 394 * callback information. If so, let the caller know so they can 395 * advantage of this and maybe retry the RPC that originally failed. 396 */ 397 static int 398 rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate) 399 { 400 int cb_new = FALSE; 401 402 mutex_enter(cbp->cb_lock); 403 404 /* The caller gets a chance to mark the callback info as bad */ 405 if (newstate != CB_NOCHANGE) 406 cbp->cb_state = newstate; 407 if (newstate == CB_FAILED) { 408 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 409 cbp->cb_notified_of_cb_path_down = FALSE; 410 } 411 412 cbp->cb_refcnt--; /* no longer using the information */ 413 414 /* 415 * A thread may be waiting on this one to finish and if so, 416 * let it know that it is okay to do the CB_NULL to the 417 * client's callback server. 418 */ 419 if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller) 420 cv_broadcast(cbp->cb_cv_nullcaller); 421 422 /* 423 * If this is the last thread to use the callback info and 424 * there is new callback information to try and no thread is 425 * there ready to do the CB_NULL, then return true to teh 426 * caller so they can do the CB_NULL 427 */ 428 if (cbp->cb_refcnt == 0 && 429 cbp->cb_nullcaller == FALSE && 430 cbp->cb_newer.cb_new == TRUE && 431 cbp->cb_newer.cb_confirmed == TRUE) 432 cb_new = TRUE; 433 434 mutex_exit(cbp->cb_lock); 435 436 return (cb_new); 437 } 438 439 /* 440 * Given the information in the callback info struct, create a client 441 * handle that can be used by the server for its callback path. 442 */ 443 static CLIENT * 444 rfs4_cbch_init(rfs4_cbinfo_t *cbp) 445 { 446 struct knetconfig knc; 447 vnode_t *vp; 448 struct sockaddr_in addr4; 449 struct sockaddr_in6 addr6; 450 void *addr, *taddr; 451 in_port_t *pp; 452 int af; 453 char *devnam; 454 struct netbuf nb; 455 int size; 456 CLIENT *ch = NULL; 457 int useresvport = 0; 458 459 mutex_enter(cbp->cb_lock); 460 461 if (cbp->cb_callback.cb_location.r_netid == NULL || 462 cbp->cb_callback.cb_location.r_addr == NULL) { 463 goto cb_init_out; 464 } 465 466 if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) { 467 knc.knc_semantics = NC_TPI_COTS; 468 knc.knc_protofmly = "inet"; 469 knc.knc_proto = "tcp"; 470 devnam = "/dev/tcp"; 471 af = AF_INET; 472 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp") 473 == 0) { 474 knc.knc_semantics = NC_TPI_CLTS; 475 knc.knc_protofmly = "inet"; 476 knc.knc_proto = "udp"; 477 devnam = "/dev/udp"; 478 af = AF_INET; 479 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6") 480 == 0) { 481 knc.knc_semantics = NC_TPI_COTS; 482 knc.knc_protofmly = "inet6"; 483 knc.knc_proto = "tcp"; 484 devnam = "/dev/tcp6"; 485 af = AF_INET6; 486 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6") 487 == 0) { 488 knc.knc_semantics = NC_TPI_CLTS; 489 knc.knc_protofmly = "inet6"; 490 knc.knc_proto = "udp"; 491 devnam = "/dev/udp6"; 492 af = AF_INET6; 493 } else { 494 goto cb_init_out; 495 } 496 497 if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) { 498 499 goto cb_init_out; 500 } 501 502 if (vp->v_type != VCHR) { 503 VN_RELE(vp); 504 goto cb_init_out; 505 } 506 507 knc.knc_rdev = vp->v_rdev; 508 509 VN_RELE(vp); 510 511 if (af == AF_INET) { 512 size = sizeof (addr4); 513 bzero(&addr4, size); 514 addr4.sin_family = (sa_family_t)af; 515 addr = &addr4.sin_addr; 516 pp = &addr4.sin_port; 517 taddr = &addr4; 518 } else /* AF_INET6 */ { 519 size = sizeof (addr6); 520 bzero(&addr6, size); 521 addr6.sin6_family = (sa_family_t)af; 522 addr = &addr6.sin6_addr; 523 pp = &addr6.sin6_port; 524 taddr = &addr6; 525 } 526 527 if (uaddr2sockaddr(af, 528 cbp->cb_callback.cb_location.r_addr, addr, pp)) { 529 530 goto cb_init_out; 531 } 532 533 534 nb.maxlen = nb.len = size; 535 nb.buf = (char *)taddr; 536 537 if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program, 538 NFS_CB, 0, 0, curthread->t_cred, &ch)) { 539 540 ch = NULL; 541 } 542 543 /* turn off reserved port usage */ 544 (void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport); 545 546 cb_init_out: 547 mutex_exit(cbp->cb_lock); 548 return (ch); 549 } 550 551 /* 552 * Iterate over the client handle cache and 553 * destroy it. 554 */ 555 static void 556 rfs4_cb_chflush(rfs4_cbinfo_t *cbp) 557 { 558 CLIENT *ch; 559 560 while (cbp->cb_chc_free) { 561 cbp->cb_chc_free--; 562 ch = cbp->cb_chc[cbp->cb_chc_free]; 563 cbp->cb_chc[cbp->cb_chc_free] = NULL; 564 if (ch) { 565 if (ch->cl_auth) 566 auth_destroy(ch->cl_auth); 567 clnt_destroy(ch); 568 } 569 } 570 } 571 572 /* 573 * Return a client handle, either from a the small 574 * rfs4_client_t cache or one that we just created. 575 */ 576 static CLIENT * 577 rfs4_cb_getch(rfs4_cbinfo_t *cbp) 578 { 579 CLIENT *cbch = NULL; 580 uint32_t zilch = 0; 581 582 mutex_enter(cbp->cb_lock); 583 584 if (cbp->cb_chc_free) { 585 cbp->cb_chc_free--; 586 cbch = cbp->cb_chc[ cbp->cb_chc_free ]; 587 mutex_exit(cbp->cb_lock); 588 (void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch); 589 return (cbch); 590 } 591 592 mutex_exit(cbp->cb_lock); 593 594 /* none free so make it now */ 595 cbch = rfs4_cbch_init(cbp); 596 597 return (cbch); 598 } 599 600 /* 601 * Return the client handle to the small cache or 602 * destroy it. 603 */ 604 static void 605 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld) 606 { 607 if (lockheld == FALSE) 608 mutex_enter(cbp->cb_lock); 609 610 if (cbp->cb_chc_free < RFS4_CBCH_MAX) { 611 cbp->cb_chc[ cbp->cb_chc_free++ ] = ch; 612 if (lockheld == FALSE) 613 mutex_exit(cbp->cb_lock); 614 return; 615 } 616 if (lockheld == FALSE) 617 mutex_exit(cbp->cb_lock); 618 619 /* 620 * cache maxed out of free entries, obliterate 621 * this client handle, destroy it, throw it away. 622 */ 623 if (ch->cl_auth) 624 auth_destroy(ch->cl_auth); 625 clnt_destroy(ch); 626 } 627 628 /* 629 * With the supplied callback information - initialize the client 630 * callback data. If there is a callback in progress, save the 631 * callback info so that a thread can pick it up in the future. 632 */ 633 void 634 rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident) 635 { 636 char *addr = NULL; 637 char *netid = NULL; 638 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo; 639 size_t len; 640 641 /* Set the call back for the client */ 642 if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' && 643 cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') { 644 len = strlen(cb->cb_location.r_addr) + 1; 645 addr = kmem_alloc(len, KM_SLEEP); 646 bcopy(cb->cb_location.r_addr, addr, len); 647 len = strlen(cb->cb_location.r_netid) + 1; 648 netid = kmem_alloc(len, KM_SLEEP); 649 bcopy(cb->cb_location.r_netid, netid, len); 650 } 651 /* ready to save the new information but first free old, if exists */ 652 mutex_enter(cbp->cb_lock); 653 654 cbp->cb_newer.cb_callback.cb_program = cb->cb_program; 655 656 if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL) 657 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr, 658 strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1); 659 cbp->cb_newer.cb_callback.cb_location.r_addr = addr; 660 661 if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL) 662 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid, 663 strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1); 664 cbp->cb_newer.cb_callback.cb_location.r_netid = netid; 665 666 cbp->cb_newer.cb_ident = cb_ident; 667 668 if (addr && *addr && netid && *netid) { 669 cbp->cb_newer.cb_new = TRUE; 670 cbp->cb_newer.cb_confirmed = FALSE; 671 } else { 672 cbp->cb_newer.cb_new = FALSE; 673 cbp->cb_newer.cb_confirmed = FALSE; 674 } 675 676 mutex_exit(cbp->cb_lock); 677 } 678 679 /* 680 * The server uses this when processing SETCLIENTID_CONFIRM. Callback 681 * information may have been provided on SETCLIENTID and this call 682 * marks that information as confirmed and then starts a thread to 683 * test the callback path. 684 */ 685 void 686 rfs4_deleg_cb_check(rfs4_client_t *cp) 687 { 688 if (cp->rc_cbinfo.cb_newer.cb_new == FALSE) 689 return; 690 691 cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE; 692 693 rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */ 694 695 (void) zthread_create(NULL, 0, rfs4_do_cb_null, cp, 0, 696 minclsyspri); 697 } 698 699 static void 700 rfs4args_cb_recall_free(nfs_cb_argop4 *argop) 701 { 702 CB_RECALL4args *rec_argp; 703 704 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 705 if (rec_argp->fh.nfs_fh4_val) 706 kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len); 707 } 708 709 /* ARGSUSED */ 710 static void 711 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop) 712 { 713 CB_GETATTR4args *argp; 714 715 argp = &argop->nfs_cb_argop4_u.opcbgetattr; 716 if (argp->fh.nfs_fh4_val) 717 kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len); 718 } 719 720 static void 721 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp) 722 { 723 int i, arglen; 724 nfs_cb_argop4 *argop; 725 726 /* 727 * First free any special args alloc'd for specific ops. 728 */ 729 arglen = args->array_len; 730 argop = args->array; 731 for (i = 0; i < arglen; i++, argop++) { 732 733 switch (argop->argop) { 734 case OP_CB_RECALL: 735 rfs4args_cb_recall_free(argop); 736 break; 737 738 case OP_CB_GETATTR: 739 rfs4args_cb_getattr_free(argop); 740 break; 741 742 default: 743 return; 744 } 745 } 746 747 if (args->tag.utf8string_len > 0) 748 UTF8STRING_FREE(args->tag) 749 750 kmem_free(args->array, arglen * sizeof (nfs_cb_argop4)); 751 if (resp) 752 xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp); 753 } 754 755 /* 756 * General callback routine for the server to the client. 757 */ 758 static enum clnt_stat 759 rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args, 760 CB_COMPOUND4res *res, struct timeval timeout) 761 { 762 rfs4_cbinfo_t *cbp; 763 CLIENT *ch; 764 /* start with this in case cb_getch() fails */ 765 enum clnt_stat stat = RPC_FAILED; 766 767 res->tag.utf8string_val = NULL; 768 res->array = NULL; 769 770 retry: 771 cbp = rfs4_cbinfo_hold(cp); 772 if (cbp == NULL) 773 return (stat); 774 775 /* get a client handle */ 776 if ((ch = rfs4_cb_getch(cbp)) != NULL) { 777 /* 778 * reset the cb_ident since it may have changed in 779 * rfs4_cbinfo_hold() 780 */ 781 args->callback_ident = cbp->cb_ident; 782 783 stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv, 784 (caddr_t)args, xdr_CB_COMPOUND4res, 785 (caddr_t)res, timeout); 786 787 /* free client handle */ 788 rfs4_cb_freech(cbp, ch, FALSE); 789 } 790 791 /* 792 * If the rele says that there may be new callback info then 793 * retry this sequence and it may succeed as a result of the 794 * new callback path 795 */ 796 if (rfs4_cbinfo_rele(cbp, 797 (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE) 798 goto retry; 799 800 return (stat); 801 } 802 803 /* 804 * Used by the NFSv4 server to get attributes for a file while 805 * handling the case where a file has been write delegated. For the 806 * time being, VOP_GETATTR() is called and CB_GETATTR processing is 807 * not undertaken. This call site is maintained in case the server is 808 * updated in the future to handle write delegation space guarantees. 809 */ 810 nfsstat4 811 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 812 { 813 814 int error; 815 816 error = VOP_GETATTR(vp, vap, flag, cr, NULL); 817 return (puterrno4(error)); 818 } 819 820 /* 821 * This is used everywhere in the v2/v3 server to allow the 822 * integration of all NFS versions and the support of delegation. For 823 * now, just call the VOP_GETATTR(). If the NFSv4 server is enhanced 824 * in the future to provide space guarantees for write delegations 825 * then this call site should be expanded to interact with the client. 826 */ 827 int 828 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 829 { 830 return (VOP_GETATTR(vp, vap, flag, cr, NULL)); 831 } 832 833 /* 834 * Place the actual cb_recall otw call to client. 835 */ 836 static void 837 rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc) 838 { 839 CB_COMPOUND4args cb4_args; 840 CB_COMPOUND4res cb4_res; 841 CB_RECALL4args *rec_argp; 842 CB_RECALL4res *rec_resp; 843 nfs_cb_argop4 *argop; 844 int numops; 845 int argoplist_size; 846 struct timeval timeout; 847 nfs_fh4 *fhp; 848 enum clnt_stat call_stat; 849 850 /* 851 * set up the compound args 852 */ 853 numops = 1; /* CB_RECALL only */ 854 855 argoplist_size = numops * sizeof (nfs_cb_argop4); 856 argop = kmem_zalloc(argoplist_size, KM_SLEEP); 857 argop->argop = OP_CB_RECALL; 858 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 859 860 (void) str_to_utf8("cb_recall", &cb4_args.tag); 861 cb4_args.minorversion = CB4_MINORVERSION; 862 /* cb4_args.callback_ident is set in rfs4_do_callback() */ 863 cb4_args.array_len = numops; 864 cb4_args.array = argop; 865 866 /* 867 * fill in the args struct 868 */ 869 bcopy(&dsp->rds_delegid.stateid, &rec_argp->stateid, sizeof (stateid4)); 870 rec_argp->truncate = trunc; 871 872 fhp = &dsp->rds_finfo->rf_filehandle; 873 rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) * 874 fhp->nfs_fh4_len, KM_SLEEP); 875 nfs_fh4_copy(fhp, &rec_argp->fh); 876 877 /* Keep track of when we did this for observability */ 878 dsp->rds_time_recalled = gethrestime_sec(); 879 880 /* 881 * Set up the timeout for the callback and make the actual call. 882 * Timeout will be 80% of the lease period for this server. 883 */ 884 timeout.tv_sec = (rfs4_lease_time * 80) / 100; 885 timeout.tv_usec = 0; 886 887 DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->rds_client, 888 rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp); 889 890 call_stat = rfs4_do_callback(dsp->rds_client, &cb4_args, &cb4_res, 891 timeout); 892 893 rec_resp = (cb4_res.array_len == 0) ? NULL : 894 &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall; 895 896 DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->rds_client, 897 rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp); 898 899 if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) { 900 rfs4_return_deleg(dsp, TRUE); 901 } 902 903 rfs4freeargres(&cb4_args, &cb4_res); 904 } 905 906 struct recall_arg { 907 rfs4_deleg_state_t *dsp; 908 void (*recall)(rfs4_deleg_state_t *, bool_t trunc); 909 bool_t trunc; 910 }; 911 912 static void 913 do_recall(struct recall_arg *arg) 914 { 915 rfs4_deleg_state_t *dsp = arg->dsp; 916 rfs4_file_t *fp = dsp->rds_finfo; 917 callb_cpr_t cpr_info; 918 kmutex_t cpr_lock; 919 920 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 921 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall"); 922 923 /* 924 * It is possible that before this thread starts 925 * the client has send us a return_delegation, and 926 * if that is the case we do not need to send the 927 * recall callback. 928 */ 929 if (dsp->rds_dtype != OPEN_DELEGATE_NONE) { 930 DTRACE_PROBE3(nfss__i__recall, 931 struct recall_arg *, arg, 932 struct rfs4_deleg_state_t *, dsp, 933 struct rfs4_file_t *, fp); 934 935 if (arg->recall) 936 (void) (*arg->recall)(dsp, arg->trunc); 937 } 938 939 mutex_enter(fp->rf_dinfo.rd_recall_lock); 940 /* 941 * Recall count may go negative if the parent thread that is 942 * creating the individual callback threads does not modify 943 * the recall_count field before the callback thread actually 944 * gets a response from the CB_RECALL 945 */ 946 fp->rf_dinfo.rd_recall_count--; 947 if (fp->rf_dinfo.rd_recall_count == 0) 948 cv_signal(fp->rf_dinfo.rd_recall_cv); 949 mutex_exit(fp->rf_dinfo.rd_recall_lock); 950 951 mutex_enter(&cpr_lock); 952 CALLB_CPR_EXIT(&cpr_info); 953 mutex_destroy(&cpr_lock); 954 955 rfs4_deleg_state_rele(dsp); /* release the hold for this thread */ 956 kmem_free(arg, sizeof (struct recall_arg)); 957 zthread_exit(); 958 } 959 960 struct master_recall_args { 961 rfs4_file_t *fp; 962 void (*recall)(rfs4_deleg_state_t *, bool_t); 963 bool_t trunc; 964 }; 965 966 static void 967 do_recall_file(struct master_recall_args *map) 968 { 969 rfs4_file_t *fp = map->fp; 970 rfs4_deleg_state_t *dsp; 971 struct recall_arg *arg; 972 callb_cpr_t cpr_info; 973 kmutex_t cpr_lock; 974 int32_t recall_count; 975 976 rfs4_dbe_lock(fp->rf_dbe); 977 978 /* Recall already in progress ? */ 979 mutex_enter(fp->rf_dinfo.rd_recall_lock); 980 if (fp->rf_dinfo.rd_recall_count != 0) { 981 mutex_exit(fp->rf_dinfo.rd_recall_lock); 982 rfs4_dbe_rele_nolock(fp->rf_dbe); 983 rfs4_dbe_unlock(fp->rf_dbe); 984 kmem_free(map, sizeof (struct master_recall_args)); 985 zthread_exit(); 986 } 987 988 mutex_exit(fp->rf_dinfo.rd_recall_lock); 989 990 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 991 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "v4RecallFile"); 992 993 recall_count = 0; 994 for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL; 995 dsp = list_next(&fp->rf_delegstatelist, dsp)) { 996 997 rfs4_dbe_lock(dsp->rds_dbe); 998 /* 999 * if this delegation state 1000 * is being reaped skip it 1001 */ 1002 if (rfs4_dbe_is_invalid(dsp->rds_dbe)) { 1003 rfs4_dbe_unlock(dsp->rds_dbe); 1004 continue; 1005 } 1006 1007 /* hold for receiving thread */ 1008 rfs4_dbe_hold(dsp->rds_dbe); 1009 rfs4_dbe_unlock(dsp->rds_dbe); 1010 1011 arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP); 1012 arg->recall = map->recall; 1013 arg->trunc = map->trunc; 1014 arg->dsp = dsp; 1015 1016 recall_count++; 1017 1018 (void) zthread_create(NULL, 0, do_recall, arg, 0, 1019 minclsyspri); 1020 } 1021 1022 rfs4_dbe_unlock(fp->rf_dbe); 1023 1024 mutex_enter(fp->rf_dinfo.rd_recall_lock); 1025 /* 1026 * Recall count may go negative if the parent thread that is 1027 * creating the individual callback threads does not modify 1028 * the recall_count field before the callback thread actually 1029 * gets a response from the CB_RECALL 1030 */ 1031 fp->rf_dinfo.rd_recall_count += recall_count; 1032 while (fp->rf_dinfo.rd_recall_count) 1033 cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock); 1034 1035 mutex_exit(fp->rf_dinfo.rd_recall_lock); 1036 1037 DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp); 1038 rfs4_file_rele(fp); 1039 kmem_free(map, sizeof (struct master_recall_args)); 1040 mutex_enter(&cpr_lock); 1041 CALLB_CPR_EXIT(&cpr_info); 1042 mutex_destroy(&cpr_lock); 1043 zthread_exit(); 1044 } 1045 1046 static void 1047 rfs4_recall_file(rfs4_file_t *fp, 1048 void (*recall)(rfs4_deleg_state_t *, bool_t trunc), 1049 bool_t trunc, rfs4_client_t *cp) 1050 { 1051 struct master_recall_args *args; 1052 1053 rfs4_dbe_lock(fp->rf_dbe); 1054 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) { 1055 rfs4_dbe_unlock(fp->rf_dbe); 1056 return; 1057 } 1058 rfs4_dbe_hold(fp->rf_dbe); /* hold for new thread */ 1059 1060 /* 1061 * Mark the time we started the recall processing. 1062 * If it has been previously recalled, do not reset the 1063 * timer since this is used for the revocation decision. 1064 */ 1065 if (fp->rf_dinfo.rd_time_recalled == 0) 1066 fp->rf_dinfo.rd_time_recalled = gethrestime_sec(); 1067 fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */ 1068 /* Client causing recall not always available */ 1069 if (cp) 1070 fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid; 1071 1072 rfs4_dbe_unlock(fp->rf_dbe); 1073 1074 args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP); 1075 args->fp = fp; 1076 args->recall = recall; 1077 args->trunc = trunc; 1078 1079 (void) zthread_create(NULL, 0, do_recall_file, args, 0, 1080 minclsyspri); 1081 } 1082 1083 void 1084 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp) 1085 { 1086 time_t elapsed1, elapsed2; 1087 1088 if (fp->rf_dinfo.rd_time_recalled != 0) { 1089 elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled; 1090 elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite; 1091 /* First check to see if a revocation should occur */ 1092 if (elapsed1 > rfs4_lease_time && 1093 elapsed2 > rfs4_lease_time) { 1094 rfs4_revoke_file(fp); 1095 return; 1096 } 1097 /* 1098 * Next check to see if a recall should be done again 1099 * so quickly. 1100 */ 1101 if (elapsed1 <= ((rfs4_lease_time * 20) / 100)) 1102 return; 1103 } 1104 rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp); 1105 } 1106 1107 /* 1108 * rfs4_check_recall is called from rfs4_do_open to determine if the current 1109 * open conflicts with the delegation. 1110 * Return true if we need recall otherwise false. 1111 * Assumes entry locks for sp and sp->rs_finfo are held. 1112 */ 1113 bool_t 1114 rfs4_check_recall(rfs4_state_t *sp, uint32_t access) 1115 { 1116 open_delegation_type4 dtype = sp->rs_finfo->rf_dinfo.rd_dtype; 1117 1118 switch (dtype) { 1119 case OPEN_DELEGATE_NONE: 1120 /* Not currently delegated so there is nothing to do */ 1121 return (FALSE); 1122 case OPEN_DELEGATE_READ: 1123 /* 1124 * If the access is only asking for READ then there is 1125 * no conflict and nothing to do. If it is asking 1126 * for write, then there will be conflict and the read 1127 * delegation should be recalled. 1128 */ 1129 if (access == OPEN4_SHARE_ACCESS_READ) 1130 return (FALSE); 1131 else 1132 return (TRUE); 1133 case OPEN_DELEGATE_WRITE: 1134 /* Check to see if this client has the delegation */ 1135 return (rfs4_is_deleg(sp)); 1136 } 1137 1138 return (FALSE); 1139 } 1140 1141 /* 1142 * Return the "best" allowable delegation available given the current 1143 * delegation type and the desired access and deny modes on the file. 1144 * At the point that this routine is called we know that the access and 1145 * deny modes are consistent with the file modes. 1146 */ 1147 static open_delegation_type4 1148 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp) 1149 { 1150 open_delegation_type4 dtype = fp->rf_dinfo.rd_dtype; 1151 uint32_t access = sp->rs_share_access; 1152 uint32_t deny = sp->rs_share_deny; 1153 int readcnt = 0; 1154 int writecnt = 0; 1155 1156 switch (dtype) { 1157 case OPEN_DELEGATE_NONE: 1158 /* 1159 * Determine if more than just this OPEN have the file 1160 * open and if so, no delegation may be provided to 1161 * the client. 1162 */ 1163 if (access & OPEN4_SHARE_ACCESS_WRITE) 1164 writecnt++; 1165 if (access & OPEN4_SHARE_ACCESS_READ) 1166 readcnt++; 1167 1168 if (fp->rf_access_read > readcnt || 1169 fp->rf_access_write > writecnt) 1170 return (OPEN_DELEGATE_NONE); 1171 1172 /* 1173 * If the client is going to write, or if the client 1174 * has exclusive access, return a write delegation. 1175 */ 1176 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1177 (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) 1178 return (OPEN_DELEGATE_WRITE); 1179 /* 1180 * If we don't want to write or we've haven't denied read 1181 * access to others, return a read delegation. 1182 */ 1183 if ((access & ~OPEN4_SHARE_ACCESS_WRITE) || 1184 (deny & ~OPEN4_SHARE_DENY_READ)) 1185 return (OPEN_DELEGATE_READ); 1186 1187 /* Shouldn't get here */ 1188 return (OPEN_DELEGATE_NONE); 1189 1190 case OPEN_DELEGATE_READ: 1191 /* 1192 * If the file is delegated for read but we wan't to 1193 * write or deny others to read then we can't delegate 1194 * the file. We shouldn't get here since the delegation should 1195 * have been recalled already. 1196 */ 1197 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1198 (deny & OPEN4_SHARE_DENY_READ)) 1199 return (OPEN_DELEGATE_NONE); 1200 return (OPEN_DELEGATE_READ); 1201 1202 case OPEN_DELEGATE_WRITE: 1203 return (OPEN_DELEGATE_WRITE); 1204 } 1205 1206 /* Shouldn't get here */ 1207 return (OPEN_DELEGATE_NONE); 1208 } 1209 1210 /* 1211 * Given the desired delegation type and the "history" of the file 1212 * determine the actual delegation type to return. 1213 */ 1214 static open_delegation_type4 1215 rfs4_delegation_policy(nfs4_srv_t *nsrv4, open_delegation_type4 dtype, 1216 rfs4_dinfo_t *dinfo, clientid4 cid) 1217 { 1218 time_t elapsed; 1219 1220 if (nsrv4->nfs4_deleg_policy != SRV_NORMAL_DELEGATE) 1221 return (OPEN_DELEGATE_NONE); 1222 1223 /* 1224 * Has this file/delegation ever been recalled? If not then 1225 * no further checks for a delegation race need to be done. 1226 * However if a recall has occurred, then check to see if a 1227 * client has caused its own delegation recall to occur. If 1228 * not, then has a delegation for this file been returned 1229 * recently? If so, then do not assign a new delegation to 1230 * avoid a "delegation race" between the original client and 1231 * the new/conflicting client. 1232 */ 1233 if (dinfo->rd_ever_recalled == TRUE) { 1234 if (dinfo->rd_conflicted_client != cid) { 1235 elapsed = gethrestime_sec() - dinfo->rd_time_returned; 1236 if (elapsed < rfs4_lease_time) 1237 return (OPEN_DELEGATE_NONE); 1238 } 1239 } 1240 1241 /* Limit the number of read grants */ 1242 if (dtype == OPEN_DELEGATE_READ && 1243 dinfo->rd_rdgrants > MAX_READ_DELEGATIONS) 1244 return (OPEN_DELEGATE_NONE); 1245 1246 /* 1247 * Should consider limiting total number of read/write 1248 * delegations the server will permit. 1249 */ 1250 1251 return (dtype); 1252 } 1253 1254 /* 1255 * Try and grant a delegation for an open give the state. The routine 1256 * returns the delegation type granted. This could be OPEN_DELEGATE_NONE. 1257 * 1258 * The state and associate file entry must be locked 1259 */ 1260 rfs4_deleg_state_t * 1261 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall) 1262 { 1263 nfs4_srv_t *nsrv4; 1264 rfs4_file_t *fp = sp->rs_finfo; 1265 open_delegation_type4 dtype; 1266 int no_delegation; 1267 1268 ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); 1269 ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); 1270 1271 nsrv4 = nfs4_get_srv(); 1272 1273 /* Is the server even providing delegations? */ 1274 if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE || 1275 dreq == DELEG_NONE) { 1276 return (NULL); 1277 } 1278 1279 /* Check to see if delegations have been temporarily disabled */ 1280 mutex_enter(&nsrv4->deleg_lock); 1281 no_delegation = rfs4_deleg_disabled; 1282 mutex_exit(&nsrv4->deleg_lock); 1283 1284 if (no_delegation) 1285 return (NULL); 1286 1287 /* Don't grant a delegation if a deletion is impending. */ 1288 if (fp->rf_dinfo.rd_hold_grant > 0) { 1289 return (NULL); 1290 } 1291 1292 /* 1293 * Don't grant a delegation if there are any lock manager 1294 * (NFSv2/v3) locks for the file. This is a bit of a hack (e.g., 1295 * if there are only read locks we should be able to grant a 1296 * read-only delegation), but it's good enough for now. 1297 * 1298 * MT safety: the lock manager checks for conflicting delegations 1299 * before processing a lock request. That check will block until 1300 * we are done here. So if the lock manager acquires a lock after 1301 * we decide to grant the delegation, the delegation will get 1302 * immediately recalled (if there's a conflict), so we're safe. 1303 */ 1304 if (lm_vp_active(fp->rf_vp)) { 1305 return (NULL); 1306 } 1307 1308 /* 1309 * Based on the type of delegation request passed in, take the 1310 * appropriate action (DELEG_NONE is handled above) 1311 */ 1312 switch (dreq) { 1313 1314 case DELEG_READ: 1315 case DELEG_WRITE: 1316 /* 1317 * The server "must" grant the delegation in this case. 1318 * Client is using open previous 1319 */ 1320 dtype = (open_delegation_type4)dreq; 1321 *recall = 1; 1322 break; 1323 case DELEG_ANY: 1324 /* 1325 * If a valid callback path does not exist, no delegation may 1326 * be granted. 1327 */ 1328 if (sp->rs_owner->ro_client->rc_cbinfo.cb_state != CB_OK) 1329 return (NULL); 1330 1331 /* 1332 * If the original operation which caused time_rm_delayed 1333 * to be set hasn't been retried and completed for one 1334 * full lease period, clear it and allow delegations to 1335 * get granted again. 1336 */ 1337 if (fp->rf_dinfo.rd_time_rm_delayed > 0 && 1338 gethrestime_sec() > 1339 fp->rf_dinfo.rd_time_rm_delayed + rfs4_lease_time) 1340 fp->rf_dinfo.rd_time_rm_delayed = 0; 1341 1342 /* 1343 * If we are waiting for a delegation to be returned then 1344 * don't delegate this file. We do this for correctness as 1345 * well as if the file is being recalled we would likely 1346 * recall this file again. 1347 */ 1348 1349 if (fp->rf_dinfo.rd_time_recalled != 0 || 1350 fp->rf_dinfo.rd_time_rm_delayed != 0) 1351 return (NULL); 1352 1353 /* Get the "best" delegation candidate */ 1354 dtype = rfs4_check_delegation(sp, fp); 1355 1356 if (dtype == OPEN_DELEGATE_NONE) 1357 return (NULL); 1358 1359 /* 1360 * Based on policy and the history of the file get the 1361 * actual delegation. 1362 */ 1363 dtype = rfs4_delegation_policy(nsrv4, dtype, &fp->rf_dinfo, 1364 sp->rs_owner->ro_client->rc_clientid); 1365 1366 if (dtype == OPEN_DELEGATE_NONE) 1367 return (NULL); 1368 break; 1369 default: 1370 return (NULL); 1371 } 1372 1373 /* set the delegation for the state */ 1374 return (rfs4_deleg_state(sp, dtype, recall)); 1375 } 1376 1377 void 1378 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp, 1379 nfsace4 *ace, int recall) 1380 { 1381 open_write_delegation4 *wp; 1382 open_read_delegation4 *rp; 1383 nfs_space_limit4 *spl; 1384 nfsace4 nace; 1385 1386 /* 1387 * We need to allocate a new copy of the who string. 1388 * this string will be freed by the rfs4_op_open dis_resfree 1389 * routine. We need to do this allocation since replays will 1390 * be allocated and rfs4_compound can't tell the difference from 1391 * a replay and an inital open. N.B. if an ace is passed in, it 1392 * the caller's responsibility to free it. 1393 */ 1394 1395 if (ace == NULL) { 1396 /* 1397 * Default is to deny all access, the client will have 1398 * to contact the server. XXX Do we want to actually 1399 * set a deny for every one, or do we simply want to 1400 * construct an entity that will match no one? 1401 */ 1402 nace.type = ACE4_ACCESS_DENIED_ACE_TYPE; 1403 nace.flag = 0; 1404 nace.access_mask = ACE4_VALID_MASK_BITS; 1405 (void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who); 1406 } else { 1407 nace.type = ace->type; 1408 nace.flag = ace->flag; 1409 nace.access_mask = ace->access_mask; 1410 (void) utf8_copy(&ace->who, &nace.who); 1411 } 1412 1413 dp->delegation_type = dsp->rds_dtype; 1414 1415 switch (dsp->rds_dtype) { 1416 case OPEN_DELEGATE_NONE: 1417 break; 1418 case OPEN_DELEGATE_READ: 1419 rp = &dp->open_delegation4_u.read; 1420 rp->stateid = dsp->rds_delegid.stateid; 1421 rp->recall = (bool_t)recall; 1422 rp->permissions = nace; 1423 break; 1424 case OPEN_DELEGATE_WRITE: 1425 wp = &dp->open_delegation4_u.write; 1426 wp->stateid = dsp->rds_delegid.stateid; 1427 wp->recall = (bool_t)recall; 1428 spl = &wp->space_limit; 1429 spl->limitby = NFS_LIMIT_SIZE; 1430 spl->nfs_space_limit4_u.filesize = 0; 1431 wp->permissions = nace; 1432 break; 1433 } 1434 } 1435 1436 /* 1437 * Check if the file is delegated via the provided file struct. 1438 * Return TRUE if it is delegated. This is intended for use by 1439 * the v4 server. The v2/v3 server code should use rfs4_check_delegated(). 1440 * 1441 * Note that if the file is found to have a delegation, it is 1442 * recalled, unless the clientid of the caller matches the clientid of the 1443 * delegation. If the caller has specified, there is a slight delay 1444 * inserted in the hopes that the delegation will be returned quickly. 1445 */ 1446 bool_t 1447 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp, 1448 bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp) 1449 { 1450 rfs4_deleg_state_t *dsp; 1451 1452 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 1453 1454 /* Is delegation enabled? */ 1455 if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE) 1456 return (FALSE); 1457 1458 /* do we have a delegation on this file? */ 1459 rfs4_dbe_lock(fp->rf_dbe); 1460 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) { 1461 if (is_rm) 1462 fp->rf_dinfo.rd_hold_grant++; 1463 rfs4_dbe_unlock(fp->rf_dbe); 1464 return (FALSE); 1465 } 1466 /* 1467 * do we have a write delegation on this file or are we 1468 * requesting write access to a file with any type of existing 1469 * delegation? 1470 */ 1471 if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) { 1472 if (cp != NULL) { 1473 dsp = list_head(&fp->rf_delegstatelist); 1474 if (dsp == NULL) { 1475 rfs4_dbe_unlock(fp->rf_dbe); 1476 return (FALSE); 1477 } 1478 /* 1479 * Does the requestor already own the delegation? 1480 */ 1481 if (dsp->rds_client->rc_clientid == *(cp)) { 1482 rfs4_dbe_unlock(fp->rf_dbe); 1483 return (FALSE); 1484 } 1485 } 1486 1487 rfs4_dbe_unlock(fp->rf_dbe); 1488 rfs4_recall_deleg(fp, trunc, NULL); 1489 1490 if (!do_delay) { 1491 rfs4_dbe_lock(fp->rf_dbe); 1492 fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec(); 1493 rfs4_dbe_unlock(fp->rf_dbe); 1494 return (TRUE); 1495 } 1496 1497 delay(NFS4_DELEGATION_CONFLICT_DELAY); 1498 1499 rfs4_dbe_lock(fp->rf_dbe); 1500 if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) { 1501 fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec(); 1502 rfs4_dbe_unlock(fp->rf_dbe); 1503 return (TRUE); 1504 } 1505 } 1506 if (is_rm) 1507 fp->rf_dinfo.rd_hold_grant++; 1508 rfs4_dbe_unlock(fp->rf_dbe); 1509 return (FALSE); 1510 } 1511 1512 /* 1513 * Check if the file is delegated in the case of a v2 or v3 access. 1514 * Return TRUE if it is delegated which in turn means that v2 should 1515 * drop the request and in the case of v3 JUKEBOX should be returned. 1516 */ 1517 bool_t 1518 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc) 1519 { 1520 nfs4_srv_t *nsrv4; 1521 rfs4_file_t *fp; 1522 bool_t create = FALSE; 1523 bool_t rc = FALSE; 1524 1525 nsrv4 = nfs4_get_srv(); 1526 rfs4_hold_deleg_policy(nsrv4); 1527 1528 /* Is delegation enabled? */ 1529 if (nsrv4->nfs4_deleg_policy != SRV_NEVER_DELEGATE) { 1530 fp = rfs4_findfile(vp, NULL, &create); 1531 if (fp != NULL) { 1532 if (rfs4_check_delegated_byfp(mode, fp, trunc, 1533 TRUE, FALSE, NULL)) { 1534 rc = TRUE; 1535 } 1536 rfs4_file_rele(fp); 1537 } 1538 } 1539 rfs4_rele_deleg_policy(nsrv4); 1540 return (rc); 1541 } 1542 1543 /* 1544 * Release a hold on the hold_grant counter which 1545 * prevents delegation from being granted while a remove 1546 * or a rename is in progress. 1547 */ 1548 void 1549 rfs4_clear_dont_grant(rfs4_file_t *fp) 1550 { 1551 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 1552 1553 if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE) 1554 return; 1555 rfs4_dbe_lock(fp->rf_dbe); 1556 ASSERT(fp->rf_dinfo.rd_hold_grant > 0); 1557 fp->rf_dinfo.rd_hold_grant--; 1558 fp->rf_dinfo.rd_time_rm_delayed = 0; 1559 rfs4_dbe_unlock(fp->rf_dbe); 1560 } 1561 1562 /* 1563 * State support for delegation. 1564 * Set the state delegation type for this state; 1565 * This routine is called from open via rfs4_grant_delegation and the entry 1566 * locks on sp and sp->rs_finfo are assumed. 1567 */ 1568 static rfs4_deleg_state_t * 1569 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall) 1570 { 1571 rfs4_file_t *fp = sp->rs_finfo; 1572 bool_t create = TRUE; 1573 rfs4_deleg_state_t *dsp; 1574 vnode_t *vp; 1575 int open_prev = *recall; 1576 int ret; 1577 int fflags = 0; 1578 1579 ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); 1580 ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); 1581 1582 /* Shouldn't happen */ 1583 if (fp->rf_dinfo.rd_recall_count != 0 || 1584 (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ && 1585 dtype != OPEN_DELEGATE_READ)) { 1586 return (NULL); 1587 } 1588 1589 /* Unlock to avoid deadlock */ 1590 rfs4_dbe_unlock(fp->rf_dbe); 1591 rfs4_dbe_unlock(sp->rs_dbe); 1592 1593 dsp = rfs4_finddeleg(sp, &create); 1594 1595 rfs4_dbe_lock(sp->rs_dbe); 1596 rfs4_dbe_lock(fp->rf_dbe); 1597 1598 if (dsp == NULL) 1599 return (NULL); 1600 1601 /* 1602 * It is possible that since we dropped the lock 1603 * in order to call finddeleg, the rfs4_file_t 1604 * was marked such that we should not grant a 1605 * delegation, if so bail out. 1606 */ 1607 if (fp->rf_dinfo.rd_hold_grant > 0) { 1608 rfs4_deleg_state_rele(dsp); 1609 return (NULL); 1610 } 1611 1612 if (create == FALSE) { 1613 if (sp->rs_owner->ro_client == dsp->rds_client && 1614 dsp->rds_dtype == dtype) { 1615 return (dsp); 1616 } else { 1617 rfs4_deleg_state_rele(dsp); 1618 return (NULL); 1619 } 1620 } 1621 1622 /* 1623 * Check that this file has not been delegated to another 1624 * client 1625 */ 1626 if (fp->rf_dinfo.rd_recall_count != 0 || 1627 fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE || 1628 (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ && 1629 dtype != OPEN_DELEGATE_READ)) { 1630 rfs4_deleg_state_rele(dsp); 1631 return (NULL); 1632 } 1633 1634 vp = fp->rf_vp; 1635 /* vnevent_support returns 0 if file system supports vnevents */ 1636 if (vnevent_support(vp, NULL)) { 1637 rfs4_deleg_state_rele(dsp); 1638 return (NULL); 1639 } 1640 1641 /* Calculate the fflags for this OPEN. */ 1642 if (sp->rs_share_access & OPEN4_SHARE_ACCESS_READ) 1643 fflags |= FREAD; 1644 if (sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE) 1645 fflags |= FWRITE; 1646 1647 *recall = 0; 1648 /* 1649 * Before granting a delegation we need to know if anyone else has 1650 * opened the file in a conflicting mode. However, first we need to 1651 * know how we opened the file to check the counts properly. 1652 */ 1653 if (dtype == OPEN_DELEGATE_READ) { 1654 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1655 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1656 vn_is_mapped(vp, V_WRITE)) { 1657 if (open_prev) { 1658 *recall = 1; 1659 } else { 1660 rfs4_deleg_state_rele(dsp); 1661 return (NULL); 1662 } 1663 } 1664 ret = fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ, 1665 rfs4_mon_hold, rfs4_mon_rele); 1666 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1667 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1668 vn_is_mapped(vp, V_WRITE)) { 1669 if (open_prev) { 1670 *recall = 1; 1671 } else { 1672 (void) fem_uninstall(vp, deleg_rdops, 1673 (void *)fp); 1674 rfs4_deleg_state_rele(dsp); 1675 return (NULL); 1676 } 1677 } 1678 /* 1679 * Because a client can hold onto a delegation after the 1680 * file has been closed, we need to keep track of the 1681 * access to this file. Otherwise the CIFS server would 1682 * not know about the client accessing the file and could 1683 * inappropriately grant an OPLOCK. 1684 * fem_install() returns EBUSY when asked to install a 1685 * OPUNIQ monitor more than once. Therefore, check the 1686 * return code because we only want this done once. 1687 */ 1688 if (ret == 0) 1689 vn_open_upgrade(vp, FREAD); 1690 } else { /* WRITE */ 1691 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1692 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1693 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) || 1694 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) || 1695 vn_is_mapped(vp, V_RDORWR)) { 1696 if (open_prev) { 1697 *recall = 1; 1698 } else { 1699 rfs4_deleg_state_rele(dsp); 1700 return (NULL); 1701 } 1702 } 1703 ret = fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ, 1704 rfs4_mon_hold, rfs4_mon_rele); 1705 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1706 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1707 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) || 1708 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) || 1709 vn_is_mapped(vp, V_RDORWR)) { 1710 if (open_prev) { 1711 *recall = 1; 1712 } else { 1713 (void) fem_uninstall(vp, deleg_wrops, 1714 (void *)fp); 1715 rfs4_deleg_state_rele(dsp); 1716 return (NULL); 1717 } 1718 } 1719 /* 1720 * Because a client can hold onto a delegation after the 1721 * file has been closed, we need to keep track of the 1722 * access to this file. Otherwise the CIFS server would 1723 * not know about the client accessing the file and could 1724 * inappropriately grant an OPLOCK. 1725 * fem_install() returns EBUSY when asked to install a 1726 * OPUNIQ monitor more than once. Therefore, check the 1727 * return code because we only want this done once. 1728 */ 1729 if (ret == 0) 1730 vn_open_upgrade(vp, FREAD|FWRITE); 1731 } 1732 /* Place on delegation list for file */ 1733 ASSERT(!list_link_active(&dsp->rds_node)); 1734 list_insert_tail(&fp->rf_delegstatelist, dsp); 1735 1736 dsp->rds_dtype = fp->rf_dinfo.rd_dtype = dtype; 1737 1738 /* Update delegation stats for this file */ 1739 fp->rf_dinfo.rd_time_lastgrant = gethrestime_sec(); 1740 1741 /* reset since this is a new delegation */ 1742 fp->rf_dinfo.rd_conflicted_client = 0; 1743 fp->rf_dinfo.rd_ever_recalled = FALSE; 1744 1745 if (dtype == OPEN_DELEGATE_READ) 1746 fp->rf_dinfo.rd_rdgrants++; 1747 else 1748 fp->rf_dinfo.rd_wrgrants++; 1749 1750 return (dsp); 1751 } 1752 1753 /* 1754 * State routine for the server when a delegation is returned. 1755 */ 1756 void 1757 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked) 1758 { 1759 rfs4_file_t *fp = dsp->rds_finfo; 1760 open_delegation_type4 dtypewas; 1761 1762 rfs4_dbe_lock(fp->rf_dbe); 1763 1764 /* nothing to do if no longer on list */ 1765 if (!list_link_active(&dsp->rds_node)) { 1766 rfs4_dbe_unlock(fp->rf_dbe); 1767 return; 1768 } 1769 1770 /* Remove state from recall list */ 1771 list_remove(&fp->rf_delegstatelist, dsp); 1772 1773 if (list_is_empty(&fp->rf_delegstatelist)) { 1774 dtypewas = fp->rf_dinfo.rd_dtype; 1775 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE; 1776 rfs4_dbe_cv_broadcast(fp->rf_dbe); 1777 1778 /* if file system was unshared, the vp will be NULL */ 1779 if (fp->rf_vp != NULL) { 1780 /* 1781 * Once a delegation is no longer held by any client, 1782 * the monitor is uninstalled. At this point, the 1783 * client must send OPEN otw, so we don't need the 1784 * reference on the vnode anymore. The open 1785 * downgrade removes the reference put on earlier. 1786 */ 1787 if (dtypewas == OPEN_DELEGATE_READ) { 1788 (void) fem_uninstall(fp->rf_vp, deleg_rdops, 1789 (void *)fp); 1790 vn_open_downgrade(fp->rf_vp, FREAD); 1791 } else if (dtypewas == OPEN_DELEGATE_WRITE) { 1792 (void) fem_uninstall(fp->rf_vp, deleg_wrops, 1793 (void *)fp); 1794 vn_open_downgrade(fp->rf_vp, FREAD|FWRITE); 1795 } 1796 } 1797 } 1798 1799 switch (dsp->rds_dtype) { 1800 case OPEN_DELEGATE_READ: 1801 fp->rf_dinfo.rd_rdgrants--; 1802 break; 1803 case OPEN_DELEGATE_WRITE: 1804 fp->rf_dinfo.rd_wrgrants--; 1805 break; 1806 default: 1807 break; 1808 } 1809 1810 /* used in the policy decision */ 1811 fp->rf_dinfo.rd_time_returned = gethrestime_sec(); 1812 1813 /* 1814 * reset the time_recalled field so future delegations are not 1815 * accidentally revoked 1816 */ 1817 if ((fp->rf_dinfo.rd_rdgrants + fp->rf_dinfo.rd_wrgrants) == 0) 1818 fp->rf_dinfo.rd_time_recalled = 0; 1819 1820 rfs4_dbe_unlock(fp->rf_dbe); 1821 1822 rfs4_dbe_lock(dsp->rds_dbe); 1823 1824 dsp->rds_dtype = OPEN_DELEGATE_NONE; 1825 1826 if (revoked == TRUE) 1827 dsp->rds_time_revoked = gethrestime_sec(); 1828 1829 rfs4_dbe_invalidate(dsp->rds_dbe); 1830 1831 rfs4_dbe_unlock(dsp->rds_dbe); 1832 1833 if (revoked == TRUE) { 1834 rfs4_dbe_lock(dsp->rds_client->rc_dbe); 1835 dsp->rds_client->rc_deleg_revoked++; /* observability */ 1836 rfs4_dbe_unlock(dsp->rds_client->rc_dbe); 1837 } 1838 } 1839 1840 static void 1841 rfs4_revoke_file(rfs4_file_t *fp) 1842 { 1843 rfs4_deleg_state_t *dsp; 1844 1845 /* 1846 * The lock for rfs4_file_t must be held when traversing the 1847 * delegation list but that lock needs to be released to call 1848 * rfs4_return_deleg() 1849 */ 1850 rfs4_dbe_lock(fp->rf_dbe); 1851 while ((dsp = list_head(&fp->rf_delegstatelist)) != NULL) { 1852 rfs4_dbe_hold(dsp->rds_dbe); 1853 rfs4_dbe_unlock(fp->rf_dbe); 1854 rfs4_return_deleg(dsp, TRUE); 1855 rfs4_deleg_state_rele(dsp); 1856 rfs4_dbe_lock(fp->rf_dbe); 1857 } 1858 rfs4_dbe_unlock(fp->rf_dbe); 1859 } 1860 1861 /* 1862 * A delegation is assumed to be present on the file associated with 1863 * "sp". Check to see if the delegation matches is associated with 1864 * the same client as referenced by "sp". If it is not, TRUE is 1865 * returned. If the delegation DOES match the client (or no 1866 * delegation is present), return FALSE. 1867 * Assume the state entry and file entry are locked. 1868 */ 1869 bool_t 1870 rfs4_is_deleg(rfs4_state_t *sp) 1871 { 1872 rfs4_deleg_state_t *dsp; 1873 rfs4_file_t *fp = sp->rs_finfo; 1874 rfs4_client_t *cp = sp->rs_owner->ro_client; 1875 1876 ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); 1877 for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL; 1878 dsp = list_next(&fp->rf_delegstatelist, dsp)) { 1879 if (cp != dsp->rds_client) { 1880 return (TRUE); 1881 } 1882 } 1883 return (FALSE); 1884 } 1885 1886 void 1887 rfs4_disable_delegation(void) 1888 { 1889 nfs4_srv_t *nsrv4; 1890 1891 nsrv4 = nfs4_get_srv(); 1892 mutex_enter(&nsrv4->deleg_lock); 1893 rfs4_deleg_disabled++; 1894 mutex_exit(&nsrv4->deleg_lock); 1895 } 1896 1897 void 1898 rfs4_enable_delegation(void) 1899 { 1900 nfs4_srv_t *nsrv4; 1901 1902 nsrv4 = nfs4_get_srv(); 1903 mutex_enter(&nsrv4->deleg_lock); 1904 ASSERT(rfs4_deleg_disabled > 0); 1905 rfs4_deleg_disabled--; 1906 mutex_exit(&nsrv4->deleg_lock); 1907 } 1908 1909 void 1910 rfs4_mon_hold(void *arg) 1911 { 1912 rfs4_file_t *fp = arg; 1913 1914 rfs4_dbe_hold(fp->rf_dbe); 1915 } 1916 1917 void 1918 rfs4_mon_rele(void *arg) 1919 { 1920 rfs4_file_t *fp = arg; 1921 1922 rfs4_dbe_rele_nolock(fp->rf_dbe); 1923 } 1924