1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright 2018 Nexenta Systems, Inc. 29 */ 30 31 #include <sys/systm.h> 32 #include <rpc/auth.h> 33 #include <rpc/clnt.h> 34 #include <nfs/nfs4_kprot.h> 35 #include <nfs/nfs4.h> 36 #include <nfs/lm.h> 37 #include <sys/cmn_err.h> 38 #include <sys/disp.h> 39 #include <sys/sdt.h> 40 41 #include <sys/pathname.h> 42 43 #include <sys/strsubr.h> 44 #include <sys/ddi.h> 45 46 #include <sys/vnode.h> 47 #include <sys/sdt.h> 48 #include <inet/common.h> 49 #include <inet/ip.h> 50 #include <inet/ip6.h> 51 52 #define MAX_READ_DELEGATIONS 5 53 54 static int rfs4_deleg_wlp = 5; 55 static int rfs4_deleg_disabled; 56 static int rfs4_max_setup_cb_tries = 5; 57 58 #ifdef DEBUG 59 60 static int rfs4_test_cbgetattr_fail = 0; 61 int rfs4_cb_null; 62 int rfs4_cb_debug; 63 int rfs4_deleg_debug; 64 65 #endif 66 67 static void rfs4_recall_file(rfs4_file_t *, 68 void (*recall)(rfs4_deleg_state_t *, bool_t), 69 bool_t, rfs4_client_t *); 70 static void rfs4_revoke_file(rfs4_file_t *); 71 static void rfs4_cb_chflush(rfs4_cbinfo_t *); 72 static CLIENT *rfs4_cb_getch(rfs4_cbinfo_t *); 73 static void rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t); 74 static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *, 75 open_delegation_type4, int *); 76 77 /* 78 * Convert a universal address to an transport specific 79 * address using inet_pton. 80 */ 81 static int 82 uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp) 83 { 84 int dots = 0, i, j, len, k; 85 unsigned char c; 86 in_port_t port = 0; 87 88 len = strlen(ua); 89 90 for (i = len-1; i >= 0; i--) { 91 92 if (ua[i] == '.') 93 dots++; 94 95 if (dots == 2) { 96 97 ua[i] = '\0'; 98 /* 99 * We use k to remember were to stick '.' back, since 100 * ua was kmem_allocateded from the pool len+1. 101 */ 102 k = i; 103 if (inet_pton(af, ua, ap) == 1) { 104 105 c = 0; 106 107 for (j = i+1; j < len; j++) { 108 if (ua[j] == '.') { 109 port = c << 8; 110 c = 0; 111 } else if (ua[j] >= '0' && 112 ua[j] <= '9') { 113 c *= 10; 114 c += ua[j] - '0'; 115 } else { 116 ua[k] = '.'; 117 return (EINVAL); 118 } 119 } 120 port += c; 121 122 *pp = htons(port); 123 124 ua[k] = '.'; 125 return (0); 126 } else { 127 ua[k] = '.'; 128 return (EINVAL); 129 } 130 } 131 } 132 133 return (EINVAL); 134 } 135 136 /* 137 * Update the delegation policy with the 138 * value of "new_policy" 139 */ 140 void 141 rfs4_set_deleg_policy(nfs4_srv_t *nsrv4, srv_deleg_policy_t new_policy) 142 { 143 rw_enter(&nsrv4->deleg_policy_lock, RW_WRITER); 144 nsrv4->nfs4_deleg_policy = new_policy; 145 rw_exit(&nsrv4->deleg_policy_lock); 146 } 147 148 void 149 rfs4_hold_deleg_policy(nfs4_srv_t *nsrv4) 150 { 151 rw_enter(&nsrv4->deleg_policy_lock, RW_READER); 152 } 153 154 void 155 rfs4_rele_deleg_policy(nfs4_srv_t *nsrv4) 156 { 157 rw_exit(&nsrv4->deleg_policy_lock); 158 } 159 160 srv_deleg_policy_t 161 nfs4_get_deleg_policy() 162 { 163 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 164 return (nsrv4->nfs4_deleg_policy); 165 } 166 167 168 /* 169 * This free function is to be used when the client struct is being 170 * released and nothing at all is needed of the callback info any 171 * longer. 172 */ 173 void 174 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp) 175 { 176 char *addr = cbp->cb_callback.cb_location.r_addr; 177 char *netid = cbp->cb_callback.cb_location.r_netid; 178 179 /* Free old address if any */ 180 181 if (addr) 182 kmem_free(addr, strlen(addr) + 1); 183 if (netid) 184 kmem_free(netid, strlen(netid) + 1); 185 186 addr = cbp->cb_newer.cb_callback.cb_location.r_addr; 187 netid = cbp->cb_newer.cb_callback.cb_location.r_netid; 188 189 if (addr) 190 kmem_free(addr, strlen(addr) + 1); 191 if (netid) 192 kmem_free(netid, strlen(netid) + 1); 193 194 if (cbp->cb_chc_free) { 195 rfs4_cb_chflush(cbp); 196 } 197 } 198 199 /* 200 * The server uses this to check the callback path supplied by the 201 * client. The callback connection is marked "in progress" while this 202 * work is going on and then eventually marked either OK or FAILED. 203 * This work can be done as part of a separate thread and at the end 204 * of this the thread will exit or it may be done such that the caller 205 * will continue with other work. 206 */ 207 static void 208 rfs4_do_cb_null(rfs4_client_t *cp) 209 { 210 struct timeval tv; 211 CLIENT *ch; 212 rfs4_cbstate_t newstate; 213 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo; 214 215 mutex_enter(cbp->cb_lock); 216 /* If another thread is doing CB_NULL RPC then return */ 217 if (cbp->cb_nullcaller == TRUE) { 218 mutex_exit(cbp->cb_lock); 219 rfs4_client_rele(cp); 220 zthread_exit(); 221 } 222 223 /* Mark the cbinfo as having a thread in the NULL callback */ 224 cbp->cb_nullcaller = TRUE; 225 226 /* 227 * Are there other threads still using the cbinfo client 228 * handles? If so, this thread must wait before going and 229 * mucking aroiund with the callback information 230 */ 231 while (cbp->cb_refcnt != 0) 232 cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock); 233 234 /* 235 * This thread itself may find that new callback info has 236 * arrived and is set up to handle this case and redrive the 237 * call to the client's callback server. 238 */ 239 retry: 240 if (cbp->cb_newer.cb_new == TRUE && 241 cbp->cb_newer.cb_confirmed == TRUE) { 242 char *addr = cbp->cb_callback.cb_location.r_addr; 243 char *netid = cbp->cb_callback.cb_location.r_netid; 244 245 /* 246 * Free the old stuff if it exists; may be the first 247 * time through this path 248 */ 249 if (addr) 250 kmem_free(addr, strlen(addr) + 1); 251 if (netid) 252 kmem_free(netid, strlen(netid) + 1); 253 254 /* Move over the addr/netid */ 255 cbp->cb_callback.cb_location.r_addr = 256 cbp->cb_newer.cb_callback.cb_location.r_addr; 257 cbp->cb_newer.cb_callback.cb_location.r_addr = NULL; 258 cbp->cb_callback.cb_location.r_netid = 259 cbp->cb_newer.cb_callback.cb_location.r_netid; 260 cbp->cb_newer.cb_callback.cb_location.r_netid = NULL; 261 262 /* Get the program number */ 263 cbp->cb_callback.cb_program = 264 cbp->cb_newer.cb_callback.cb_program; 265 cbp->cb_newer.cb_callback.cb_program = 0; 266 267 /* Don't forget the protocol's "cb_ident" field */ 268 cbp->cb_ident = cbp->cb_newer.cb_ident; 269 cbp->cb_newer.cb_ident = 0; 270 271 /* no longer new */ 272 cbp->cb_newer.cb_new = FALSE; 273 cbp->cb_newer.cb_confirmed = FALSE; 274 275 /* get rid of the old client handles that may exist */ 276 rfs4_cb_chflush(cbp); 277 278 cbp->cb_state = CB_NONE; 279 cbp->cb_timefailed = 0; /* reset the clock */ 280 cbp->cb_notified_of_cb_path_down = TRUE; 281 } 282 283 if (cbp->cb_state != CB_NONE) { 284 cv_broadcast(cbp->cb_cv); /* let the others know */ 285 cbp->cb_nullcaller = FALSE; 286 mutex_exit(cbp->cb_lock); 287 rfs4_client_rele(cp); 288 zthread_exit(); 289 } 290 291 /* mark rfs4_client_t as CALLBACK NULL in progress */ 292 cbp->cb_state = CB_INPROG; 293 mutex_exit(cbp->cb_lock); 294 295 /* get/generate a client handle */ 296 if ((ch = rfs4_cb_getch(cbp)) == NULL) { 297 mutex_enter(cbp->cb_lock); 298 cbp->cb_state = CB_BAD; 299 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 300 goto retry; 301 } 302 303 304 tv.tv_sec = 30; 305 tv.tv_usec = 0; 306 if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) { 307 newstate = CB_BAD; 308 } else { 309 newstate = CB_OK; 310 #ifdef DEBUG 311 rfs4_cb_null++; 312 #endif 313 } 314 315 /* Check to see if the client has specified new callback info */ 316 mutex_enter(cbp->cb_lock); 317 rfs4_cb_freech(cbp, ch, TRUE); 318 if (cbp->cb_newer.cb_new == TRUE && 319 cbp->cb_newer.cb_confirmed == TRUE) { 320 goto retry; /* give the CB_NULL another chance */ 321 } 322 323 cbp->cb_state = newstate; 324 if (cbp->cb_state == CB_BAD) 325 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 326 327 cv_broadcast(cbp->cb_cv); /* start up the other threads */ 328 cbp->cb_nullcaller = FALSE; 329 mutex_exit(cbp->cb_lock); 330 rfs4_client_rele(cp); 331 zthread_exit(); 332 } 333 334 /* 335 * Given a client struct, inspect the callback info to see if the 336 * callback path is up and available. 337 * 338 * If new callback path is available and no one has set it up then 339 * try to set it up. If setup is not successful after 5 tries (5 secs) 340 * then gives up and returns NULL. 341 * 342 * If callback path is being initialized, then wait for the CB_NULL RPC 343 * call to occur. 344 */ 345 static rfs4_cbinfo_t * 346 rfs4_cbinfo_hold(rfs4_client_t *cp) 347 { 348 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo; 349 int retries = 0; 350 351 mutex_enter(cbp->cb_lock); 352 353 while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) { 354 /* 355 * Looks like a new callback path may be available and 356 * noone has set it up. 357 */ 358 mutex_exit(cbp->cb_lock); 359 rfs4_dbe_hold(cp->rc_dbe); 360 rfs4_do_cb_null(cp); /* caller will release client hold */ 361 362 mutex_enter(cbp->cb_lock); 363 /* 364 * If callback path is no longer new, or it's being setup 365 * then stop and wait for it to be done. 366 */ 367 if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE) 368 break; 369 mutex_exit(cbp->cb_lock); 370 371 if (++retries >= rfs4_max_setup_cb_tries) 372 return (NULL); 373 delay(hz); 374 mutex_enter(cbp->cb_lock); 375 } 376 377 /* Is there a thread working on doing the CB_NULL RPC? */ 378 if (cbp->cb_nullcaller == TRUE) 379 cv_wait(cbp->cb_cv, cbp->cb_lock); /* if so, wait on it */ 380 381 /* If the callback path is not okay (up and running), just quit */ 382 if (cbp->cb_state != CB_OK) { 383 mutex_exit(cbp->cb_lock); 384 return (NULL); 385 } 386 387 /* Let someone know we are using the current callback info */ 388 cbp->cb_refcnt++; 389 mutex_exit(cbp->cb_lock); 390 return (cbp); 391 } 392 393 /* 394 * The caller is done with the callback info. It may be that the 395 * caller's RPC failed and the NFSv4 client has actually provided new 396 * callback information. If so, let the caller know so they can 397 * advantage of this and maybe retry the RPC that originally failed. 398 */ 399 static int 400 rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate) 401 { 402 int cb_new = FALSE; 403 404 mutex_enter(cbp->cb_lock); 405 406 /* The caller gets a chance to mark the callback info as bad */ 407 if (newstate != CB_NOCHANGE) 408 cbp->cb_state = newstate; 409 if (newstate == CB_FAILED) { 410 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 411 cbp->cb_notified_of_cb_path_down = FALSE; 412 } 413 414 cbp->cb_refcnt--; /* no longer using the information */ 415 416 /* 417 * A thread may be waiting on this one to finish and if so, 418 * let it know that it is okay to do the CB_NULL to the 419 * client's callback server. 420 */ 421 if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller) 422 cv_broadcast(cbp->cb_cv_nullcaller); 423 424 /* 425 * If this is the last thread to use the callback info and 426 * there is new callback information to try and no thread is 427 * there ready to do the CB_NULL, then return true to teh 428 * caller so they can do the CB_NULL 429 */ 430 if (cbp->cb_refcnt == 0 && 431 cbp->cb_nullcaller == FALSE && 432 cbp->cb_newer.cb_new == TRUE && 433 cbp->cb_newer.cb_confirmed == TRUE) 434 cb_new = TRUE; 435 436 mutex_exit(cbp->cb_lock); 437 438 return (cb_new); 439 } 440 441 /* 442 * Given the information in the callback info struct, create a client 443 * handle that can be used by the server for its callback path. 444 */ 445 static CLIENT * 446 rfs4_cbch_init(rfs4_cbinfo_t *cbp) 447 { 448 struct knetconfig knc; 449 vnode_t *vp; 450 struct sockaddr_in addr4; 451 struct sockaddr_in6 addr6; 452 void *addr, *taddr; 453 in_port_t *pp; 454 int af; 455 char *devnam; 456 struct netbuf nb; 457 int size; 458 CLIENT *ch = NULL; 459 int useresvport = 0; 460 461 mutex_enter(cbp->cb_lock); 462 463 if (cbp->cb_callback.cb_location.r_netid == NULL || 464 cbp->cb_callback.cb_location.r_addr == NULL) { 465 goto cb_init_out; 466 } 467 468 if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) { 469 knc.knc_semantics = NC_TPI_COTS; 470 knc.knc_protofmly = "inet"; 471 knc.knc_proto = "tcp"; 472 devnam = "/dev/tcp"; 473 af = AF_INET; 474 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp") 475 == 0) { 476 knc.knc_semantics = NC_TPI_CLTS; 477 knc.knc_protofmly = "inet"; 478 knc.knc_proto = "udp"; 479 devnam = "/dev/udp"; 480 af = AF_INET; 481 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6") 482 == 0) { 483 knc.knc_semantics = NC_TPI_COTS; 484 knc.knc_protofmly = "inet6"; 485 knc.knc_proto = "tcp"; 486 devnam = "/dev/tcp6"; 487 af = AF_INET6; 488 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6") 489 == 0) { 490 knc.knc_semantics = NC_TPI_CLTS; 491 knc.knc_protofmly = "inet6"; 492 knc.knc_proto = "udp"; 493 devnam = "/dev/udp6"; 494 af = AF_INET6; 495 } else { 496 goto cb_init_out; 497 } 498 499 if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) { 500 501 goto cb_init_out; 502 } 503 504 if (vp->v_type != VCHR) { 505 VN_RELE(vp); 506 goto cb_init_out; 507 } 508 509 knc.knc_rdev = vp->v_rdev; 510 511 VN_RELE(vp); 512 513 if (af == AF_INET) { 514 size = sizeof (addr4); 515 bzero(&addr4, size); 516 addr4.sin_family = (sa_family_t)af; 517 addr = &addr4.sin_addr; 518 pp = &addr4.sin_port; 519 taddr = &addr4; 520 } else /* AF_INET6 */ { 521 size = sizeof (addr6); 522 bzero(&addr6, size); 523 addr6.sin6_family = (sa_family_t)af; 524 addr = &addr6.sin6_addr; 525 pp = &addr6.sin6_port; 526 taddr = &addr6; 527 } 528 529 if (uaddr2sockaddr(af, 530 cbp->cb_callback.cb_location.r_addr, addr, pp)) { 531 532 goto cb_init_out; 533 } 534 535 536 nb.maxlen = nb.len = size; 537 nb.buf = (char *)taddr; 538 539 if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program, 540 NFS_CB, 0, 0, curthread->t_cred, &ch)) { 541 542 ch = NULL; 543 } 544 545 /* turn off reserved port usage */ 546 (void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport); 547 548 cb_init_out: 549 mutex_exit(cbp->cb_lock); 550 return (ch); 551 } 552 553 /* 554 * Iterate over the client handle cache and 555 * destroy it. 556 */ 557 static void 558 rfs4_cb_chflush(rfs4_cbinfo_t *cbp) 559 { 560 CLIENT *ch; 561 562 while (cbp->cb_chc_free) { 563 cbp->cb_chc_free--; 564 ch = cbp->cb_chc[cbp->cb_chc_free]; 565 cbp->cb_chc[cbp->cb_chc_free] = NULL; 566 if (ch) { 567 if (ch->cl_auth) 568 auth_destroy(ch->cl_auth); 569 clnt_destroy(ch); 570 } 571 } 572 } 573 574 /* 575 * Return a client handle, either from a the small 576 * rfs4_client_t cache or one that we just created. 577 */ 578 static CLIENT * 579 rfs4_cb_getch(rfs4_cbinfo_t *cbp) 580 { 581 CLIENT *cbch = NULL; 582 uint32_t zilch = 0; 583 584 mutex_enter(cbp->cb_lock); 585 586 if (cbp->cb_chc_free) { 587 cbp->cb_chc_free--; 588 cbch = cbp->cb_chc[ cbp->cb_chc_free ]; 589 mutex_exit(cbp->cb_lock); 590 (void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch); 591 return (cbch); 592 } 593 594 mutex_exit(cbp->cb_lock); 595 596 /* none free so make it now */ 597 cbch = rfs4_cbch_init(cbp); 598 599 return (cbch); 600 } 601 602 /* 603 * Return the client handle to the small cache or 604 * destroy it. 605 */ 606 static void 607 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld) 608 { 609 if (lockheld == FALSE) 610 mutex_enter(cbp->cb_lock); 611 612 if (cbp->cb_chc_free < RFS4_CBCH_MAX) { 613 cbp->cb_chc[ cbp->cb_chc_free++ ] = ch; 614 if (lockheld == FALSE) 615 mutex_exit(cbp->cb_lock); 616 return; 617 } 618 if (lockheld == FALSE) 619 mutex_exit(cbp->cb_lock); 620 621 /* 622 * cache maxed out of free entries, obliterate 623 * this client handle, destroy it, throw it away. 624 */ 625 if (ch->cl_auth) 626 auth_destroy(ch->cl_auth); 627 clnt_destroy(ch); 628 } 629 630 /* 631 * With the supplied callback information - initialize the client 632 * callback data. If there is a callback in progress, save the 633 * callback info so that a thread can pick it up in the future. 634 */ 635 void 636 rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident) 637 { 638 char *addr = NULL; 639 char *netid = NULL; 640 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo; 641 size_t len; 642 643 /* Set the call back for the client */ 644 if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' && 645 cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') { 646 len = strlen(cb->cb_location.r_addr) + 1; 647 addr = kmem_alloc(len, KM_SLEEP); 648 bcopy(cb->cb_location.r_addr, addr, len); 649 len = strlen(cb->cb_location.r_netid) + 1; 650 netid = kmem_alloc(len, KM_SLEEP); 651 bcopy(cb->cb_location.r_netid, netid, len); 652 } 653 /* ready to save the new information but first free old, if exists */ 654 mutex_enter(cbp->cb_lock); 655 656 cbp->cb_newer.cb_callback.cb_program = cb->cb_program; 657 658 if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL) 659 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr, 660 strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1); 661 cbp->cb_newer.cb_callback.cb_location.r_addr = addr; 662 663 if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL) 664 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid, 665 strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1); 666 cbp->cb_newer.cb_callback.cb_location.r_netid = netid; 667 668 cbp->cb_newer.cb_ident = cb_ident; 669 670 if (addr && *addr && netid && *netid) { 671 cbp->cb_newer.cb_new = TRUE; 672 cbp->cb_newer.cb_confirmed = FALSE; 673 } else { 674 cbp->cb_newer.cb_new = FALSE; 675 cbp->cb_newer.cb_confirmed = FALSE; 676 } 677 678 mutex_exit(cbp->cb_lock); 679 } 680 681 /* 682 * The server uses this when processing SETCLIENTID_CONFIRM. Callback 683 * information may have been provided on SETCLIENTID and this call 684 * marks that information as confirmed and then starts a thread to 685 * test the callback path. 686 */ 687 void 688 rfs4_deleg_cb_check(rfs4_client_t *cp) 689 { 690 if (cp->rc_cbinfo.cb_newer.cb_new == FALSE) 691 return; 692 693 cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE; 694 695 rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */ 696 697 (void) zthread_create(NULL, 0, rfs4_do_cb_null, cp, 0, 698 minclsyspri); 699 } 700 701 static void 702 rfs4args_cb_recall_free(nfs_cb_argop4 *argop) 703 { 704 CB_RECALL4args *rec_argp; 705 706 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 707 if (rec_argp->fh.nfs_fh4_val) 708 kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len); 709 } 710 711 /* ARGSUSED */ 712 static void 713 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop) 714 { 715 CB_GETATTR4args *argp; 716 717 argp = &argop->nfs_cb_argop4_u.opcbgetattr; 718 if (argp->fh.nfs_fh4_val) 719 kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len); 720 } 721 722 static void 723 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp) 724 { 725 int i, arglen; 726 nfs_cb_argop4 *argop; 727 728 /* 729 * First free any special args alloc'd for specific ops. 730 */ 731 arglen = args->array_len; 732 argop = args->array; 733 for (i = 0; i < arglen; i++, argop++) { 734 735 switch (argop->argop) { 736 case OP_CB_RECALL: 737 rfs4args_cb_recall_free(argop); 738 break; 739 740 case OP_CB_GETATTR: 741 rfs4args_cb_getattr_free(argop); 742 break; 743 744 default: 745 return; 746 } 747 } 748 749 if (args->tag.utf8string_len > 0) 750 UTF8STRING_FREE(args->tag) 751 752 kmem_free(args->array, arglen * sizeof (nfs_cb_argop4)); 753 if (resp) 754 xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp); 755 } 756 757 /* 758 * General callback routine for the server to the client. 759 */ 760 static enum clnt_stat 761 rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args, 762 CB_COMPOUND4res *res, struct timeval timeout) 763 { 764 rfs4_cbinfo_t *cbp; 765 CLIENT *ch; 766 /* start with this in case cb_getch() fails */ 767 enum clnt_stat stat = RPC_FAILED; 768 769 res->tag.utf8string_val = NULL; 770 res->array = NULL; 771 772 retry: 773 cbp = rfs4_cbinfo_hold(cp); 774 if (cbp == NULL) 775 return (stat); 776 777 /* get a client handle */ 778 if ((ch = rfs4_cb_getch(cbp)) != NULL) { 779 /* 780 * reset the cb_ident since it may have changed in 781 * rfs4_cbinfo_hold() 782 */ 783 args->callback_ident = cbp->cb_ident; 784 785 stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv, 786 (caddr_t)args, xdr_CB_COMPOUND4res, 787 (caddr_t)res, timeout); 788 789 /* free client handle */ 790 rfs4_cb_freech(cbp, ch, FALSE); 791 } 792 793 /* 794 * If the rele says that there may be new callback info then 795 * retry this sequence and it may succeed as a result of the 796 * new callback path 797 */ 798 if (rfs4_cbinfo_rele(cbp, 799 (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE) 800 goto retry; 801 802 return (stat); 803 } 804 805 /* 806 * Used by the NFSv4 server to get attributes for a file while 807 * handling the case where a file has been write delegated. For the 808 * time being, VOP_GETATTR() is called and CB_GETATTR processing is 809 * not undertaken. This call site is maintained in case the server is 810 * updated in the future to handle write delegation space guarantees. 811 */ 812 nfsstat4 813 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 814 { 815 816 int error; 817 818 error = VOP_GETATTR(vp, vap, flag, cr, NULL); 819 return (puterrno4(error)); 820 } 821 822 /* 823 * This is used everywhere in the v2/v3 server to allow the 824 * integration of all NFS versions and the support of delegation. For 825 * now, just call the VOP_GETATTR(). If the NFSv4 server is enhanced 826 * in the future to provide space guarantees for write delegations 827 * then this call site should be expanded to interact with the client. 828 */ 829 int 830 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 831 { 832 return (VOP_GETATTR(vp, vap, flag, cr, NULL)); 833 } 834 835 /* 836 * Place the actual cb_recall otw call to client. 837 */ 838 static void 839 rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc) 840 { 841 CB_COMPOUND4args cb4_args; 842 CB_COMPOUND4res cb4_res; 843 CB_RECALL4args *rec_argp; 844 CB_RECALL4res *rec_resp; 845 nfs_cb_argop4 *argop; 846 int numops; 847 int argoplist_size; 848 struct timeval timeout; 849 nfs_fh4 *fhp; 850 enum clnt_stat call_stat; 851 852 /* 853 * set up the compound args 854 */ 855 numops = 1; /* CB_RECALL only */ 856 857 argoplist_size = numops * sizeof (nfs_cb_argop4); 858 argop = kmem_zalloc(argoplist_size, KM_SLEEP); 859 argop->argop = OP_CB_RECALL; 860 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 861 862 (void) str_to_utf8("cb_recall", &cb4_args.tag); 863 cb4_args.minorversion = CB4_MINORVERSION; 864 /* cb4_args.callback_ident is set in rfs4_do_callback() */ 865 cb4_args.array_len = numops; 866 cb4_args.array = argop; 867 868 /* 869 * fill in the args struct 870 */ 871 bcopy(&dsp->rds_delegid.stateid, &rec_argp->stateid, sizeof (stateid4)); 872 rec_argp->truncate = trunc; 873 874 fhp = &dsp->rds_finfo->rf_filehandle; 875 rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) * 876 fhp->nfs_fh4_len, KM_SLEEP); 877 nfs_fh4_copy(fhp, &rec_argp->fh); 878 879 /* Keep track of when we did this for observability */ 880 dsp->rds_time_recalled = gethrestime_sec(); 881 882 /* 883 * Set up the timeout for the callback and make the actual call. 884 * Timeout will be 80% of the lease period for this server. 885 */ 886 timeout.tv_sec = (rfs4_lease_time * 80) / 100; 887 timeout.tv_usec = 0; 888 889 DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->rds_client, 890 rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp); 891 892 call_stat = rfs4_do_callback(dsp->rds_client, &cb4_args, &cb4_res, 893 timeout); 894 895 rec_resp = (cb4_res.array_len == 0) ? NULL : 896 &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall; 897 898 DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->rds_client, 899 rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp); 900 901 if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) { 902 rfs4_return_deleg(dsp, TRUE); 903 } 904 905 rfs4freeargres(&cb4_args, &cb4_res); 906 } 907 908 struct recall_arg { 909 rfs4_deleg_state_t *dsp; 910 void (*recall)(rfs4_deleg_state_t *, bool_t trunc); 911 bool_t trunc; 912 }; 913 914 static void 915 do_recall(struct recall_arg *arg) 916 { 917 rfs4_deleg_state_t *dsp = arg->dsp; 918 rfs4_file_t *fp = dsp->rds_finfo; 919 callb_cpr_t cpr_info; 920 kmutex_t cpr_lock; 921 922 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 923 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall"); 924 925 /* 926 * It is possible that before this thread starts 927 * the client has send us a return_delegation, and 928 * if that is the case we do not need to send the 929 * recall callback. 930 */ 931 if (dsp->rds_dtype != OPEN_DELEGATE_NONE) { 932 DTRACE_PROBE3(nfss__i__recall, 933 struct recall_arg *, arg, 934 struct rfs4_deleg_state_t *, dsp, 935 struct rfs4_file_t *, fp); 936 937 if (arg->recall) 938 (void) (*arg->recall)(dsp, arg->trunc); 939 } 940 941 mutex_enter(fp->rf_dinfo.rd_recall_lock); 942 /* 943 * Recall count may go negative if the parent thread that is 944 * creating the individual callback threads does not modify 945 * the recall_count field before the callback thread actually 946 * gets a response from the CB_RECALL 947 */ 948 fp->rf_dinfo.rd_recall_count--; 949 if (fp->rf_dinfo.rd_recall_count == 0) 950 cv_signal(fp->rf_dinfo.rd_recall_cv); 951 mutex_exit(fp->rf_dinfo.rd_recall_lock); 952 953 mutex_enter(&cpr_lock); 954 CALLB_CPR_EXIT(&cpr_info); 955 mutex_destroy(&cpr_lock); 956 957 rfs4_deleg_state_rele(dsp); /* release the hold for this thread */ 958 kmem_free(arg, sizeof (struct recall_arg)); 959 zthread_exit(); 960 } 961 962 struct master_recall_args { 963 rfs4_file_t *fp; 964 void (*recall)(rfs4_deleg_state_t *, bool_t); 965 bool_t trunc; 966 }; 967 968 static void 969 do_recall_file(struct master_recall_args *map) 970 { 971 rfs4_file_t *fp = map->fp; 972 rfs4_deleg_state_t *dsp; 973 struct recall_arg *arg; 974 callb_cpr_t cpr_info; 975 kmutex_t cpr_lock; 976 int32_t recall_count; 977 978 rfs4_dbe_lock(fp->rf_dbe); 979 980 /* Recall already in progress ? */ 981 mutex_enter(fp->rf_dinfo.rd_recall_lock); 982 if (fp->rf_dinfo.rd_recall_count != 0) { 983 mutex_exit(fp->rf_dinfo.rd_recall_lock); 984 rfs4_dbe_rele_nolock(fp->rf_dbe); 985 rfs4_dbe_unlock(fp->rf_dbe); 986 kmem_free(map, sizeof (struct master_recall_args)); 987 zthread_exit(); 988 } 989 990 mutex_exit(fp->rf_dinfo.rd_recall_lock); 991 992 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 993 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "v4RecallFile"); 994 995 recall_count = 0; 996 for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL; 997 dsp = list_next(&fp->rf_delegstatelist, dsp)) { 998 999 rfs4_dbe_lock(dsp->rds_dbe); 1000 /* 1001 * if this delegation state 1002 * is being reaped skip it 1003 */ 1004 if (rfs4_dbe_is_invalid(dsp->rds_dbe)) { 1005 rfs4_dbe_unlock(dsp->rds_dbe); 1006 continue; 1007 } 1008 1009 /* hold for receiving thread */ 1010 rfs4_dbe_hold(dsp->rds_dbe); 1011 rfs4_dbe_unlock(dsp->rds_dbe); 1012 1013 arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP); 1014 arg->recall = map->recall; 1015 arg->trunc = map->trunc; 1016 arg->dsp = dsp; 1017 1018 recall_count++; 1019 1020 (void) zthread_create(NULL, 0, do_recall, arg, 0, 1021 minclsyspri); 1022 } 1023 1024 rfs4_dbe_unlock(fp->rf_dbe); 1025 1026 mutex_enter(fp->rf_dinfo.rd_recall_lock); 1027 /* 1028 * Recall count may go negative if the parent thread that is 1029 * creating the individual callback threads does not modify 1030 * the recall_count field before the callback thread actually 1031 * gets a response from the CB_RECALL 1032 */ 1033 fp->rf_dinfo.rd_recall_count += recall_count; 1034 while (fp->rf_dinfo.rd_recall_count) 1035 cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock); 1036 1037 mutex_exit(fp->rf_dinfo.rd_recall_lock); 1038 1039 DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp); 1040 rfs4_file_rele(fp); 1041 kmem_free(map, sizeof (struct master_recall_args)); 1042 mutex_enter(&cpr_lock); 1043 CALLB_CPR_EXIT(&cpr_info); 1044 mutex_destroy(&cpr_lock); 1045 zthread_exit(); 1046 } 1047 1048 static void 1049 rfs4_recall_file(rfs4_file_t *fp, 1050 void (*recall)(rfs4_deleg_state_t *, bool_t trunc), 1051 bool_t trunc, rfs4_client_t *cp) 1052 { 1053 struct master_recall_args *args; 1054 1055 rfs4_dbe_lock(fp->rf_dbe); 1056 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) { 1057 rfs4_dbe_unlock(fp->rf_dbe); 1058 return; 1059 } 1060 rfs4_dbe_hold(fp->rf_dbe); /* hold for new thread */ 1061 1062 /* 1063 * Mark the time we started the recall processing. 1064 * If it has been previously recalled, do not reset the 1065 * timer since this is used for the revocation decision. 1066 */ 1067 if (fp->rf_dinfo.rd_time_recalled == 0) 1068 fp->rf_dinfo.rd_time_recalled = gethrestime_sec(); 1069 fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */ 1070 /* Client causing recall not always available */ 1071 if (cp) 1072 fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid; 1073 1074 rfs4_dbe_unlock(fp->rf_dbe); 1075 1076 args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP); 1077 args->fp = fp; 1078 args->recall = recall; 1079 args->trunc = trunc; 1080 1081 (void) zthread_create(NULL, 0, do_recall_file, args, 0, 1082 minclsyspri); 1083 } 1084 1085 void 1086 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp) 1087 { 1088 time_t elapsed1, elapsed2; 1089 1090 if (fp->rf_dinfo.rd_time_recalled != 0) { 1091 elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled; 1092 elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite; 1093 /* First check to see if a revocation should occur */ 1094 if (elapsed1 > rfs4_lease_time && 1095 elapsed2 > rfs4_lease_time) { 1096 rfs4_revoke_file(fp); 1097 return; 1098 } 1099 /* 1100 * Next check to see if a recall should be done again 1101 * so quickly. 1102 */ 1103 if (elapsed1 <= ((rfs4_lease_time * 20) / 100)) 1104 return; 1105 } 1106 rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp); 1107 } 1108 1109 /* 1110 * rfs4_check_recall is called from rfs4_do_open to determine if the current 1111 * open conflicts with the delegation. 1112 * Return true if we need recall otherwise false. 1113 * Assumes entry locks for sp and sp->rs_finfo are held. 1114 */ 1115 bool_t 1116 rfs4_check_recall(rfs4_state_t *sp, uint32_t access) 1117 { 1118 open_delegation_type4 dtype = sp->rs_finfo->rf_dinfo.rd_dtype; 1119 1120 switch (dtype) { 1121 case OPEN_DELEGATE_NONE: 1122 /* Not currently delegated so there is nothing to do */ 1123 return (FALSE); 1124 case OPEN_DELEGATE_READ: 1125 /* 1126 * If the access is only asking for READ then there is 1127 * no conflict and nothing to do. If it is asking 1128 * for write, then there will be conflict and the read 1129 * delegation should be recalled. 1130 */ 1131 if (access == OPEN4_SHARE_ACCESS_READ) 1132 return (FALSE); 1133 else 1134 return (TRUE); 1135 case OPEN_DELEGATE_WRITE: 1136 /* Check to see if this client has the delegation */ 1137 return (rfs4_is_deleg(sp)); 1138 } 1139 1140 return (FALSE); 1141 } 1142 1143 /* 1144 * Return the "best" allowable delegation available given the current 1145 * delegation type and the desired access and deny modes on the file. 1146 * At the point that this routine is called we know that the access and 1147 * deny modes are consistent with the file modes. 1148 */ 1149 static open_delegation_type4 1150 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp) 1151 { 1152 open_delegation_type4 dtype = fp->rf_dinfo.rd_dtype; 1153 uint32_t access = sp->rs_share_access; 1154 uint32_t deny = sp->rs_share_deny; 1155 int readcnt = 0; 1156 int writecnt = 0; 1157 1158 switch (dtype) { 1159 case OPEN_DELEGATE_NONE: 1160 /* 1161 * Determine if more than just this OPEN have the file 1162 * open and if so, no delegation may be provided to 1163 * the client. 1164 */ 1165 if (access & OPEN4_SHARE_ACCESS_WRITE) 1166 writecnt++; 1167 if (access & OPEN4_SHARE_ACCESS_READ) 1168 readcnt++; 1169 1170 if (fp->rf_access_read > readcnt || 1171 fp->rf_access_write > writecnt) 1172 return (OPEN_DELEGATE_NONE); 1173 1174 /* 1175 * If the client is going to write, or if the client 1176 * has exclusive access, return a write delegation. 1177 */ 1178 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1179 (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) 1180 return (OPEN_DELEGATE_WRITE); 1181 /* 1182 * If we don't want to write or we've haven't denied read 1183 * access to others, return a read delegation. 1184 */ 1185 if ((access & ~OPEN4_SHARE_ACCESS_WRITE) || 1186 (deny & ~OPEN4_SHARE_DENY_READ)) 1187 return (OPEN_DELEGATE_READ); 1188 1189 /* Shouldn't get here */ 1190 return (OPEN_DELEGATE_NONE); 1191 1192 case OPEN_DELEGATE_READ: 1193 /* 1194 * If the file is delegated for read but we wan't to 1195 * write or deny others to read then we can't delegate 1196 * the file. We shouldn't get here since the delegation should 1197 * have been recalled already. 1198 */ 1199 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1200 (deny & OPEN4_SHARE_DENY_READ)) 1201 return (OPEN_DELEGATE_NONE); 1202 return (OPEN_DELEGATE_READ); 1203 1204 case OPEN_DELEGATE_WRITE: 1205 return (OPEN_DELEGATE_WRITE); 1206 } 1207 1208 /* Shouldn't get here */ 1209 return (OPEN_DELEGATE_NONE); 1210 } 1211 1212 /* 1213 * Given the desired delegation type and the "history" of the file 1214 * determine the actual delegation type to return. 1215 */ 1216 static open_delegation_type4 1217 rfs4_delegation_policy(nfs4_srv_t *nsrv4, open_delegation_type4 dtype, 1218 rfs4_dinfo_t *dinfo, clientid4 cid) 1219 { 1220 time_t elapsed; 1221 1222 if (nsrv4->nfs4_deleg_policy != SRV_NORMAL_DELEGATE) 1223 return (OPEN_DELEGATE_NONE); 1224 1225 /* 1226 * Has this file/delegation ever been recalled? If not then 1227 * no further checks for a delegation race need to be done. 1228 * However if a recall has occurred, then check to see if a 1229 * client has caused its own delegation recall to occur. If 1230 * not, then has a delegation for this file been returned 1231 * recently? If so, then do not assign a new delegation to 1232 * avoid a "delegation race" between the original client and 1233 * the new/conflicting client. 1234 */ 1235 if (dinfo->rd_ever_recalled == TRUE) { 1236 if (dinfo->rd_conflicted_client != cid) { 1237 elapsed = gethrestime_sec() - dinfo->rd_time_returned; 1238 if (elapsed < rfs4_lease_time) 1239 return (OPEN_DELEGATE_NONE); 1240 } 1241 } 1242 1243 /* Limit the number of read grants */ 1244 if (dtype == OPEN_DELEGATE_READ && 1245 dinfo->rd_rdgrants > MAX_READ_DELEGATIONS) 1246 return (OPEN_DELEGATE_NONE); 1247 1248 /* 1249 * Should consider limiting total number of read/write 1250 * delegations the server will permit. 1251 */ 1252 1253 return (dtype); 1254 } 1255 1256 /* 1257 * Try and grant a delegation for an open give the state. The routine 1258 * returns the delegation type granted. This could be OPEN_DELEGATE_NONE. 1259 * 1260 * The state and associate file entry must be locked 1261 */ 1262 rfs4_deleg_state_t * 1263 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall) 1264 { 1265 nfs4_srv_t *nsrv4; 1266 rfs4_file_t *fp = sp->rs_finfo; 1267 open_delegation_type4 dtype; 1268 int no_delegation; 1269 1270 ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); 1271 ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); 1272 1273 nsrv4 = nfs4_get_srv(); 1274 1275 /* Is the server even providing delegations? */ 1276 if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE || 1277 dreq == DELEG_NONE) { 1278 return (NULL); 1279 } 1280 1281 /* Check to see if delegations have been temporarily disabled */ 1282 mutex_enter(&nsrv4->deleg_lock); 1283 no_delegation = rfs4_deleg_disabled; 1284 mutex_exit(&nsrv4->deleg_lock); 1285 1286 if (no_delegation) 1287 return (NULL); 1288 1289 /* Don't grant a delegation if a deletion is impending. */ 1290 if (fp->rf_dinfo.rd_hold_grant > 0) { 1291 return (NULL); 1292 } 1293 1294 /* 1295 * Don't grant a delegation if there are any lock manager 1296 * (NFSv2/v3) locks for the file. This is a bit of a hack (e.g., 1297 * if there are only read locks we should be able to grant a 1298 * read-only delegation), but it's good enough for now. 1299 * 1300 * MT safety: the lock manager checks for conflicting delegations 1301 * before processing a lock request. That check will block until 1302 * we are done here. So if the lock manager acquires a lock after 1303 * we decide to grant the delegation, the delegation will get 1304 * immediately recalled (if there's a conflict), so we're safe. 1305 */ 1306 if (lm_vp_active(fp->rf_vp)) { 1307 return (NULL); 1308 } 1309 1310 /* 1311 * Based on the type of delegation request passed in, take the 1312 * appropriate action (DELEG_NONE is handled above) 1313 */ 1314 switch (dreq) { 1315 1316 case DELEG_READ: 1317 case DELEG_WRITE: 1318 /* 1319 * The server "must" grant the delegation in this case. 1320 * Client is using open previous 1321 */ 1322 dtype = (open_delegation_type4)dreq; 1323 *recall = 1; 1324 break; 1325 case DELEG_ANY: 1326 /* 1327 * If a valid callback path does not exist, no delegation may 1328 * be granted. 1329 */ 1330 if (sp->rs_owner->ro_client->rc_cbinfo.cb_state != CB_OK) 1331 return (NULL); 1332 1333 /* 1334 * If the original operation which caused time_rm_delayed 1335 * to be set hasn't been retried and completed for one 1336 * full lease period, clear it and allow delegations to 1337 * get granted again. 1338 */ 1339 if (fp->rf_dinfo.rd_time_rm_delayed > 0 && 1340 gethrestime_sec() > 1341 fp->rf_dinfo.rd_time_rm_delayed + rfs4_lease_time) 1342 fp->rf_dinfo.rd_time_rm_delayed = 0; 1343 1344 /* 1345 * If we are waiting for a delegation to be returned then 1346 * don't delegate this file. We do this for correctness as 1347 * well as if the file is being recalled we would likely 1348 * recall this file again. 1349 */ 1350 1351 if (fp->rf_dinfo.rd_time_recalled != 0 || 1352 fp->rf_dinfo.rd_time_rm_delayed != 0) 1353 return (NULL); 1354 1355 /* Get the "best" delegation candidate */ 1356 dtype = rfs4_check_delegation(sp, fp); 1357 1358 if (dtype == OPEN_DELEGATE_NONE) 1359 return (NULL); 1360 1361 /* 1362 * Based on policy and the history of the file get the 1363 * actual delegation. 1364 */ 1365 dtype = rfs4_delegation_policy(nsrv4, dtype, &fp->rf_dinfo, 1366 sp->rs_owner->ro_client->rc_clientid); 1367 1368 if (dtype == OPEN_DELEGATE_NONE) 1369 return (NULL); 1370 break; 1371 default: 1372 return (NULL); 1373 } 1374 1375 /* set the delegation for the state */ 1376 return (rfs4_deleg_state(sp, dtype, recall)); 1377 } 1378 1379 void 1380 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp, 1381 nfsace4 *ace, int recall) 1382 { 1383 open_write_delegation4 *wp; 1384 open_read_delegation4 *rp; 1385 nfs_space_limit4 *spl; 1386 nfsace4 nace; 1387 1388 /* 1389 * We need to allocate a new copy of the who string. 1390 * this string will be freed by the rfs4_op_open dis_resfree 1391 * routine. We need to do this allocation since replays will 1392 * be allocated and rfs4_compound can't tell the difference from 1393 * a replay and an inital open. N.B. if an ace is passed in, it 1394 * the caller's responsibility to free it. 1395 */ 1396 1397 if (ace == NULL) { 1398 /* 1399 * Default is to deny all access, the client will have 1400 * to contact the server. XXX Do we want to actually 1401 * set a deny for every one, or do we simply want to 1402 * construct an entity that will match no one? 1403 */ 1404 nace.type = ACE4_ACCESS_DENIED_ACE_TYPE; 1405 nace.flag = 0; 1406 nace.access_mask = ACE4_VALID_MASK_BITS; 1407 (void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who); 1408 } else { 1409 nace.type = ace->type; 1410 nace.flag = ace->flag; 1411 nace.access_mask = ace->access_mask; 1412 (void) utf8_copy(&ace->who, &nace.who); 1413 } 1414 1415 dp->delegation_type = dsp->rds_dtype; 1416 1417 switch (dsp->rds_dtype) { 1418 case OPEN_DELEGATE_NONE: 1419 break; 1420 case OPEN_DELEGATE_READ: 1421 rp = &dp->open_delegation4_u.read; 1422 rp->stateid = dsp->rds_delegid.stateid; 1423 rp->recall = (bool_t)recall; 1424 rp->permissions = nace; 1425 break; 1426 case OPEN_DELEGATE_WRITE: 1427 wp = &dp->open_delegation4_u.write; 1428 wp->stateid = dsp->rds_delegid.stateid; 1429 wp->recall = (bool_t)recall; 1430 spl = &wp->space_limit; 1431 spl->limitby = NFS_LIMIT_SIZE; 1432 spl->nfs_space_limit4_u.filesize = 0; 1433 wp->permissions = nace; 1434 break; 1435 } 1436 } 1437 1438 /* 1439 * Check if the file is delegated via the provided file struct. 1440 * Return TRUE if it is delegated. This is intended for use by 1441 * the v4 server. The v2/v3 server code should use rfs4_check_delegated(). 1442 * 1443 * Note that if the file is found to have a delegation, it is 1444 * recalled, unless the clientid of the caller matches the clientid of the 1445 * delegation. If the caller has specified, there is a slight delay 1446 * inserted in the hopes that the delegation will be returned quickly. 1447 */ 1448 bool_t 1449 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp, 1450 bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp) 1451 { 1452 rfs4_deleg_state_t *dsp; 1453 1454 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 1455 1456 /* Is delegation enabled? */ 1457 if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE) 1458 return (FALSE); 1459 1460 /* do we have a delegation on this file? */ 1461 rfs4_dbe_lock(fp->rf_dbe); 1462 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) { 1463 if (is_rm) 1464 fp->rf_dinfo.rd_hold_grant++; 1465 rfs4_dbe_unlock(fp->rf_dbe); 1466 return (FALSE); 1467 } 1468 /* 1469 * do we have a write delegation on this file or are we 1470 * requesting write access to a file with any type of existing 1471 * delegation? 1472 */ 1473 if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) { 1474 if (cp != NULL) { 1475 dsp = list_head(&fp->rf_delegstatelist); 1476 if (dsp == NULL) { 1477 rfs4_dbe_unlock(fp->rf_dbe); 1478 return (FALSE); 1479 } 1480 /* 1481 * Does the requestor already own the delegation? 1482 */ 1483 if (dsp->rds_client->rc_clientid == *(cp)) { 1484 rfs4_dbe_unlock(fp->rf_dbe); 1485 return (FALSE); 1486 } 1487 } 1488 1489 rfs4_dbe_unlock(fp->rf_dbe); 1490 rfs4_recall_deleg(fp, trunc, NULL); 1491 1492 if (!do_delay) { 1493 rfs4_dbe_lock(fp->rf_dbe); 1494 fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec(); 1495 rfs4_dbe_unlock(fp->rf_dbe); 1496 return (TRUE); 1497 } 1498 1499 delay(NFS4_DELEGATION_CONFLICT_DELAY); 1500 1501 rfs4_dbe_lock(fp->rf_dbe); 1502 if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) { 1503 fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec(); 1504 rfs4_dbe_unlock(fp->rf_dbe); 1505 return (TRUE); 1506 } 1507 } 1508 if (is_rm) 1509 fp->rf_dinfo.rd_hold_grant++; 1510 rfs4_dbe_unlock(fp->rf_dbe); 1511 return (FALSE); 1512 } 1513 1514 /* 1515 * Check if the file is delegated in the case of a v2 or v3 access. 1516 * Return TRUE if it is delegated which in turn means that v2 should 1517 * drop the request and in the case of v3 JUKEBOX should be returned. 1518 */ 1519 bool_t 1520 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc) 1521 { 1522 nfs4_srv_t *nsrv4; 1523 rfs4_file_t *fp; 1524 bool_t create = FALSE; 1525 bool_t rc = FALSE; 1526 1527 nsrv4 = nfs4_get_srv(); 1528 rfs4_hold_deleg_policy(nsrv4); 1529 1530 /* Is delegation enabled? */ 1531 if (nsrv4->nfs4_deleg_policy != SRV_NEVER_DELEGATE) { 1532 fp = rfs4_findfile(vp, NULL, &create); 1533 if (fp != NULL) { 1534 if (rfs4_check_delegated_byfp(mode, fp, trunc, 1535 TRUE, FALSE, NULL)) { 1536 rc = TRUE; 1537 } 1538 rfs4_file_rele(fp); 1539 } 1540 } 1541 rfs4_rele_deleg_policy(nsrv4); 1542 return (rc); 1543 } 1544 1545 /* 1546 * Release a hold on the hold_grant counter which 1547 * prevents delegation from being granted while a remove 1548 * or a rename is in progress. 1549 */ 1550 void 1551 rfs4_clear_dont_grant(rfs4_file_t *fp) 1552 { 1553 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 1554 1555 if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE) 1556 return; 1557 rfs4_dbe_lock(fp->rf_dbe); 1558 ASSERT(fp->rf_dinfo.rd_hold_grant > 0); 1559 fp->rf_dinfo.rd_hold_grant--; 1560 fp->rf_dinfo.rd_time_rm_delayed = 0; 1561 rfs4_dbe_unlock(fp->rf_dbe); 1562 } 1563 1564 /* 1565 * State support for delegation. 1566 * Set the state delegation type for this state; 1567 * This routine is called from open via rfs4_grant_delegation and the entry 1568 * locks on sp and sp->rs_finfo are assumed. 1569 */ 1570 static rfs4_deleg_state_t * 1571 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall) 1572 { 1573 rfs4_file_t *fp = sp->rs_finfo; 1574 bool_t create = TRUE; 1575 rfs4_deleg_state_t *dsp; 1576 vnode_t *vp; 1577 int open_prev = *recall; 1578 int ret; 1579 int fflags = 0; 1580 1581 ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); 1582 ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); 1583 1584 /* Shouldn't happen */ 1585 if (fp->rf_dinfo.rd_recall_count != 0 || 1586 (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ && 1587 dtype != OPEN_DELEGATE_READ)) { 1588 return (NULL); 1589 } 1590 1591 /* Unlock to avoid deadlock */ 1592 rfs4_dbe_unlock(fp->rf_dbe); 1593 rfs4_dbe_unlock(sp->rs_dbe); 1594 1595 dsp = rfs4_finddeleg(sp, &create); 1596 1597 rfs4_dbe_lock(sp->rs_dbe); 1598 rfs4_dbe_lock(fp->rf_dbe); 1599 1600 if (dsp == NULL) 1601 return (NULL); 1602 1603 /* 1604 * It is possible that since we dropped the lock 1605 * in order to call finddeleg, the rfs4_file_t 1606 * was marked such that we should not grant a 1607 * delegation, if so bail out. 1608 */ 1609 if (fp->rf_dinfo.rd_hold_grant > 0) { 1610 rfs4_deleg_state_rele(dsp); 1611 return (NULL); 1612 } 1613 1614 if (create == FALSE) { 1615 if (sp->rs_owner->ro_client == dsp->rds_client && 1616 dsp->rds_dtype == dtype) { 1617 return (dsp); 1618 } else { 1619 rfs4_deleg_state_rele(dsp); 1620 return (NULL); 1621 } 1622 } 1623 1624 /* 1625 * Check that this file has not been delegated to another 1626 * client 1627 */ 1628 if (fp->rf_dinfo.rd_recall_count != 0 || 1629 fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE || 1630 (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ && 1631 dtype != OPEN_DELEGATE_READ)) { 1632 rfs4_deleg_state_rele(dsp); 1633 return (NULL); 1634 } 1635 1636 vp = fp->rf_vp; 1637 /* vnevent_support returns 0 if file system supports vnevents */ 1638 if (vnevent_support(vp, NULL)) { 1639 rfs4_deleg_state_rele(dsp); 1640 return (NULL); 1641 } 1642 1643 /* Calculate the fflags for this OPEN. */ 1644 if (sp->rs_share_access & OPEN4_SHARE_ACCESS_READ) 1645 fflags |= FREAD; 1646 if (sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE) 1647 fflags |= FWRITE; 1648 1649 *recall = 0; 1650 /* 1651 * Before granting a delegation we need to know if anyone else has 1652 * opened the file in a conflicting mode. However, first we need to 1653 * know how we opened the file to check the counts properly. 1654 */ 1655 if (dtype == OPEN_DELEGATE_READ) { 1656 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1657 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1658 vn_is_mapped(vp, V_WRITE)) { 1659 if (open_prev) { 1660 *recall = 1; 1661 } else { 1662 rfs4_deleg_state_rele(dsp); 1663 return (NULL); 1664 } 1665 } 1666 ret = fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ, 1667 rfs4_mon_hold, rfs4_mon_rele); 1668 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1669 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1670 vn_is_mapped(vp, V_WRITE)) { 1671 if (open_prev) { 1672 *recall = 1; 1673 } else { 1674 (void) fem_uninstall(vp, deleg_rdops, 1675 (void *)fp); 1676 rfs4_deleg_state_rele(dsp); 1677 return (NULL); 1678 } 1679 } 1680 /* 1681 * Because a client can hold onto a delegation after the 1682 * file has been closed, we need to keep track of the 1683 * access to this file. Otherwise the CIFS server would 1684 * not know about the client accessing the file and could 1685 * inappropriately grant an OPLOCK. 1686 * fem_install() returns EBUSY when asked to install a 1687 * OPUNIQ monitor more than once. Therefore, check the 1688 * return code because we only want this done once. 1689 */ 1690 if (ret == 0) 1691 vn_open_upgrade(vp, FREAD); 1692 } else { /* WRITE */ 1693 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1694 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1695 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) || 1696 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) || 1697 vn_is_mapped(vp, V_RDORWR)) { 1698 if (open_prev) { 1699 *recall = 1; 1700 } else { 1701 rfs4_deleg_state_rele(dsp); 1702 return (NULL); 1703 } 1704 } 1705 ret = fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ, 1706 rfs4_mon_hold, rfs4_mon_rele); 1707 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1708 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1709 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) || 1710 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) || 1711 vn_is_mapped(vp, V_RDORWR)) { 1712 if (open_prev) { 1713 *recall = 1; 1714 } else { 1715 (void) fem_uninstall(vp, deleg_wrops, 1716 (void *)fp); 1717 rfs4_deleg_state_rele(dsp); 1718 return (NULL); 1719 } 1720 } 1721 /* 1722 * Because a client can hold onto a delegation after the 1723 * file has been closed, we need to keep track of the 1724 * access to this file. Otherwise the CIFS server would 1725 * not know about the client accessing the file and could 1726 * inappropriately grant an OPLOCK. 1727 * fem_install() returns EBUSY when asked to install a 1728 * OPUNIQ monitor more than once. Therefore, check the 1729 * return code because we only want this done once. 1730 */ 1731 if (ret == 0) 1732 vn_open_upgrade(vp, FREAD|FWRITE); 1733 } 1734 /* Place on delegation list for file */ 1735 ASSERT(!list_link_active(&dsp->rds_node)); 1736 list_insert_tail(&fp->rf_delegstatelist, dsp); 1737 1738 dsp->rds_dtype = fp->rf_dinfo.rd_dtype = dtype; 1739 1740 /* Update delegation stats for this file */ 1741 fp->rf_dinfo.rd_time_lastgrant = gethrestime_sec(); 1742 1743 /* reset since this is a new delegation */ 1744 fp->rf_dinfo.rd_conflicted_client = 0; 1745 fp->rf_dinfo.rd_ever_recalled = FALSE; 1746 1747 if (dtype == OPEN_DELEGATE_READ) 1748 fp->rf_dinfo.rd_rdgrants++; 1749 else 1750 fp->rf_dinfo.rd_wrgrants++; 1751 1752 return (dsp); 1753 } 1754 1755 /* 1756 * State routine for the server when a delegation is returned. 1757 */ 1758 void 1759 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked) 1760 { 1761 rfs4_file_t *fp = dsp->rds_finfo; 1762 open_delegation_type4 dtypewas; 1763 1764 rfs4_dbe_lock(fp->rf_dbe); 1765 1766 /* nothing to do if no longer on list */ 1767 if (!list_link_active(&dsp->rds_node)) { 1768 rfs4_dbe_unlock(fp->rf_dbe); 1769 return; 1770 } 1771 1772 /* Remove state from recall list */ 1773 list_remove(&fp->rf_delegstatelist, dsp); 1774 1775 if (list_is_empty(&fp->rf_delegstatelist)) { 1776 dtypewas = fp->rf_dinfo.rd_dtype; 1777 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE; 1778 rfs4_dbe_cv_broadcast(fp->rf_dbe); 1779 1780 /* if file system was unshared, the vp will be NULL */ 1781 if (fp->rf_vp != NULL) { 1782 /* 1783 * Once a delegation is no longer held by any client, 1784 * the monitor is uninstalled. At this point, the 1785 * client must send OPEN otw, so we don't need the 1786 * reference on the vnode anymore. The open 1787 * downgrade removes the reference put on earlier. 1788 */ 1789 if (dtypewas == OPEN_DELEGATE_READ) { 1790 (void) fem_uninstall(fp->rf_vp, deleg_rdops, 1791 (void *)fp); 1792 vn_open_downgrade(fp->rf_vp, FREAD); 1793 } else if (dtypewas == OPEN_DELEGATE_WRITE) { 1794 (void) fem_uninstall(fp->rf_vp, deleg_wrops, 1795 (void *)fp); 1796 vn_open_downgrade(fp->rf_vp, FREAD|FWRITE); 1797 } 1798 } 1799 } 1800 1801 switch (dsp->rds_dtype) { 1802 case OPEN_DELEGATE_READ: 1803 fp->rf_dinfo.rd_rdgrants--; 1804 break; 1805 case OPEN_DELEGATE_WRITE: 1806 fp->rf_dinfo.rd_wrgrants--; 1807 break; 1808 default: 1809 break; 1810 } 1811 1812 /* used in the policy decision */ 1813 fp->rf_dinfo.rd_time_returned = gethrestime_sec(); 1814 1815 /* 1816 * reset the time_recalled field so future delegations are not 1817 * accidentally revoked 1818 */ 1819 if ((fp->rf_dinfo.rd_rdgrants + fp->rf_dinfo.rd_wrgrants) == 0) 1820 fp->rf_dinfo.rd_time_recalled = 0; 1821 1822 rfs4_dbe_unlock(fp->rf_dbe); 1823 1824 rfs4_dbe_lock(dsp->rds_dbe); 1825 1826 dsp->rds_dtype = OPEN_DELEGATE_NONE; 1827 1828 if (revoked == TRUE) 1829 dsp->rds_time_revoked = gethrestime_sec(); 1830 1831 rfs4_dbe_invalidate(dsp->rds_dbe); 1832 1833 rfs4_dbe_unlock(dsp->rds_dbe); 1834 1835 if (revoked == TRUE) { 1836 rfs4_dbe_lock(dsp->rds_client->rc_dbe); 1837 dsp->rds_client->rc_deleg_revoked++; /* observability */ 1838 rfs4_dbe_unlock(dsp->rds_client->rc_dbe); 1839 } 1840 } 1841 1842 static void 1843 rfs4_revoke_file(rfs4_file_t *fp) 1844 { 1845 rfs4_deleg_state_t *dsp; 1846 1847 /* 1848 * The lock for rfs4_file_t must be held when traversing the 1849 * delegation list but that lock needs to be released to call 1850 * rfs4_return_deleg() 1851 */ 1852 rfs4_dbe_lock(fp->rf_dbe); 1853 while (dsp = list_head(&fp->rf_delegstatelist)) { 1854 rfs4_dbe_hold(dsp->rds_dbe); 1855 rfs4_dbe_unlock(fp->rf_dbe); 1856 rfs4_return_deleg(dsp, TRUE); 1857 rfs4_deleg_state_rele(dsp); 1858 rfs4_dbe_lock(fp->rf_dbe); 1859 } 1860 rfs4_dbe_unlock(fp->rf_dbe); 1861 } 1862 1863 /* 1864 * A delegation is assumed to be present on the file associated with 1865 * "sp". Check to see if the delegation matches is associated with 1866 * the same client as referenced by "sp". If it is not, TRUE is 1867 * returned. If the delegation DOES match the client (or no 1868 * delegation is present), return FALSE. 1869 * Assume the state entry and file entry are locked. 1870 */ 1871 bool_t 1872 rfs4_is_deleg(rfs4_state_t *sp) 1873 { 1874 rfs4_deleg_state_t *dsp; 1875 rfs4_file_t *fp = sp->rs_finfo; 1876 rfs4_client_t *cp = sp->rs_owner->ro_client; 1877 1878 ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); 1879 for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL; 1880 dsp = list_next(&fp->rf_delegstatelist, dsp)) { 1881 if (cp != dsp->rds_client) { 1882 return (TRUE); 1883 } 1884 } 1885 return (FALSE); 1886 } 1887 1888 void 1889 rfs4_disable_delegation(void) 1890 { 1891 nfs4_srv_t *nsrv4; 1892 1893 nsrv4 = nfs4_get_srv(); 1894 mutex_enter(&nsrv4->deleg_lock); 1895 rfs4_deleg_disabled++; 1896 mutex_exit(&nsrv4->deleg_lock); 1897 } 1898 1899 void 1900 rfs4_enable_delegation(void) 1901 { 1902 nfs4_srv_t *nsrv4; 1903 1904 nsrv4 = nfs4_get_srv(); 1905 mutex_enter(&nsrv4->deleg_lock); 1906 ASSERT(rfs4_deleg_disabled > 0); 1907 rfs4_deleg_disabled--; 1908 mutex_exit(&nsrv4->deleg_lock); 1909 } 1910 1911 void 1912 rfs4_mon_hold(void *arg) 1913 { 1914 rfs4_file_t *fp = arg; 1915 1916 rfs4_dbe_hold(fp->rf_dbe); 1917 } 1918 1919 void 1920 rfs4_mon_rele(void *arg) 1921 { 1922 rfs4_file_t *fp = arg; 1923 1924 rfs4_dbe_rele_nolock(fp->rf_dbe); 1925 } 1926