1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 26 */ 27 28 #include <sys/systm.h> 29 #include <rpc/auth.h> 30 #include <rpc/clnt.h> 31 #include <nfs/nfs4_kprot.h> 32 #include <nfs/nfs4.h> 33 #include <nfs/lm.h> 34 #include <sys/cmn_err.h> 35 #include <sys/disp.h> 36 #include <sys/sdt.h> 37 38 #include <sys/pathname.h> 39 40 #include <sys/strsubr.h> 41 #include <sys/ddi.h> 42 43 #include <sys/vnode.h> 44 #include <sys/sdt.h> 45 #include <inet/common.h> 46 #include <inet/ip.h> 47 #include <inet/ip6.h> 48 49 #define MAX_READ_DELEGATIONS 5 50 51 krwlock_t rfs4_deleg_policy_lock; 52 srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE; 53 static int rfs4_deleg_wlp = 5; 54 kmutex_t rfs4_deleg_lock; 55 static int rfs4_deleg_disabled; 56 static int rfs4_max_setup_cb_tries = 5; 57 58 #ifdef DEBUG 59 60 static int rfs4_test_cbgetattr_fail = 0; 61 int rfs4_cb_null; 62 int rfs4_cb_debug; 63 int rfs4_deleg_debug; 64 65 #endif 66 67 static void rfs4_recall_file(rfs4_file_t *, 68 void (*recall)(rfs4_deleg_state_t *, bool_t), 69 bool_t, rfs4_client_t *); 70 static void rfs4_revoke_file(rfs4_file_t *); 71 static void rfs4_cb_chflush(rfs4_cbinfo_t *); 72 static CLIENT *rfs4_cb_getch(rfs4_cbinfo_t *); 73 static void rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t); 74 static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *, 75 open_delegation_type4, int *); 76 77 /* 78 * Convert a universal address to an transport specific 79 * address using inet_pton. 80 */ 81 static int 82 uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp) 83 { 84 int dots = 0, i, j, len, k; 85 unsigned char c; 86 in_port_t port = 0; 87 88 len = strlen(ua); 89 90 for (i = len-1; i >= 0; i--) { 91 92 if (ua[i] == '.') 93 dots++; 94 95 if (dots == 2) { 96 97 ua[i] = '\0'; 98 /* 99 * We use k to remember were to stick '.' back, since 100 * ua was kmem_allocateded from the pool len+1. 101 */ 102 k = i; 103 if (inet_pton(af, ua, ap) == 1) { 104 105 c = 0; 106 107 for (j = i+1; j < len; j++) { 108 if (ua[j] == '.') { 109 port = c << 8; 110 c = 0; 111 } else if (ua[j] >= '0' && 112 ua[j] <= '9') { 113 c *= 10; 114 c += ua[j] - '0'; 115 } else { 116 ua[k] = '.'; 117 return (EINVAL); 118 } 119 } 120 port += c; 121 122 *pp = htons(port); 123 124 ua[k] = '.'; 125 return (0); 126 } else { 127 ua[k] = '.'; 128 return (EINVAL); 129 } 130 } 131 } 132 133 return (EINVAL); 134 } 135 136 /* 137 * Update the delegation policy with the 138 * value of "new_policy" 139 */ 140 void 141 rfs4_set_deleg_policy(srv_deleg_policy_t new_policy) 142 { 143 rw_enter(&rfs4_deleg_policy_lock, RW_WRITER); 144 rfs4_deleg_policy = new_policy; 145 rw_exit(&rfs4_deleg_policy_lock); 146 } 147 148 void 149 rfs4_hold_deleg_policy(void) 150 { 151 rw_enter(&rfs4_deleg_policy_lock, RW_READER); 152 } 153 154 void 155 rfs4_rele_deleg_policy(void) 156 { 157 rw_exit(&rfs4_deleg_policy_lock); 158 } 159 160 161 /* 162 * This free function is to be used when the client struct is being 163 * released and nothing at all is needed of the callback info any 164 * longer. 165 */ 166 void 167 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp) 168 { 169 char *addr = cbp->cb_callback.cb_location.r_addr; 170 char *netid = cbp->cb_callback.cb_location.r_netid; 171 172 /* Free old address if any */ 173 174 if (addr) 175 kmem_free(addr, strlen(addr) + 1); 176 if (netid) 177 kmem_free(netid, strlen(netid) + 1); 178 179 addr = cbp->cb_newer.cb_callback.cb_location.r_addr; 180 netid = cbp->cb_newer.cb_callback.cb_location.r_netid; 181 182 if (addr) 183 kmem_free(addr, strlen(addr) + 1); 184 if (netid) 185 kmem_free(netid, strlen(netid) + 1); 186 187 if (cbp->cb_chc_free) { 188 rfs4_cb_chflush(cbp); 189 } 190 } 191 192 /* 193 * The server uses this to check the callback path supplied by the 194 * client. The callback connection is marked "in progress" while this 195 * work is going on and then eventually marked either OK or FAILED. 196 * This work can be done as part of a separate thread and at the end 197 * of this the thread will exit or it may be done such that the caller 198 * will continue with other work. 199 */ 200 static void 201 rfs4_do_cb_null(rfs4_client_t *cp) 202 { 203 struct timeval tv; 204 CLIENT *ch; 205 rfs4_cbstate_t newstate; 206 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo; 207 208 mutex_enter(cbp->cb_lock); 209 /* If another thread is doing CB_NULL RPC then return */ 210 if (cbp->cb_nullcaller == TRUE) { 211 mutex_exit(cbp->cb_lock); 212 rfs4_client_rele(cp); 213 return; 214 } 215 216 /* Mark the cbinfo as having a thread in the NULL callback */ 217 cbp->cb_nullcaller = TRUE; 218 219 /* 220 * Are there other threads still using the cbinfo client 221 * handles? If so, this thread must wait before going and 222 * mucking aroiund with the callback information 223 */ 224 while (cbp->cb_refcnt != 0) 225 cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock); 226 227 /* 228 * This thread itself may find that new callback info has 229 * arrived and is set up to handle this case and redrive the 230 * call to the client's callback server. 231 */ 232 retry: 233 if (cbp->cb_newer.cb_new == TRUE && 234 cbp->cb_newer.cb_confirmed == TRUE) { 235 char *addr = cbp->cb_callback.cb_location.r_addr; 236 char *netid = cbp->cb_callback.cb_location.r_netid; 237 238 /* 239 * Free the old stuff if it exists; may be the first 240 * time through this path 241 */ 242 if (addr) 243 kmem_free(addr, strlen(addr) + 1); 244 if (netid) 245 kmem_free(netid, strlen(netid) + 1); 246 247 /* Move over the addr/netid */ 248 cbp->cb_callback.cb_location.r_addr = 249 cbp->cb_newer.cb_callback.cb_location.r_addr; 250 cbp->cb_newer.cb_callback.cb_location.r_addr = NULL; 251 cbp->cb_callback.cb_location.r_netid = 252 cbp->cb_newer.cb_callback.cb_location.r_netid; 253 cbp->cb_newer.cb_callback.cb_location.r_netid = NULL; 254 255 /* Get the program number */ 256 cbp->cb_callback.cb_program = 257 cbp->cb_newer.cb_callback.cb_program; 258 cbp->cb_newer.cb_callback.cb_program = 0; 259 260 /* Don't forget the protocol's "cb_ident" field */ 261 cbp->cb_ident = cbp->cb_newer.cb_ident; 262 cbp->cb_newer.cb_ident = 0; 263 264 /* no longer new */ 265 cbp->cb_newer.cb_new = FALSE; 266 cbp->cb_newer.cb_confirmed = FALSE; 267 268 /* get rid of the old client handles that may exist */ 269 rfs4_cb_chflush(cbp); 270 271 cbp->cb_state = CB_NONE; 272 cbp->cb_timefailed = 0; /* reset the clock */ 273 cbp->cb_notified_of_cb_path_down = TRUE; 274 } 275 276 if (cbp->cb_state != CB_NONE) { 277 cv_broadcast(cbp->cb_cv); /* let the others know */ 278 cbp->cb_nullcaller = FALSE; 279 mutex_exit(cbp->cb_lock); 280 rfs4_client_rele(cp); 281 return; 282 } 283 284 /* mark rfs4_client_t as CALLBACK NULL in progress */ 285 cbp->cb_state = CB_INPROG; 286 mutex_exit(cbp->cb_lock); 287 288 /* get/generate a client handle */ 289 if ((ch = rfs4_cb_getch(cbp)) == NULL) { 290 mutex_enter(cbp->cb_lock); 291 cbp->cb_state = CB_BAD; 292 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 293 goto retry; 294 } 295 296 297 tv.tv_sec = 30; 298 tv.tv_usec = 0; 299 if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) { 300 newstate = CB_BAD; 301 } else { 302 newstate = CB_OK; 303 #ifdef DEBUG 304 rfs4_cb_null++; 305 #endif 306 } 307 308 /* Check to see if the client has specified new callback info */ 309 mutex_enter(cbp->cb_lock); 310 rfs4_cb_freech(cbp, ch, TRUE); 311 if (cbp->cb_newer.cb_new == TRUE && 312 cbp->cb_newer.cb_confirmed == TRUE) { 313 goto retry; /* give the CB_NULL another chance */ 314 } 315 316 cbp->cb_state = newstate; 317 if (cbp->cb_state == CB_BAD) 318 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 319 320 cv_broadcast(cbp->cb_cv); /* start up the other threads */ 321 cbp->cb_nullcaller = FALSE; 322 mutex_exit(cbp->cb_lock); 323 324 rfs4_client_rele(cp); 325 } 326 327 /* 328 * Given a client struct, inspect the callback info to see if the 329 * callback path is up and available. 330 * 331 * If new callback path is available and no one has set it up then 332 * try to set it up. If setup is not successful after 5 tries (5 secs) 333 * then gives up and returns NULL. 334 * 335 * If callback path is being initialized, then wait for the CB_NULL RPC 336 * call to occur. 337 */ 338 static rfs4_cbinfo_t * 339 rfs4_cbinfo_hold(rfs4_client_t *cp) 340 { 341 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo; 342 int retries = 0; 343 344 mutex_enter(cbp->cb_lock); 345 346 while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) { 347 /* 348 * Looks like a new callback path may be available and 349 * noone has set it up. 350 */ 351 mutex_exit(cbp->cb_lock); 352 rfs4_dbe_hold(cp->rc_dbe); 353 rfs4_do_cb_null(cp); /* caller will release client hold */ 354 355 mutex_enter(cbp->cb_lock); 356 /* 357 * If callback path is no longer new, or it's being setup 358 * then stop and wait for it to be done. 359 */ 360 if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE) 361 break; 362 mutex_exit(cbp->cb_lock); 363 364 if (++retries >= rfs4_max_setup_cb_tries) 365 return (NULL); 366 delay(hz); 367 mutex_enter(cbp->cb_lock); 368 } 369 370 /* Is there a thread working on doing the CB_NULL RPC? */ 371 if (cbp->cb_nullcaller == TRUE) 372 cv_wait(cbp->cb_cv, cbp->cb_lock); /* if so, wait on it */ 373 374 /* If the callback path is not okay (up and running), just quit */ 375 if (cbp->cb_state != CB_OK) { 376 mutex_exit(cbp->cb_lock); 377 return (NULL); 378 } 379 380 /* Let someone know we are using the current callback info */ 381 cbp->cb_refcnt++; 382 mutex_exit(cbp->cb_lock); 383 return (cbp); 384 } 385 386 /* 387 * The caller is done with the callback info. It may be that the 388 * caller's RPC failed and the NFSv4 client has actually provided new 389 * callback information. If so, let the caller know so they can 390 * advantage of this and maybe retry the RPC that originally failed. 391 */ 392 static int 393 rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate) 394 { 395 int cb_new = FALSE; 396 397 mutex_enter(cbp->cb_lock); 398 399 /* The caller gets a chance to mark the callback info as bad */ 400 if (newstate != CB_NOCHANGE) 401 cbp->cb_state = newstate; 402 if (newstate == CB_FAILED) { 403 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 404 cbp->cb_notified_of_cb_path_down = FALSE; 405 } 406 407 cbp->cb_refcnt--; /* no longer using the information */ 408 409 /* 410 * A thread may be waiting on this one to finish and if so, 411 * let it know that it is okay to do the CB_NULL to the 412 * client's callback server. 413 */ 414 if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller) 415 cv_broadcast(cbp->cb_cv_nullcaller); 416 417 /* 418 * If this is the last thread to use the callback info and 419 * there is new callback information to try and no thread is 420 * there ready to do the CB_NULL, then return true to teh 421 * caller so they can do the CB_NULL 422 */ 423 if (cbp->cb_refcnt == 0 && 424 cbp->cb_nullcaller == FALSE && 425 cbp->cb_newer.cb_new == TRUE && 426 cbp->cb_newer.cb_confirmed == TRUE) 427 cb_new = TRUE; 428 429 mutex_exit(cbp->cb_lock); 430 431 return (cb_new); 432 } 433 434 /* 435 * Given the information in the callback info struct, create a client 436 * handle that can be used by the server for its callback path. 437 */ 438 static CLIENT * 439 rfs4_cbch_init(rfs4_cbinfo_t *cbp) 440 { 441 struct knetconfig knc; 442 vnode_t *vp; 443 struct sockaddr_in addr4; 444 struct sockaddr_in6 addr6; 445 void *addr, *taddr; 446 in_port_t *pp; 447 int af; 448 char *devnam; 449 struct netbuf nb; 450 int size; 451 CLIENT *ch = NULL; 452 int useresvport = 0; 453 454 mutex_enter(cbp->cb_lock); 455 456 if (cbp->cb_callback.cb_location.r_netid == NULL || 457 cbp->cb_callback.cb_location.r_addr == NULL) { 458 goto cb_init_out; 459 } 460 461 if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) { 462 knc.knc_semantics = NC_TPI_COTS; 463 knc.knc_protofmly = "inet"; 464 knc.knc_proto = "tcp"; 465 devnam = "/dev/tcp"; 466 af = AF_INET; 467 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp") 468 == 0) { 469 knc.knc_semantics = NC_TPI_CLTS; 470 knc.knc_protofmly = "inet"; 471 knc.knc_proto = "udp"; 472 devnam = "/dev/udp"; 473 af = AF_INET; 474 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6") 475 == 0) { 476 knc.knc_semantics = NC_TPI_COTS; 477 knc.knc_protofmly = "inet6"; 478 knc.knc_proto = "tcp"; 479 devnam = "/dev/tcp6"; 480 af = AF_INET6; 481 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6") 482 == 0) { 483 knc.knc_semantics = NC_TPI_CLTS; 484 knc.knc_protofmly = "inet6"; 485 knc.knc_proto = "udp"; 486 devnam = "/dev/udp6"; 487 af = AF_INET6; 488 } else { 489 goto cb_init_out; 490 } 491 492 if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) { 493 494 goto cb_init_out; 495 } 496 497 if (vp->v_type != VCHR) { 498 VN_RELE(vp); 499 goto cb_init_out; 500 } 501 502 knc.knc_rdev = vp->v_rdev; 503 504 VN_RELE(vp); 505 506 if (af == AF_INET) { 507 size = sizeof (addr4); 508 bzero(&addr4, size); 509 addr4.sin_family = (sa_family_t)af; 510 addr = &addr4.sin_addr; 511 pp = &addr4.sin_port; 512 taddr = &addr4; 513 } else /* AF_INET6 */ { 514 size = sizeof (addr6); 515 bzero(&addr6, size); 516 addr6.sin6_family = (sa_family_t)af; 517 addr = &addr6.sin6_addr; 518 pp = &addr6.sin6_port; 519 taddr = &addr6; 520 } 521 522 if (uaddr2sockaddr(af, 523 cbp->cb_callback.cb_location.r_addr, addr, pp)) { 524 525 goto cb_init_out; 526 } 527 528 529 nb.maxlen = nb.len = size; 530 nb.buf = (char *)taddr; 531 532 if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program, 533 NFS_CB, 0, 0, curthread->t_cred, &ch)) { 534 535 ch = NULL; 536 } 537 538 /* turn off reserved port usage */ 539 (void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport); 540 541 cb_init_out: 542 mutex_exit(cbp->cb_lock); 543 return (ch); 544 } 545 546 /* 547 * Iterate over the client handle cache and 548 * destroy it. 549 */ 550 static void 551 rfs4_cb_chflush(rfs4_cbinfo_t *cbp) 552 { 553 CLIENT *ch; 554 555 while (cbp->cb_chc_free) { 556 cbp->cb_chc_free--; 557 ch = cbp->cb_chc[cbp->cb_chc_free]; 558 cbp->cb_chc[cbp->cb_chc_free] = NULL; 559 if (ch) { 560 if (ch->cl_auth) 561 auth_destroy(ch->cl_auth); 562 clnt_destroy(ch); 563 } 564 } 565 } 566 567 /* 568 * Return a client handle, either from a the small 569 * rfs4_client_t cache or one that we just created. 570 */ 571 static CLIENT * 572 rfs4_cb_getch(rfs4_cbinfo_t *cbp) 573 { 574 CLIENT *cbch = NULL; 575 uint32_t zilch = 0; 576 577 mutex_enter(cbp->cb_lock); 578 579 if (cbp->cb_chc_free) { 580 cbp->cb_chc_free--; 581 cbch = cbp->cb_chc[ cbp->cb_chc_free ]; 582 mutex_exit(cbp->cb_lock); 583 (void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch); 584 return (cbch); 585 } 586 587 mutex_exit(cbp->cb_lock); 588 589 /* none free so make it now */ 590 cbch = rfs4_cbch_init(cbp); 591 592 return (cbch); 593 } 594 595 /* 596 * Return the client handle to the small cache or 597 * destroy it. 598 */ 599 static void 600 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld) 601 { 602 if (lockheld == FALSE) 603 mutex_enter(cbp->cb_lock); 604 605 if (cbp->cb_chc_free < RFS4_CBCH_MAX) { 606 cbp->cb_chc[ cbp->cb_chc_free++ ] = ch; 607 if (lockheld == FALSE) 608 mutex_exit(cbp->cb_lock); 609 return; 610 } 611 if (lockheld == FALSE) 612 mutex_exit(cbp->cb_lock); 613 614 /* 615 * cache maxed out of free entries, obliterate 616 * this client handle, destroy it, throw it away. 617 */ 618 if (ch->cl_auth) 619 auth_destroy(ch->cl_auth); 620 clnt_destroy(ch); 621 } 622 623 /* 624 * With the supplied callback information - initialize the client 625 * callback data. If there is a callback in progress, save the 626 * callback info so that a thread can pick it up in the future. 627 */ 628 void 629 rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident) 630 { 631 char *addr = NULL; 632 char *netid = NULL; 633 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo; 634 size_t len; 635 636 /* Set the call back for the client */ 637 if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' && 638 cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') { 639 len = strlen(cb->cb_location.r_addr) + 1; 640 addr = kmem_alloc(len, KM_SLEEP); 641 bcopy(cb->cb_location.r_addr, addr, len); 642 len = strlen(cb->cb_location.r_netid) + 1; 643 netid = kmem_alloc(len, KM_SLEEP); 644 bcopy(cb->cb_location.r_netid, netid, len); 645 } 646 /* ready to save the new information but first free old, if exists */ 647 mutex_enter(cbp->cb_lock); 648 649 cbp->cb_newer.cb_callback.cb_program = cb->cb_program; 650 651 if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL) 652 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr, 653 strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1); 654 cbp->cb_newer.cb_callback.cb_location.r_addr = addr; 655 656 if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL) 657 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid, 658 strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1); 659 cbp->cb_newer.cb_callback.cb_location.r_netid = netid; 660 661 cbp->cb_newer.cb_ident = cb_ident; 662 663 if (addr && *addr && netid && *netid) { 664 cbp->cb_newer.cb_new = TRUE; 665 cbp->cb_newer.cb_confirmed = FALSE; 666 } else { 667 cbp->cb_newer.cb_new = FALSE; 668 cbp->cb_newer.cb_confirmed = FALSE; 669 } 670 671 mutex_exit(cbp->cb_lock); 672 } 673 674 /* 675 * The server uses this when processing SETCLIENTID_CONFIRM. Callback 676 * information may have been provided on SETCLIENTID and this call 677 * marks that information as confirmed and then starts a thread to 678 * test the callback path. 679 */ 680 void 681 rfs4_deleg_cb_check(rfs4_client_t *cp) 682 { 683 if (cp->rc_cbinfo.cb_newer.cb_new == FALSE) 684 return; 685 686 cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE; 687 688 rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */ 689 690 (void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN, 691 minclsyspri); 692 } 693 694 static void 695 rfs4args_cb_recall_free(nfs_cb_argop4 *argop) 696 { 697 CB_RECALL4args *rec_argp; 698 699 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 700 if (rec_argp->fh.nfs_fh4_val) 701 kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len); 702 } 703 704 /* ARGSUSED */ 705 static void 706 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop) 707 { 708 CB_GETATTR4args *argp; 709 710 argp = &argop->nfs_cb_argop4_u.opcbgetattr; 711 if (argp->fh.nfs_fh4_val) 712 kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len); 713 } 714 715 static void 716 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp) 717 { 718 int i, arglen; 719 nfs_cb_argop4 *argop; 720 721 /* 722 * First free any special args alloc'd for specific ops. 723 */ 724 arglen = args->array_len; 725 argop = args->array; 726 for (i = 0; i < arglen; i++, argop++) { 727 728 switch (argop->argop) { 729 case OP_CB_RECALL: 730 rfs4args_cb_recall_free(argop); 731 break; 732 733 case OP_CB_GETATTR: 734 rfs4args_cb_getattr_free(argop); 735 break; 736 737 default: 738 return; 739 } 740 } 741 742 if (args->tag.utf8string_len > 0) 743 UTF8STRING_FREE(args->tag) 744 745 kmem_free(args->array, arglen * sizeof (nfs_cb_argop4)); 746 if (resp) 747 (void) xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp); 748 } 749 750 /* 751 * General callback routine for the server to the client. 752 */ 753 static enum clnt_stat 754 rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args, 755 CB_COMPOUND4res *res, struct timeval timeout) 756 { 757 rfs4_cbinfo_t *cbp; 758 CLIENT *ch; 759 /* start with this in case cb_getch() fails */ 760 enum clnt_stat stat = RPC_FAILED; 761 762 res->tag.utf8string_val = NULL; 763 res->array = NULL; 764 765 retry: 766 cbp = rfs4_cbinfo_hold(cp); 767 if (cbp == NULL) 768 return (stat); 769 770 /* get a client handle */ 771 if ((ch = rfs4_cb_getch(cbp)) != NULL) { 772 /* 773 * reset the cb_ident since it may have changed in 774 * rfs4_cbinfo_hold() 775 */ 776 args->callback_ident = cbp->cb_ident; 777 778 stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv, 779 (caddr_t)args, xdr_CB_COMPOUND4res, 780 (caddr_t)res, timeout); 781 782 /* free client handle */ 783 rfs4_cb_freech(cbp, ch, FALSE); 784 } 785 786 /* 787 * If the rele says that there may be new callback info then 788 * retry this sequence and it may succeed as a result of the 789 * new callback path 790 */ 791 if (rfs4_cbinfo_rele(cbp, 792 (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE) 793 goto retry; 794 795 return (stat); 796 } 797 798 /* 799 * Used by the NFSv4 server to get attributes for a file while 800 * handling the case where a file has been write delegated. For the 801 * time being, VOP_GETATTR() is called and CB_GETATTR processing is 802 * not undertaken. This call site is maintained in case the server is 803 * updated in the future to handle write delegation space guarantees. 804 */ 805 nfsstat4 806 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 807 { 808 809 int error; 810 811 error = VOP_GETATTR(vp, vap, flag, cr, NULL); 812 return (puterrno4(error)); 813 } 814 815 /* 816 * This is used everywhere in the v2/v3 server to allow the 817 * integration of all NFS versions and the support of delegation. For 818 * now, just call the VOP_GETATTR(). If the NFSv4 server is enhanced 819 * in the future to provide space guarantees for write delegations 820 * then this call site should be expanded to interact with the client. 821 */ 822 int 823 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 824 { 825 return (VOP_GETATTR(vp, vap, flag, cr, NULL)); 826 } 827 828 /* 829 * Place the actual cb_recall otw call to client. 830 */ 831 static void 832 rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc) 833 { 834 CB_COMPOUND4args cb4_args; 835 CB_COMPOUND4res cb4_res; 836 CB_RECALL4args *rec_argp; 837 CB_RECALL4res *rec_resp; 838 nfs_cb_argop4 *argop; 839 int numops; 840 int argoplist_size; 841 struct timeval timeout; 842 nfs_fh4 *fhp; 843 enum clnt_stat call_stat; 844 845 /* 846 * set up the compound args 847 */ 848 numops = 1; /* CB_RECALL only */ 849 850 argoplist_size = numops * sizeof (nfs_cb_argop4); 851 argop = kmem_zalloc(argoplist_size, KM_SLEEP); 852 argop->argop = OP_CB_RECALL; 853 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 854 855 (void) str_to_utf8("cb_recall", &cb4_args.tag); 856 cb4_args.minorversion = CB4_MINORVERSION; 857 /* cb4_args.callback_ident is set in rfs4_do_callback() */ 858 cb4_args.array_len = numops; 859 cb4_args.array = argop; 860 861 /* 862 * fill in the args struct 863 */ 864 bcopy(&dsp->rds_delegid.stateid, &rec_argp->stateid, sizeof (stateid4)); 865 rec_argp->truncate = trunc; 866 867 fhp = &dsp->rds_finfo->rf_filehandle; 868 rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) * 869 fhp->nfs_fh4_len, KM_SLEEP); 870 nfs_fh4_copy(fhp, &rec_argp->fh); 871 872 /* Keep track of when we did this for observability */ 873 dsp->rds_time_recalled = gethrestime_sec(); 874 875 /* 876 * Set up the timeout for the callback and make the actual call. 877 * Timeout will be 80% of the lease period for this server. 878 */ 879 timeout.tv_sec = (rfs4_lease_time * 80) / 100; 880 timeout.tv_usec = 0; 881 882 DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->rds_client, 883 rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp); 884 885 call_stat = rfs4_do_callback(dsp->rds_client, &cb4_args, &cb4_res, 886 timeout); 887 888 rec_resp = (cb4_res.array_len == 0) ? NULL : 889 &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall; 890 891 DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->rds_client, 892 rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp); 893 894 if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) { 895 rfs4_return_deleg(dsp, TRUE); 896 } 897 898 rfs4freeargres(&cb4_args, &cb4_res); 899 } 900 901 struct recall_arg { 902 rfs4_deleg_state_t *dsp; 903 void (*recall)(rfs4_deleg_state_t *, bool_t trunc); 904 bool_t trunc; 905 }; 906 907 static void 908 do_recall(struct recall_arg *arg) 909 { 910 rfs4_deleg_state_t *dsp = arg->dsp; 911 rfs4_file_t *fp = dsp->rds_finfo; 912 callb_cpr_t cpr_info; 913 kmutex_t cpr_lock; 914 915 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 916 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall"); 917 918 /* 919 * It is possible that before this thread starts 920 * the client has send us a return_delegation, and 921 * if that is the case we do not need to send the 922 * recall callback. 923 */ 924 if (dsp->rds_dtype != OPEN_DELEGATE_NONE) { 925 DTRACE_PROBE3(nfss__i__recall, 926 struct recall_arg *, arg, 927 struct rfs4_deleg_state_t *, dsp, 928 struct rfs4_file_t *, fp); 929 930 if (arg->recall) 931 (void) (*arg->recall)(dsp, arg->trunc); 932 } 933 934 mutex_enter(fp->rf_dinfo.rd_recall_lock); 935 /* 936 * Recall count may go negative if the parent thread that is 937 * creating the individual callback threads does not modify 938 * the recall_count field before the callback thread actually 939 * gets a response from the CB_RECALL 940 */ 941 fp->rf_dinfo.rd_recall_count--; 942 if (fp->rf_dinfo.rd_recall_count == 0) 943 cv_signal(fp->rf_dinfo.rd_recall_cv); 944 mutex_exit(fp->rf_dinfo.rd_recall_lock); 945 946 mutex_enter(&cpr_lock); 947 CALLB_CPR_EXIT(&cpr_info); 948 mutex_destroy(&cpr_lock); 949 950 rfs4_deleg_state_rele(dsp); /* release the hold for this thread */ 951 952 kmem_free(arg, sizeof (struct recall_arg)); 953 } 954 955 struct master_recall_args { 956 rfs4_file_t *fp; 957 void (*recall)(rfs4_deleg_state_t *, bool_t); 958 bool_t trunc; 959 }; 960 961 static void 962 do_recall_file(struct master_recall_args *map) 963 { 964 rfs4_file_t *fp = map->fp; 965 rfs4_deleg_state_t *dsp; 966 struct recall_arg *arg; 967 callb_cpr_t cpr_info; 968 kmutex_t cpr_lock; 969 int32_t recall_count; 970 971 rfs4_dbe_lock(fp->rf_dbe); 972 973 /* Recall already in progress ? */ 974 mutex_enter(fp->rf_dinfo.rd_recall_lock); 975 if (fp->rf_dinfo.rd_recall_count != 0) { 976 mutex_exit(fp->rf_dinfo.rd_recall_lock); 977 rfs4_dbe_rele_nolock(fp->rf_dbe); 978 rfs4_dbe_unlock(fp->rf_dbe); 979 kmem_free(map, sizeof (struct master_recall_args)); 980 return; 981 } 982 983 mutex_exit(fp->rf_dinfo.rd_recall_lock); 984 985 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 986 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "v4RecallFile"); 987 988 recall_count = 0; 989 for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL; 990 dsp = list_next(&fp->rf_delegstatelist, dsp)) { 991 992 rfs4_dbe_lock(dsp->rds_dbe); 993 /* 994 * if this delegation state 995 * is being reaped skip it 996 */ 997 if (rfs4_dbe_is_invalid(dsp->rds_dbe)) { 998 rfs4_dbe_unlock(dsp->rds_dbe); 999 continue; 1000 } 1001 1002 /* hold for receiving thread */ 1003 rfs4_dbe_hold(dsp->rds_dbe); 1004 rfs4_dbe_unlock(dsp->rds_dbe); 1005 1006 arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP); 1007 arg->recall = map->recall; 1008 arg->trunc = map->trunc; 1009 arg->dsp = dsp; 1010 1011 recall_count++; 1012 1013 (void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN, 1014 minclsyspri); 1015 } 1016 1017 rfs4_dbe_unlock(fp->rf_dbe); 1018 1019 mutex_enter(fp->rf_dinfo.rd_recall_lock); 1020 /* 1021 * Recall count may go negative if the parent thread that is 1022 * creating the individual callback threads does not modify 1023 * the recall_count field before the callback thread actually 1024 * gets a response from the CB_RECALL 1025 */ 1026 fp->rf_dinfo.rd_recall_count += recall_count; 1027 while (fp->rf_dinfo.rd_recall_count) 1028 cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock); 1029 1030 mutex_exit(fp->rf_dinfo.rd_recall_lock); 1031 1032 DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp); 1033 rfs4_file_rele(fp); 1034 kmem_free(map, sizeof (struct master_recall_args)); 1035 mutex_enter(&cpr_lock); 1036 CALLB_CPR_EXIT(&cpr_info); 1037 mutex_destroy(&cpr_lock); 1038 } 1039 1040 static void 1041 rfs4_recall_file(rfs4_file_t *fp, 1042 void (*recall)(rfs4_deleg_state_t *, bool_t trunc), 1043 bool_t trunc, rfs4_client_t *cp) 1044 { 1045 struct master_recall_args *args; 1046 1047 rfs4_dbe_lock(fp->rf_dbe); 1048 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) { 1049 rfs4_dbe_unlock(fp->rf_dbe); 1050 return; 1051 } 1052 rfs4_dbe_hold(fp->rf_dbe); /* hold for new thread */ 1053 1054 /* 1055 * Mark the time we started the recall processing. 1056 * If it has been previously recalled, do not reset the 1057 * timer since this is used for the revocation decision. 1058 */ 1059 if (fp->rf_dinfo.rd_time_recalled == 0) 1060 fp->rf_dinfo.rd_time_recalled = gethrestime_sec(); 1061 fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */ 1062 /* Client causing recall not always available */ 1063 if (cp) 1064 fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid; 1065 1066 rfs4_dbe_unlock(fp->rf_dbe); 1067 1068 args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP); 1069 args->fp = fp; 1070 args->recall = recall; 1071 args->trunc = trunc; 1072 1073 (void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN, 1074 minclsyspri); 1075 } 1076 1077 void 1078 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp) 1079 { 1080 time_t elapsed1, elapsed2; 1081 1082 if (fp->rf_dinfo.rd_time_recalled != 0) { 1083 elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled; 1084 elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite; 1085 /* First check to see if a revocation should occur */ 1086 if (elapsed1 > rfs4_lease_time && 1087 elapsed2 > rfs4_lease_time) { 1088 rfs4_revoke_file(fp); 1089 return; 1090 } 1091 /* 1092 * Next check to see if a recall should be done again 1093 * so quickly. 1094 */ 1095 if (elapsed1 <= ((rfs4_lease_time * 20) / 100)) 1096 return; 1097 } 1098 rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp); 1099 } 1100 1101 /* 1102 * rfs4_check_recall is called from rfs4_do_open to determine if the current 1103 * open conflicts with the delegation. 1104 * Return true if we need recall otherwise false. 1105 * Assumes entry locks for sp and sp->rs_finfo are held. 1106 */ 1107 bool_t 1108 rfs4_check_recall(rfs4_state_t *sp, uint32_t access) 1109 { 1110 open_delegation_type4 dtype = sp->rs_finfo->rf_dinfo.rd_dtype; 1111 1112 switch (dtype) { 1113 case OPEN_DELEGATE_NONE: 1114 /* Not currently delegated so there is nothing to do */ 1115 return (FALSE); 1116 case OPEN_DELEGATE_READ: 1117 /* 1118 * If the access is only asking for READ then there is 1119 * no conflict and nothing to do. If it is asking 1120 * for write, then there will be conflict and the read 1121 * delegation should be recalled. 1122 */ 1123 if (access == OPEN4_SHARE_ACCESS_READ) 1124 return (FALSE); 1125 else 1126 return (TRUE); 1127 case OPEN_DELEGATE_WRITE: 1128 /* Check to see if this client has the delegation */ 1129 return (rfs4_is_deleg(sp)); 1130 } 1131 1132 return (FALSE); 1133 } 1134 1135 /* 1136 * Return the "best" allowable delegation available given the current 1137 * delegation type and the desired access and deny modes on the file. 1138 * At the point that this routine is called we know that the access and 1139 * deny modes are consistent with the file modes. 1140 */ 1141 static open_delegation_type4 1142 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp) 1143 { 1144 open_delegation_type4 dtype = fp->rf_dinfo.rd_dtype; 1145 uint32_t access = sp->rs_share_access; 1146 uint32_t deny = sp->rs_share_deny; 1147 int readcnt = 0; 1148 int writecnt = 0; 1149 1150 switch (dtype) { 1151 case OPEN_DELEGATE_NONE: 1152 /* 1153 * Determine if more than just this OPEN have the file 1154 * open and if so, no delegation may be provided to 1155 * the client. 1156 */ 1157 if (access & OPEN4_SHARE_ACCESS_WRITE) 1158 writecnt++; 1159 if (access & OPEN4_SHARE_ACCESS_READ) 1160 readcnt++; 1161 1162 if (fp->rf_access_read > readcnt || 1163 fp->rf_access_write > writecnt) 1164 return (OPEN_DELEGATE_NONE); 1165 1166 /* 1167 * If the client is going to write, or if the client 1168 * has exclusive access, return a write delegation. 1169 */ 1170 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1171 (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) 1172 return (OPEN_DELEGATE_WRITE); 1173 /* 1174 * If we don't want to write or we've haven't denied read 1175 * access to others, return a read delegation. 1176 */ 1177 if ((access & ~OPEN4_SHARE_ACCESS_WRITE) || 1178 (deny & ~OPEN4_SHARE_DENY_READ)) 1179 return (OPEN_DELEGATE_READ); 1180 1181 /* Shouldn't get here */ 1182 return (OPEN_DELEGATE_NONE); 1183 1184 case OPEN_DELEGATE_READ: 1185 /* 1186 * If the file is delegated for read but we wan't to 1187 * write or deny others to read then we can't delegate 1188 * the file. We shouldn't get here since the delegation should 1189 * have been recalled already. 1190 */ 1191 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1192 (deny & OPEN4_SHARE_DENY_READ)) 1193 return (OPEN_DELEGATE_NONE); 1194 return (OPEN_DELEGATE_READ); 1195 1196 case OPEN_DELEGATE_WRITE: 1197 return (OPEN_DELEGATE_WRITE); 1198 } 1199 1200 /* Shouldn't get here */ 1201 return (OPEN_DELEGATE_NONE); 1202 } 1203 1204 /* 1205 * Given the desired delegation type and the "history" of the file 1206 * determine the actual delegation type to return. 1207 */ 1208 static open_delegation_type4 1209 rfs4_delegation_policy(open_delegation_type4 dtype, 1210 rfs4_dinfo_t *dinfo, clientid4 cid) 1211 { 1212 time_t elapsed; 1213 1214 if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE) 1215 return (OPEN_DELEGATE_NONE); 1216 1217 /* 1218 * Has this file/delegation ever been recalled? If not then 1219 * no further checks for a delegation race need to be done. 1220 * However if a recall has occurred, then check to see if a 1221 * client has caused its own delegation recall to occur. If 1222 * not, then has a delegation for this file been returned 1223 * recently? If so, then do not assign a new delegation to 1224 * avoid a "delegation race" between the original client and 1225 * the new/conflicting client. 1226 */ 1227 if (dinfo->rd_ever_recalled == TRUE) { 1228 if (dinfo->rd_conflicted_client != cid) { 1229 elapsed = gethrestime_sec() - dinfo->rd_time_returned; 1230 if (elapsed < rfs4_lease_time) 1231 return (OPEN_DELEGATE_NONE); 1232 } 1233 } 1234 1235 /* Limit the number of read grants */ 1236 if (dtype == OPEN_DELEGATE_READ && 1237 dinfo->rd_rdgrants > MAX_READ_DELEGATIONS) 1238 return (OPEN_DELEGATE_NONE); 1239 1240 /* 1241 * Should consider limiting total number of read/write 1242 * delegations the server will permit. 1243 */ 1244 1245 return (dtype); 1246 } 1247 1248 /* 1249 * Try and grant a delegation for an open give the state. The routine 1250 * returns the delegation type granted. This could be OPEN_DELEGATE_NONE. 1251 * 1252 * The state and associate file entry must be locked 1253 */ 1254 rfs4_deleg_state_t * 1255 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall) 1256 { 1257 rfs4_file_t *fp = sp->rs_finfo; 1258 open_delegation_type4 dtype; 1259 int no_delegation; 1260 1261 ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); 1262 ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); 1263 1264 /* Is the server even providing delegations? */ 1265 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE) 1266 return (NULL); 1267 1268 /* Check to see if delegations have been temporarily disabled */ 1269 mutex_enter(&rfs4_deleg_lock); 1270 no_delegation = rfs4_deleg_disabled; 1271 mutex_exit(&rfs4_deleg_lock); 1272 1273 if (no_delegation) 1274 return (NULL); 1275 1276 /* Don't grant a delegation if a deletion is impending. */ 1277 if (fp->rf_dinfo.rd_hold_grant > 0) { 1278 return (NULL); 1279 } 1280 1281 /* 1282 * Don't grant a delegation if there are any lock manager 1283 * (NFSv2/v3) locks for the file. This is a bit of a hack (e.g., 1284 * if there are only read locks we should be able to grant a 1285 * read-only delegation), but it's good enough for now. 1286 * 1287 * MT safety: the lock manager checks for conflicting delegations 1288 * before processing a lock request. That check will block until 1289 * we are done here. So if the lock manager acquires a lock after 1290 * we decide to grant the delegation, the delegation will get 1291 * immediately recalled (if there's a conflict), so we're safe. 1292 */ 1293 if (lm_vp_active(fp->rf_vp)) { 1294 return (NULL); 1295 } 1296 1297 /* 1298 * Based on the type of delegation request passed in, take the 1299 * appropriate action (DELEG_NONE is handled above) 1300 */ 1301 switch (dreq) { 1302 1303 case DELEG_READ: 1304 case DELEG_WRITE: 1305 /* 1306 * The server "must" grant the delegation in this case. 1307 * Client is using open previous 1308 */ 1309 dtype = (open_delegation_type4)dreq; 1310 *recall = 1; 1311 break; 1312 case DELEG_ANY: 1313 /* 1314 * If a valid callback path does not exist, no delegation may 1315 * be granted. 1316 */ 1317 if (sp->rs_owner->ro_client->rc_cbinfo.cb_state != CB_OK) 1318 return (NULL); 1319 1320 /* 1321 * If the original operation which caused time_rm_delayed 1322 * to be set hasn't been retried and completed for one 1323 * full lease period, clear it and allow delegations to 1324 * get granted again. 1325 */ 1326 if (fp->rf_dinfo.rd_time_rm_delayed > 0 && 1327 gethrestime_sec() > 1328 fp->rf_dinfo.rd_time_rm_delayed + rfs4_lease_time) 1329 fp->rf_dinfo.rd_time_rm_delayed = 0; 1330 1331 /* 1332 * If we are waiting for a delegation to be returned then 1333 * don't delegate this file. We do this for correctness as 1334 * well as if the file is being recalled we would likely 1335 * recall this file again. 1336 */ 1337 1338 if (fp->rf_dinfo.rd_time_recalled != 0 || 1339 fp->rf_dinfo.rd_time_rm_delayed != 0) 1340 return (NULL); 1341 1342 /* Get the "best" delegation candidate */ 1343 dtype = rfs4_check_delegation(sp, fp); 1344 1345 if (dtype == OPEN_DELEGATE_NONE) 1346 return (NULL); 1347 1348 /* 1349 * Based on policy and the history of the file get the 1350 * actual delegation. 1351 */ 1352 dtype = rfs4_delegation_policy(dtype, &fp->rf_dinfo, 1353 sp->rs_owner->ro_client->rc_clientid); 1354 1355 if (dtype == OPEN_DELEGATE_NONE) 1356 return (NULL); 1357 break; 1358 default: 1359 return (NULL); 1360 } 1361 1362 /* set the delegation for the state */ 1363 return (rfs4_deleg_state(sp, dtype, recall)); 1364 } 1365 1366 void 1367 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp, 1368 nfsace4 *ace, int recall) 1369 { 1370 open_write_delegation4 *wp; 1371 open_read_delegation4 *rp; 1372 nfs_space_limit4 *spl; 1373 nfsace4 nace; 1374 1375 /* 1376 * We need to allocate a new copy of the who string. 1377 * this string will be freed by the rfs4_op_open dis_resfree 1378 * routine. We need to do this allocation since replays will 1379 * be allocated and rfs4_compound can't tell the difference from 1380 * a replay and an inital open. N.B. if an ace is passed in, it 1381 * the caller's responsibility to free it. 1382 */ 1383 1384 if (ace == NULL) { 1385 /* 1386 * Default is to deny all access, the client will have 1387 * to contact the server. XXX Do we want to actually 1388 * set a deny for every one, or do we simply want to 1389 * construct an entity that will match no one? 1390 */ 1391 nace.type = ACE4_ACCESS_DENIED_ACE_TYPE; 1392 nace.flag = 0; 1393 nace.access_mask = ACE4_VALID_MASK_BITS; 1394 (void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who); 1395 } else { 1396 nace.type = ace->type; 1397 nace.flag = ace->flag; 1398 nace.access_mask = ace->access_mask; 1399 (void) utf8_copy(&ace->who, &nace.who); 1400 } 1401 1402 dp->delegation_type = dsp->rds_dtype; 1403 1404 switch (dsp->rds_dtype) { 1405 case OPEN_DELEGATE_NONE: 1406 break; 1407 case OPEN_DELEGATE_READ: 1408 rp = &dp->open_delegation4_u.read; 1409 rp->stateid = dsp->rds_delegid.stateid; 1410 rp->recall = (bool_t)recall; 1411 rp->permissions = nace; 1412 break; 1413 case OPEN_DELEGATE_WRITE: 1414 wp = &dp->open_delegation4_u.write; 1415 wp->stateid = dsp->rds_delegid.stateid; 1416 wp->recall = (bool_t)recall; 1417 spl = &wp->space_limit; 1418 spl->limitby = NFS_LIMIT_SIZE; 1419 spl->nfs_space_limit4_u.filesize = 0; 1420 wp->permissions = nace; 1421 break; 1422 } 1423 } 1424 1425 /* 1426 * Check if the file is delegated via the provided file struct. 1427 * Return TRUE if it is delegated. This is intended for use by 1428 * the v4 server. The v2/v3 server code should use rfs4_check_delegated(). 1429 * 1430 * Note that if the file is found to have a delegation, it is 1431 * recalled, unless the clientid of the caller matches the clientid of the 1432 * delegation. If the caller has specified, there is a slight delay 1433 * inserted in the hopes that the delegation will be returned quickly. 1434 */ 1435 bool_t 1436 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp, 1437 bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp) 1438 { 1439 rfs4_deleg_state_t *dsp; 1440 1441 /* Is delegation enabled? */ 1442 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE) 1443 return (FALSE); 1444 1445 /* do we have a delegation on this file? */ 1446 rfs4_dbe_lock(fp->rf_dbe); 1447 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) { 1448 if (is_rm) 1449 fp->rf_dinfo.rd_hold_grant++; 1450 rfs4_dbe_unlock(fp->rf_dbe); 1451 return (FALSE); 1452 } 1453 /* 1454 * do we have a write delegation on this file or are we 1455 * requesting write access to a file with any type of existing 1456 * delegation? 1457 */ 1458 if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) { 1459 if (cp != NULL) { 1460 dsp = list_head(&fp->rf_delegstatelist); 1461 if (dsp == NULL) { 1462 rfs4_dbe_unlock(fp->rf_dbe); 1463 return (FALSE); 1464 } 1465 /* 1466 * Does the requestor already own the delegation? 1467 */ 1468 if (dsp->rds_client->rc_clientid == *(cp)) { 1469 rfs4_dbe_unlock(fp->rf_dbe); 1470 return (FALSE); 1471 } 1472 } 1473 1474 rfs4_dbe_unlock(fp->rf_dbe); 1475 rfs4_recall_deleg(fp, trunc, NULL); 1476 1477 if (!do_delay) { 1478 rfs4_dbe_lock(fp->rf_dbe); 1479 fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec(); 1480 rfs4_dbe_unlock(fp->rf_dbe); 1481 return (TRUE); 1482 } 1483 1484 delay(NFS4_DELEGATION_CONFLICT_DELAY); 1485 1486 rfs4_dbe_lock(fp->rf_dbe); 1487 if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) { 1488 fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec(); 1489 rfs4_dbe_unlock(fp->rf_dbe); 1490 return (TRUE); 1491 } 1492 } 1493 if (is_rm) 1494 fp->rf_dinfo.rd_hold_grant++; 1495 rfs4_dbe_unlock(fp->rf_dbe); 1496 return (FALSE); 1497 } 1498 1499 /* 1500 * Check if the file is delegated in the case of a v2 or v3 access. 1501 * Return TRUE if it is delegated which in turn means that v2 should 1502 * drop the request and in the case of v3 JUKEBOX should be returned. 1503 */ 1504 bool_t 1505 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc) 1506 { 1507 rfs4_file_t *fp; 1508 bool_t create = FALSE; 1509 bool_t rc = FALSE; 1510 1511 rfs4_hold_deleg_policy(); 1512 1513 /* Is delegation enabled? */ 1514 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) { 1515 fp = rfs4_findfile(vp, NULL, &create); 1516 if (fp != NULL) { 1517 if (rfs4_check_delegated_byfp(mode, fp, trunc, 1518 TRUE, FALSE, NULL)) { 1519 rc = TRUE; 1520 } 1521 rfs4_file_rele(fp); 1522 } 1523 } 1524 rfs4_rele_deleg_policy(); 1525 return (rc); 1526 } 1527 1528 /* 1529 * Release a hold on the hold_grant counter which 1530 * prevents delegation from being granted while a remove 1531 * or a rename is in progress. 1532 */ 1533 void 1534 rfs4_clear_dont_grant(rfs4_file_t *fp) 1535 { 1536 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE) 1537 return; 1538 rfs4_dbe_lock(fp->rf_dbe); 1539 ASSERT(fp->rf_dinfo.rd_hold_grant > 0); 1540 fp->rf_dinfo.rd_hold_grant--; 1541 fp->rf_dinfo.rd_time_rm_delayed = 0; 1542 rfs4_dbe_unlock(fp->rf_dbe); 1543 } 1544 1545 /* 1546 * State support for delegation. 1547 * Set the state delegation type for this state; 1548 * This routine is called from open via rfs4_grant_delegation and the entry 1549 * locks on sp and sp->rs_finfo are assumed. 1550 */ 1551 static rfs4_deleg_state_t * 1552 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall) 1553 { 1554 rfs4_file_t *fp = sp->rs_finfo; 1555 bool_t create = TRUE; 1556 rfs4_deleg_state_t *dsp; 1557 vnode_t *vp; 1558 int open_prev = *recall; 1559 int ret; 1560 int fflags = 0; 1561 1562 ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); 1563 ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); 1564 1565 /* Shouldn't happen */ 1566 if (fp->rf_dinfo.rd_recall_count != 0 || 1567 (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ && 1568 dtype != OPEN_DELEGATE_READ)) { 1569 return (NULL); 1570 } 1571 1572 /* Unlock to avoid deadlock */ 1573 rfs4_dbe_unlock(fp->rf_dbe); 1574 rfs4_dbe_unlock(sp->rs_dbe); 1575 1576 dsp = rfs4_finddeleg(sp, &create); 1577 1578 rfs4_dbe_lock(sp->rs_dbe); 1579 rfs4_dbe_lock(fp->rf_dbe); 1580 1581 if (dsp == NULL) 1582 return (NULL); 1583 1584 /* 1585 * It is possible that since we dropped the lock 1586 * in order to call finddeleg, the rfs4_file_t 1587 * was marked such that we should not grant a 1588 * delegation, if so bail out. 1589 */ 1590 if (fp->rf_dinfo.rd_hold_grant > 0) { 1591 rfs4_deleg_state_rele(dsp); 1592 return (NULL); 1593 } 1594 1595 if (create == FALSE) { 1596 if (sp->rs_owner->ro_client == dsp->rds_client && 1597 dsp->rds_dtype == dtype) { 1598 return (dsp); 1599 } else { 1600 rfs4_deleg_state_rele(dsp); 1601 return (NULL); 1602 } 1603 } 1604 1605 /* 1606 * Check that this file has not been delegated to another 1607 * client 1608 */ 1609 if (fp->rf_dinfo.rd_recall_count != 0 || 1610 fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE || 1611 (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ && 1612 dtype != OPEN_DELEGATE_READ)) { 1613 rfs4_deleg_state_rele(dsp); 1614 return (NULL); 1615 } 1616 1617 vp = fp->rf_vp; 1618 /* vnevent_support returns 0 if file system supports vnevents */ 1619 if (vnevent_support(vp, NULL)) { 1620 rfs4_deleg_state_rele(dsp); 1621 return (NULL); 1622 } 1623 1624 /* Calculate the fflags for this OPEN. */ 1625 if (sp->rs_share_access & OPEN4_SHARE_ACCESS_READ) 1626 fflags |= FREAD; 1627 if (sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE) 1628 fflags |= FWRITE; 1629 1630 *recall = 0; 1631 /* 1632 * Before granting a delegation we need to know if anyone else has 1633 * opened the file in a conflicting mode. However, first we need to 1634 * know how we opened the file to check the counts properly. 1635 */ 1636 if (dtype == OPEN_DELEGATE_READ) { 1637 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1638 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1639 vn_is_mapped(vp, V_WRITE)) { 1640 if (open_prev) { 1641 *recall = 1; 1642 } else { 1643 rfs4_deleg_state_rele(dsp); 1644 return (NULL); 1645 } 1646 } 1647 ret = fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ, 1648 rfs4_mon_hold, rfs4_mon_rele); 1649 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1650 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1651 vn_is_mapped(vp, V_WRITE)) { 1652 if (open_prev) { 1653 *recall = 1; 1654 } else { 1655 (void) fem_uninstall(vp, deleg_rdops, 1656 (void *)fp); 1657 rfs4_deleg_state_rele(dsp); 1658 return (NULL); 1659 } 1660 } 1661 /* 1662 * Because a client can hold onto a delegation after the 1663 * file has been closed, we need to keep track of the 1664 * access to this file. Otherwise the CIFS server would 1665 * not know about the client accessing the file and could 1666 * inappropriately grant an OPLOCK. 1667 * fem_install() returns EBUSY when asked to install a 1668 * OPUNIQ monitor more than once. Therefore, check the 1669 * return code because we only want this done once. 1670 */ 1671 if (ret == 0) 1672 vn_open_upgrade(vp, FREAD); 1673 } else { /* WRITE */ 1674 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1675 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1676 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) || 1677 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) || 1678 vn_is_mapped(vp, V_RDORWR)) { 1679 if (open_prev) { 1680 *recall = 1; 1681 } else { 1682 rfs4_deleg_state_rele(dsp); 1683 return (NULL); 1684 } 1685 } 1686 ret = fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ, 1687 rfs4_mon_hold, rfs4_mon_rele); 1688 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1689 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1690 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) || 1691 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) || 1692 vn_is_mapped(vp, V_RDORWR)) { 1693 if (open_prev) { 1694 *recall = 1; 1695 } else { 1696 (void) fem_uninstall(vp, deleg_wrops, 1697 (void *)fp); 1698 rfs4_deleg_state_rele(dsp); 1699 return (NULL); 1700 } 1701 } 1702 /* 1703 * Because a client can hold onto a delegation after the 1704 * file has been closed, we need to keep track of the 1705 * access to this file. Otherwise the CIFS server would 1706 * not know about the client accessing the file and could 1707 * inappropriately grant an OPLOCK. 1708 * fem_install() returns EBUSY when asked to install a 1709 * OPUNIQ monitor more than once. Therefore, check the 1710 * return code because we only want this done once. 1711 */ 1712 if (ret == 0) 1713 vn_open_upgrade(vp, FREAD|FWRITE); 1714 } 1715 /* Place on delegation list for file */ 1716 ASSERT(!list_link_active(&dsp->rds_node)); 1717 list_insert_tail(&fp->rf_delegstatelist, dsp); 1718 1719 dsp->rds_dtype = fp->rf_dinfo.rd_dtype = dtype; 1720 1721 /* Update delegation stats for this file */ 1722 fp->rf_dinfo.rd_time_lastgrant = gethrestime_sec(); 1723 1724 /* reset since this is a new delegation */ 1725 fp->rf_dinfo.rd_conflicted_client = 0; 1726 fp->rf_dinfo.rd_ever_recalled = FALSE; 1727 1728 if (dtype == OPEN_DELEGATE_READ) 1729 fp->rf_dinfo.rd_rdgrants++; 1730 else 1731 fp->rf_dinfo.rd_wrgrants++; 1732 1733 return (dsp); 1734 } 1735 1736 /* 1737 * State routine for the server when a delegation is returned. 1738 */ 1739 void 1740 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked) 1741 { 1742 rfs4_file_t *fp = dsp->rds_finfo; 1743 open_delegation_type4 dtypewas; 1744 1745 rfs4_dbe_lock(fp->rf_dbe); 1746 1747 /* nothing to do if no longer on list */ 1748 if (!list_link_active(&dsp->rds_node)) { 1749 rfs4_dbe_unlock(fp->rf_dbe); 1750 return; 1751 } 1752 1753 /* Remove state from recall list */ 1754 list_remove(&fp->rf_delegstatelist, dsp); 1755 1756 if (list_is_empty(&fp->rf_delegstatelist)) { 1757 dtypewas = fp->rf_dinfo.rd_dtype; 1758 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE; 1759 rfs4_dbe_cv_broadcast(fp->rf_dbe); 1760 1761 /* if file system was unshared, the vp will be NULL */ 1762 if (fp->rf_vp != NULL) { 1763 /* 1764 * Once a delegation is no longer held by any client, 1765 * the monitor is uninstalled. At this point, the 1766 * client must send OPEN otw, so we don't need the 1767 * reference on the vnode anymore. The open 1768 * downgrade removes the reference put on earlier. 1769 */ 1770 if (dtypewas == OPEN_DELEGATE_READ) { 1771 (void) fem_uninstall(fp->rf_vp, deleg_rdops, 1772 (void *)fp); 1773 vn_open_downgrade(fp->rf_vp, FREAD); 1774 } else if (dtypewas == OPEN_DELEGATE_WRITE) { 1775 (void) fem_uninstall(fp->rf_vp, deleg_wrops, 1776 (void *)fp); 1777 vn_open_downgrade(fp->rf_vp, FREAD|FWRITE); 1778 } 1779 } 1780 } 1781 1782 switch (dsp->rds_dtype) { 1783 case OPEN_DELEGATE_READ: 1784 fp->rf_dinfo.rd_rdgrants--; 1785 break; 1786 case OPEN_DELEGATE_WRITE: 1787 fp->rf_dinfo.rd_wrgrants--; 1788 break; 1789 default: 1790 break; 1791 } 1792 1793 /* used in the policy decision */ 1794 fp->rf_dinfo.rd_time_returned = gethrestime_sec(); 1795 1796 /* 1797 * reset the time_recalled field so future delegations are not 1798 * accidentally revoked 1799 */ 1800 if ((fp->rf_dinfo.rd_rdgrants + fp->rf_dinfo.rd_wrgrants) == 0) 1801 fp->rf_dinfo.rd_time_recalled = 0; 1802 1803 rfs4_dbe_unlock(fp->rf_dbe); 1804 1805 rfs4_dbe_lock(dsp->rds_dbe); 1806 1807 dsp->rds_dtype = OPEN_DELEGATE_NONE; 1808 1809 if (revoked == TRUE) 1810 dsp->rds_time_revoked = gethrestime_sec(); 1811 1812 rfs4_dbe_invalidate(dsp->rds_dbe); 1813 1814 rfs4_dbe_unlock(dsp->rds_dbe); 1815 1816 if (revoked == TRUE) { 1817 rfs4_dbe_lock(dsp->rds_client->rc_dbe); 1818 dsp->rds_client->rc_deleg_revoked++; /* observability */ 1819 rfs4_dbe_unlock(dsp->rds_client->rc_dbe); 1820 } 1821 } 1822 1823 static void 1824 rfs4_revoke_file(rfs4_file_t *fp) 1825 { 1826 rfs4_deleg_state_t *dsp; 1827 1828 /* 1829 * The lock for rfs4_file_t must be held when traversing the 1830 * delegation list but that lock needs to be released to call 1831 * rfs4_return_deleg() 1832 */ 1833 rfs4_dbe_lock(fp->rf_dbe); 1834 while (dsp = list_head(&fp->rf_delegstatelist)) { 1835 rfs4_dbe_hold(dsp->rds_dbe); 1836 rfs4_dbe_unlock(fp->rf_dbe); 1837 rfs4_return_deleg(dsp, TRUE); 1838 rfs4_deleg_state_rele(dsp); 1839 rfs4_dbe_lock(fp->rf_dbe); 1840 } 1841 rfs4_dbe_unlock(fp->rf_dbe); 1842 } 1843 1844 /* 1845 * A delegation is assumed to be present on the file associated with 1846 * "sp". Check to see if the delegation matches is associated with 1847 * the same client as referenced by "sp". If it is not, TRUE is 1848 * returned. If the delegation DOES match the client (or no 1849 * delegation is present), return FALSE. 1850 * Assume the state entry and file entry are locked. 1851 */ 1852 bool_t 1853 rfs4_is_deleg(rfs4_state_t *sp) 1854 { 1855 rfs4_deleg_state_t *dsp; 1856 rfs4_file_t *fp = sp->rs_finfo; 1857 rfs4_client_t *cp = sp->rs_owner->ro_client; 1858 1859 ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); 1860 for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL; 1861 dsp = list_next(&fp->rf_delegstatelist, dsp)) { 1862 if (cp != dsp->rds_client) { 1863 return (TRUE); 1864 } 1865 } 1866 return (FALSE); 1867 } 1868 1869 void 1870 rfs4_disable_delegation(void) 1871 { 1872 mutex_enter(&rfs4_deleg_lock); 1873 rfs4_deleg_disabled++; 1874 mutex_exit(&rfs4_deleg_lock); 1875 } 1876 1877 void 1878 rfs4_enable_delegation(void) 1879 { 1880 mutex_enter(&rfs4_deleg_lock); 1881 ASSERT(rfs4_deleg_disabled > 0); 1882 rfs4_deleg_disabled--; 1883 mutex_exit(&rfs4_deleg_lock); 1884 } 1885 1886 void 1887 rfs4_mon_hold(void *arg) 1888 { 1889 rfs4_file_t *fp = arg; 1890 1891 rfs4_dbe_hold(fp->rf_dbe); 1892 } 1893 1894 void 1895 rfs4_mon_rele(void *arg) 1896 { 1897 rfs4_file_t *fp = arg; 1898 1899 rfs4_dbe_rele_nolock(fp->rf_dbe); 1900 } 1901