/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/systm.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>
#include <nfs/nfs4_kprot.h>
#include <nfs/nfs4.h>
#include <nfs/lm.h>
#include <sys/cmn_err.h>
#include <sys/disp.h>
#include <sys/sdt.h>

#include <sys/pathname.h>

#include <sys/strsubr.h>
#include <sys/ddi.h>

#include <sys/vnode.h>
#include <sys/sdt.h>
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>

/*
 * Threshold that rfs4_delegation_policy() compares a file's read-grant
 * count (dinfo->rdgrants) against before handing out another read
 * delegation.
 */
#define MAX_READ_DELEGATIONS 5

/*
 * rfs4_deleg_policy is the server-wide delegation policy.  It is written
 * with rfs4_deleg_policy_lock held as writer (rfs4_set_deleg_policy());
 * rfs4_hold_deleg_policy()/rfs4_rele_deleg_policy() take and drop the
 * lock as reader.
 */
krwlock_t rfs4_deleg_policy_lock;
srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE;
/* NOTE(review): no use of rfs4_deleg_wlp is visible in this part of the file */
static int rfs4_deleg_wlp = 5;
/*
 * rfs4_deleg_disabled is sampled under rfs4_deleg_lock by
 * rfs4_grant_delegation(); a non-zero value suppresses new grants.
 */
kmutex_t rfs4_deleg_lock;
static int rfs4_deleg_disabled;

#ifdef DEBUG

static int rfs4_test_cbgetattr_fail = 0;
/* Incremented by rfs4_do_cb_null() each time a CB_NULL call succeeds */
int rfs4_cb_null;
int rfs4_cb_debug;
int rfs4_deleg_debug;

#endif

/* Forward declarations for routines used before their definitions */
static void rfs4_recall_file(rfs4_file_t *,
    void (*recall)(rfs4_deleg_state_t *, bool_t),
    bool_t, rfs4_client_t *);
static void
rfs4_revoke_deleg(rfs4_deleg_state_t *); 70 static void rfs4_revoke_file(rfs4_file_t *); 71 static void rfs4_cb_chflush(rfs4_cbinfo_t *); 72 static CLIENT *rfs4_cb_getch(rfs4_cbinfo_t *); 73 static void rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t); 74 static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *, 75 open_delegation_type4, int *); 76 77 /* 78 * Convert a universal address to an transport specific 79 * address using inet_pton. 80 */ 81 static int 82 uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp) 83 { 84 int dots = 0, i, j, len, k; 85 unsigned char c; 86 in_port_t port = 0; 87 88 len = strlen(ua); 89 90 for (i = len-1; i >= 0; i--) { 91 92 if (ua[i] == '.') 93 dots++; 94 95 if (dots == 2) { 96 97 ua[i] = '\0'; 98 /* 99 * We use k to remember were to stick '.' back, since 100 * ua was kmem_allocateded from the pool len+1. 101 */ 102 k = i; 103 if (inet_pton(af, ua, ap) == 1) { 104 105 c = 0; 106 107 for (j = i+1; j < len; j++) { 108 if (ua[j] == '.') { 109 port = c << 8; 110 c = 0; 111 } else if (ua[j] >= '0' && 112 ua[j] <= '9') { 113 c *= 10; 114 c += ua[j] - '0'; 115 } else { 116 ua[k] = '.'; 117 return (EINVAL); 118 } 119 } 120 port += c; 121 122 123 /* reset to network order */ 124 if (af == AF_INET) { 125 *(uint32_t *)ap = 126 htonl(*(uint32_t *)ap); 127 *pp = htons(port); 128 } else { 129 int ix; 130 uint16_t *sap; 131 132 for (sap = ap, ix = 0; ix < 133 sizeof (struct in6_addr) / 134 sizeof (uint16_t); ix++) 135 sap[ix] = htons(sap[ix]); 136 137 *pp = htons(port); 138 } 139 140 ua[k] = '.'; 141 return (0); 142 } else { 143 ua[k] = '.'; 144 return (EINVAL); 145 } 146 } 147 } 148 149 return (EINVAL); 150 } 151 152 /* 153 * Update the delegation policy with the 154 * value of "new_policy" 155 */ 156 void 157 rfs4_set_deleg_policy(srv_deleg_policy_t new_policy) 158 { 159 rw_enter(&rfs4_deleg_policy_lock, RW_WRITER); 160 rfs4_deleg_policy = new_policy; 161 rw_exit(&rfs4_deleg_policy_lock); 162 } 163 164 void 165 
rfs4_hold_deleg_policy(void) 166 { 167 rw_enter(&rfs4_deleg_policy_lock, RW_READER); 168 } 169 170 void 171 rfs4_rele_deleg_policy(void) 172 { 173 rw_exit(&rfs4_deleg_policy_lock); 174 } 175 176 177 /* 178 * This free function is to be used when the client struct is being 179 * released and nothing at all is needed of the callback info any 180 * longer. 181 */ 182 void 183 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp) 184 { 185 char *addr = cbp->cb_callback.cb_location.r_addr; 186 char *netid = cbp->cb_callback.cb_location.r_netid; 187 188 /* Free old address if any */ 189 190 if (addr) 191 kmem_free(addr, strlen(addr) + 1); 192 if (netid) 193 kmem_free(netid, strlen(netid) + 1); 194 195 addr = cbp->cb_newer.cb_callback.cb_location.r_addr; 196 netid = cbp->cb_newer.cb_callback.cb_location.r_netid; 197 198 if (addr) 199 kmem_free(addr, strlen(addr) + 1); 200 if (netid) 201 kmem_free(netid, strlen(netid) + 1); 202 203 if (cbp->cb_chc_free) { 204 rfs4_cb_chflush(cbp); 205 } 206 } 207 208 /* 209 * The server uses this to check the callback path supplied by the 210 * client. The callback connection is marked "in progress" while this 211 * work is going on and then eventually marked either OK or FAILED. 212 * This work can be done as part of a separate thread and at the end 213 * of this the thread will exit or it may be done such that the caller 214 * will continue with other work. 215 */ 216 static void 217 rfs4_do_cb_null(rfs4_client_t *cp) 218 { 219 struct timeval tv; 220 CLIENT *ch; 221 rfs4_cbstate_t newstate; 222 rfs4_cbinfo_t *cbp = &cp->cbinfo; 223 224 mutex_enter(cbp->cb_lock); 225 /* If another thread is doing CB_NULL RPC then return */ 226 if (cbp->cb_nullcaller == TRUE) { 227 mutex_exit(cbp->cb_lock); 228 rfs4_client_rele(cp); 229 return; 230 } 231 232 /* Mark the cbinfo as having a thread in the NULL callback */ 233 cbp->cb_nullcaller = TRUE; 234 235 /* 236 * Are there other threads still using the cbinfo client 237 * handles? 
* If so, this thread must wait before going and
	 * mucking around with the callback information.  The waiters
	 * are woken through cb_cv_nullcaller by rfs4_cbinfo_rele() when
	 * the reference count drops to zero.
	 */
	while (cbp->cb_refcnt != 0)
		cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock);

	/*
	 * This thread itself may find that new callback info has
	 * arrived and is set up to handle this case and redrive the
	 * call to the client's callback server.
	 *
	 * cb_lock is held every time control reaches "retry".
	 */
retry:
	if (cbp->cb_newer.cb_new == TRUE &&
	    cbp->cb_newer.cb_confirmed == TRUE) {
		char *addr = cbp->cb_callback.cb_location.r_addr;
		char *netid = cbp->cb_callback.cb_location.r_netid;

		/*
		 * Free the old stuff if it exists; may be the first
		 * time through this path
		 */
		if (addr)
			kmem_free(addr, strlen(addr) + 1);
		if (netid)
			kmem_free(netid, strlen(netid) + 1);

		/*
		 * Move over the addr/netid; ownership of the strings
		 * passes from cb_newer to cb_callback.
		 */
		cbp->cb_callback.cb_location.r_addr =
		    cbp->cb_newer.cb_callback.cb_location.r_addr;
		cbp->cb_newer.cb_callback.cb_location.r_addr = NULL;
		cbp->cb_callback.cb_location.r_netid =
		    cbp->cb_newer.cb_callback.cb_location.r_netid;
		cbp->cb_newer.cb_callback.cb_location.r_netid = NULL;

		/* Get the program number */
		cbp->cb_callback.cb_program =
		    cbp->cb_newer.cb_callback.cb_program;
		cbp->cb_newer.cb_callback.cb_program = 0;

		/* Don't forget the protocol's "cb_ident" field */
		cbp->cb_ident = cbp->cb_newer.cb_ident;
		cbp->cb_newer.cb_ident = 0;

		/* no longer new */
		cbp->cb_newer.cb_new = FALSE;
		cbp->cb_newer.cb_confirmed = FALSE;

		/* get rid of the old client handles that may exist */
		rfs4_cb_chflush(cbp);

		/* CB_NONE makes the code below test the new path */
		cbp->cb_state = CB_NONE;
		cbp->cb_timefailed = 0; /* reset the clock */
		cbp->cb_notified_of_cb_path_down = TRUE;
	}

	/*
	 * Any state other than CB_NONE means there is nothing left for
	 * this thread to probe; wake the waiters and finish.
	 */
	if (cbp->cb_state != CB_NONE) {
		cv_broadcast(cbp->cb_cv); /* let the others know */
		cbp->cb_nullcaller = FALSE;
		mutex_exit(cbp->cb_lock);
		rfs4_client_rele(cp);
		return;
	}

	/* mark rfs4_client_t as CALLBACK NULL in
progress */ 301 cbp->cb_state = CB_INPROG; 302 mutex_exit(cbp->cb_lock); 303 304 /* get/generate a client handle */ 305 if ((ch = rfs4_cb_getch(cbp)) == NULL) { 306 mutex_enter(cbp->cb_lock); 307 cbp->cb_state = CB_BAD; 308 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 309 goto retry; 310 } 311 312 313 tv.tv_sec = 30; 314 tv.tv_usec = 0; 315 if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) { 316 newstate = CB_BAD; 317 } else { 318 newstate = CB_OK; 319 #ifdef DEBUG 320 rfs4_cb_null++; 321 #endif 322 } 323 324 /* Check to see if the client has specified new callback info */ 325 mutex_enter(cbp->cb_lock); 326 rfs4_cb_freech(cbp, ch, TRUE); 327 if (cbp->cb_newer.cb_new == TRUE && 328 cbp->cb_newer.cb_confirmed == TRUE) { 329 goto retry; /* give the CB_NULL another chance */ 330 } 331 332 cbp->cb_state = newstate; 333 if (cbp->cb_state == CB_BAD) 334 cbp->cb_timefailed = gethrestime_sec(); /* observability */ 335 336 cv_broadcast(cbp->cb_cv); /* start up the other threads */ 337 cbp->cb_nullcaller = FALSE; 338 mutex_exit(cbp->cb_lock); 339 340 rfs4_client_rele(cp); 341 } 342 343 /* 344 * Given a client struct, inspect the callback info to see if the 345 * callback path is up and available. If it is being initialized, 346 * then wait for the CB_NULL RPC call to occur. 347 */ 348 static rfs4_cbinfo_t * 349 rfs4_cbinfo_hold(rfs4_client_t *cp) 350 { 351 rfs4_cbinfo_t *cbp = &cp->cbinfo; 352 353 retry: 354 mutex_enter(cbp->cb_lock); 355 356 if (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) { 357 /* 358 * Looks like a new callback path may be available and 359 * noone has set it up. 360 */ 361 mutex_exit(cbp->cb_lock); 362 rfs4_dbe_hold(cp->dbe); 363 rfs4_do_cb_null(cp); /* caller will release client hold */ 364 goto retry; 365 } 366 367 /* Is there a thread working on doing the CB_NULL RPC? 
*/
	if (cbp->cb_nullcaller == TRUE)
		cv_wait(cbp->cb_cv, cbp->cb_lock); /* if so, wait on it */

	/* If the callback path is not okay (up and running), just quit */
	if (cbp->cb_state != CB_OK) {
		mutex_exit(cbp->cb_lock);
		return (NULL);
	}

	/* Let someone know we are using the current callback info */
	cbp->cb_refcnt++;
	mutex_exit(cbp->cb_lock);
	return (cbp);
}

/*
 * The caller is done with the callback info.  It may be that the
 * caller's RPC failed and the NFSv4 client has actually provided new
 * callback information.  If so, let the caller know so they can take
 * advantage of this and maybe retry the RPC that originally failed.
 *
 * "newstate" is stored as the callback state unless it is CB_NOCHANGE.
 * Returns TRUE when the caller should retry, FALSE otherwise.
 */
static int
rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate)
{
	int cb_new = FALSE;

	mutex_enter(cbp->cb_lock);

	/* The caller gets a chance to mark the callback info as bad */
	if (newstate != CB_NOCHANGE)
		cbp->cb_state = newstate;
	if (newstate == CB_FAILED) {
		cbp->cb_timefailed = gethrestime_sec(); /* observability */
		cbp->cb_notified_of_cb_path_down = FALSE;
	}

	cbp->cb_refcnt--; /* no longer using the information */

	/*
	 * A thread may be waiting on this one to finish and if so,
	 * let it know that it is okay to do the CB_NULL to the
	 * client's callback server.
410 */ 411 if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller) 412 cv_broadcast(cbp->cb_cv_nullcaller); 413 414 /* 415 * If this is the last thread to use the callback info and 416 * there is new callback information to try and no thread is 417 * there ready to do the CB_NULL, then return true to teh 418 * caller so they can do the CB_NULL 419 */ 420 if (cbp->cb_refcnt == 0 && 421 cbp->cb_nullcaller == FALSE && 422 cbp->cb_newer.cb_new == TRUE && 423 cbp->cb_newer.cb_confirmed == TRUE) 424 cb_new = TRUE; 425 426 mutex_exit(cbp->cb_lock); 427 428 return (cb_new); 429 } 430 431 /* 432 * Given the information in the callback info struct, create a client 433 * handle that can be used by the server for its callback path. 434 */ 435 static CLIENT * 436 rfs4_cbch_init(rfs4_cbinfo_t *cbp) 437 { 438 struct knetconfig knc; 439 vnode_t *vp; 440 struct sockaddr_in addr4; 441 struct sockaddr_in6 addr6; 442 void *addr, *taddr; 443 in_port_t *pp; 444 int af; 445 char *devnam; 446 struct netbuf nb; 447 int size; 448 CLIENT *ch = NULL; 449 int useresvport = 0; 450 451 mutex_enter(cbp->cb_lock); 452 453 if (cbp->cb_callback.cb_location.r_netid == NULL || 454 cbp->cb_callback.cb_location.r_addr == NULL) { 455 goto cb_init_out; 456 } 457 458 if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) { 459 knc.knc_semantics = NC_TPI_COTS; 460 knc.knc_protofmly = "inet"; 461 knc.knc_proto = "tcp"; 462 devnam = "/dev/tcp"; 463 af = AF_INET; 464 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp") 465 == 0) { 466 knc.knc_semantics = NC_TPI_CLTS; 467 knc.knc_protofmly = "inet"; 468 knc.knc_proto = "udp"; 469 devnam = "/dev/udp"; 470 af = AF_INET; 471 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6") 472 == 0) { 473 knc.knc_semantics = NC_TPI_COTS; 474 knc.knc_protofmly = "inet6"; 475 knc.knc_proto = "tcp"; 476 devnam = "/dev/tcp6"; 477 af = AF_INET6; 478 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6") 479 == 0) { 480 knc.knc_semantics = NC_TPI_CLTS; 
481 knc.knc_protofmly = "inet6"; 482 knc.knc_proto = "udp"; 483 devnam = "/dev/udp6"; 484 af = AF_INET6; 485 } else { 486 goto cb_init_out; 487 } 488 489 if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) { 490 491 goto cb_init_out; 492 } 493 494 if (vp->v_type != VCHR) { 495 VN_RELE(vp); 496 goto cb_init_out; 497 } 498 499 knc.knc_rdev = vp->v_rdev; 500 501 VN_RELE(vp); 502 503 if (af == AF_INET) { 504 size = sizeof (addr4); 505 bzero(&addr4, size); 506 addr4.sin_family = (sa_family_t)af; 507 addr = &addr4.sin_addr; 508 pp = &addr4.sin_port; 509 taddr = &addr4; 510 } else /* AF_INET6 */ { 511 size = sizeof (addr6); 512 bzero(&addr6, size); 513 addr6.sin6_family = (sa_family_t)af; 514 addr = &addr6.sin6_addr; 515 pp = &addr6.sin6_port; 516 taddr = &addr6; 517 } 518 519 if (uaddr2sockaddr(af, 520 cbp->cb_callback.cb_location.r_addr, addr, pp)) { 521 522 goto cb_init_out; 523 } 524 525 526 nb.maxlen = nb.len = size; 527 nb.buf = (char *)taddr; 528 529 if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program, 530 NFS_CB, 0, 0, curthread->t_cred, &ch)) { 531 532 ch = NULL; 533 } 534 535 /* turn off reserved port usage */ 536 (void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport); 537 538 cb_init_out: 539 mutex_exit(cbp->cb_lock); 540 return (ch); 541 } 542 543 /* 544 * Iterate over the client handle cache and 545 * destroy it. 546 */ 547 static void 548 rfs4_cb_chflush(rfs4_cbinfo_t *cbp) 549 { 550 CLIENT *ch; 551 552 while (cbp->cb_chc_free) { 553 cbp->cb_chc_free--; 554 ch = cbp->cb_chc[cbp->cb_chc_free]; 555 cbp->cb_chc[cbp->cb_chc_free] = NULL; 556 if (ch) { 557 if (ch->cl_auth) 558 auth_destroy(ch->cl_auth); 559 clnt_destroy(ch); 560 } 561 } 562 } 563 564 /* 565 * Return a client handle, either from a the small 566 * rfs4_client_t cache or one that we just created. 
567 */ 568 static CLIENT * 569 rfs4_cb_getch(rfs4_cbinfo_t *cbp) 570 { 571 CLIENT *cbch = NULL; 572 uint32_t zilch = 0; 573 574 mutex_enter(cbp->cb_lock); 575 576 if (cbp->cb_chc_free) { 577 cbp->cb_chc_free--; 578 cbch = cbp->cb_chc[ cbp->cb_chc_free ]; 579 mutex_exit(cbp->cb_lock); 580 (void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch); 581 return (cbch); 582 } 583 584 mutex_exit(cbp->cb_lock); 585 586 /* none free so make it now */ 587 cbch = rfs4_cbch_init(cbp); 588 589 return (cbch); 590 } 591 592 /* 593 * Return the client handle to the small cache or 594 * destroy it. 595 */ 596 static void 597 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld) 598 { 599 if (lockheld == FALSE) 600 mutex_enter(cbp->cb_lock); 601 602 if (cbp->cb_chc_free < RFS4_CBCH_MAX) { 603 cbp->cb_chc[ cbp->cb_chc_free++ ] = ch; 604 if (lockheld == FALSE) 605 mutex_exit(cbp->cb_lock); 606 return; 607 } 608 if (lockheld == FALSE) 609 mutex_exit(cbp->cb_lock); 610 611 /* 612 * cache maxed out of free entries, obliterate 613 * this client handle, destroy it, throw it away. 614 */ 615 if (ch->cl_auth) 616 auth_destroy(ch->cl_auth); 617 clnt_destroy(ch); 618 } 619 620 /* 621 * With the supplied callback information - initialize the client 622 * callback data. If there is a callback in progress, save the 623 * callback info so that a thread can pick it up in the future. 
624 */ 625 void 626 rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident) 627 { 628 char *addr = NULL; 629 char *netid = NULL; 630 rfs4_cbinfo_t *cbp = &cp->cbinfo; 631 size_t len; 632 633 /* Set the call back for the client */ 634 if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' && 635 cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') { 636 len = strlen(cb->cb_location.r_addr) + 1; 637 addr = kmem_alloc(len, KM_SLEEP); 638 bcopy(cb->cb_location.r_addr, addr, len); 639 len = strlen(cb->cb_location.r_netid) + 1; 640 netid = kmem_alloc(len, KM_SLEEP); 641 bcopy(cb->cb_location.r_netid, netid, len); 642 } 643 /* ready to save the new information but first free old, if exists */ 644 mutex_enter(cbp->cb_lock); 645 646 cbp->cb_newer.cb_callback.cb_program = cb->cb_program; 647 648 if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL) 649 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr, 650 strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1); 651 cbp->cb_newer.cb_callback.cb_location.r_addr = addr; 652 653 if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL) 654 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid, 655 strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1); 656 cbp->cb_newer.cb_callback.cb_location.r_netid = netid; 657 658 cbp->cb_newer.cb_ident = cb_ident; 659 660 if (addr && *addr && netid && *netid) { 661 cbp->cb_newer.cb_new = TRUE; 662 cbp->cb_newer.cb_confirmed = FALSE; 663 } else { 664 cbp->cb_newer.cb_new = FALSE; 665 cbp->cb_newer.cb_confirmed = FALSE; 666 } 667 668 mutex_exit(cbp->cb_lock); 669 } 670 671 /* 672 * The server uses this when processing SETCLIENTID_CONFIRM. Callback 673 * information may have been provided on SETCLIENTID and this call 674 * marks that information as confirmed and then starts a thread to 675 * test the callback path. 
676 */ 677 void 678 rfs4_deleg_cb_check(rfs4_client_t *cp) 679 { 680 if (cp->cbinfo.cb_newer.cb_new == FALSE) 681 return; 682 683 cp->cbinfo.cb_newer.cb_confirmed = TRUE; 684 685 rfs4_dbe_hold(cp->dbe); /* hold the client struct for thread */ 686 687 (void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN, 688 minclsyspri); 689 } 690 691 static void 692 rfs4args_cb_recall_free(nfs_cb_argop4 *argop) 693 { 694 CB_RECALL4args *rec_argp; 695 696 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 697 if (rec_argp->fh.nfs_fh4_val) 698 kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len); 699 } 700 701 /* ARGSUSED */ 702 static void 703 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop) 704 { 705 CB_GETATTR4args *argp; 706 707 argp = &argop->nfs_cb_argop4_u.opcbgetattr; 708 if (argp->fh.nfs_fh4_val) 709 kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len); 710 } 711 712 static void 713 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp) 714 { 715 int i, arglen; 716 nfs_cb_argop4 *argop; 717 718 /* 719 * First free any special args alloc'd for specific ops. 720 */ 721 arglen = args->array_len; 722 argop = args->array; 723 for (i = 0; i < arglen; i++, argop++) { 724 725 switch (argop->argop) { 726 case OP_CB_RECALL: 727 rfs4args_cb_recall_free(argop); 728 break; 729 730 case OP_CB_GETATTR: 731 rfs4args_cb_getattr_free(argop); 732 break; 733 734 default: 735 return; 736 } 737 } 738 739 if (args->tag.utf8string_len > 0) 740 UTF8STRING_FREE(args->tag) 741 742 kmem_free(args->array, arglen * sizeof (nfs_cb_argop4)); 743 if (resp) 744 (void) xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp); 745 } 746 747 /* 748 * General callback routine for the server to the client. 
749 */ 750 static enum clnt_stat 751 rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args, 752 CB_COMPOUND4res *res, struct timeval timeout) 753 { 754 rfs4_cbinfo_t *cbp; 755 CLIENT *ch; 756 /* start with this in case cb_getch() fails */ 757 enum clnt_stat stat = RPC_FAILED; 758 759 res->tag.utf8string_val = NULL; 760 res->array = NULL; 761 762 retry: 763 cbp = rfs4_cbinfo_hold(cp); 764 if (cbp == NULL) 765 return (stat); 766 767 /* get a client handle */ 768 if ((ch = rfs4_cb_getch(cbp)) != NULL) { 769 /* 770 * reset the cb_ident since it may have changed in 771 * rfs4_cbinfo_hold() 772 */ 773 args->callback_ident = cbp->cb_ident; 774 775 stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv, 776 (caddr_t)args, xdr_CB_COMPOUND4res, 777 (caddr_t)res, timeout); 778 779 /* free client handle */ 780 rfs4_cb_freech(cbp, ch, FALSE); 781 } 782 783 /* 784 * If the rele says that there may be new callback info then 785 * retry this sequence and it may succeed as a result of the 786 * new callback path 787 */ 788 if (rfs4_cbinfo_rele(cbp, 789 (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE) 790 goto retry; 791 792 return (stat); 793 } 794 795 /* 796 * Used by the NFSv4 server to get attributes for a file while 797 * handling the case where a file has been write delegated. For the 798 * time being, VOP_GETATTR() is called and CB_GETATTR processing is 799 * not undertaken. This call site is maintained in case the server is 800 * updated in the future to handle write delegation space guarantees. 801 */ 802 nfsstat4 803 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 804 { 805 uint_t mask; 806 int error; 807 808 mask = vap->va_mask; 809 error = VOP_GETATTR(vp, vap, flag, cr, NULL); 810 /* 811 * Some file systems clobber va_mask. it is probably wrong of 812 * them to do so, nonethless we practice defensive coding. 813 * See bug id 4276830. 
814 */ 815 vap->va_mask = mask; 816 return (puterrno4(error)); 817 } 818 819 /* 820 * This is used everywhere in the v2/v3 server to allow the 821 * integration of all NFS versions and the support of delegation. For 822 * now, just call the VOP_GETATTR(). If the NFSv4 server is enhanced 823 * in the future to provide space guarantees for write delegations 824 * then this call site should be expanded to interact with the client. 825 */ 826 int 827 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr) 828 { 829 return (VOP_GETATTR(vp, vap, flag, cr, NULL)); 830 } 831 832 /* 833 * Place the actual cb_recall otw call to client. 834 */ 835 static void 836 rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc) 837 { 838 CB_COMPOUND4args cb4_args; 839 CB_COMPOUND4res cb4_res; 840 CB_RECALL4args *rec_argp; 841 CB_RECALL4res *rec_resp; 842 nfs_cb_argop4 *argop; 843 int numops; 844 int argoplist_size; 845 struct timeval timeout; 846 nfs_fh4 *fhp; 847 enum clnt_stat call_stat; 848 849 /* 850 * set up the compound args 851 */ 852 numops = 1; /* CB_RECALL only */ 853 854 argoplist_size = numops * sizeof (nfs_cb_argop4); 855 argop = kmem_zalloc(argoplist_size, KM_SLEEP); 856 argop->argop = OP_CB_RECALL; 857 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall; 858 859 (void) str_to_utf8("cb_recall", &cb4_args.tag); 860 cb4_args.minorversion = CB4_MINORVERSION; 861 /* cb4_args.callback_ident is set in rfs4_do_callback() */ 862 cb4_args.array_len = numops; 863 cb4_args.array = argop; 864 865 /* 866 * fill in the args struct 867 */ 868 bcopy(&dsp->delegid.stateid, &rec_argp->stateid, sizeof (stateid4)); 869 rec_argp->truncate = trunc; 870 871 fhp = &dsp->finfo->filehandle; 872 rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) * 873 fhp->nfs_fh4_len, KM_SLEEP); 874 nfs_fh4_copy(fhp, &rec_argp->fh); 875 876 /* Keep track of when we did this for observability */ 877 dsp->time_recalled = gethrestime_sec(); 878 879 /* 880 * Set up the timeout for the callback and make the 
* actual call.
	 * Timeout will be 80% of the lease period for this server.
	 */
	timeout.tv_sec = (rfs4_lease_time * 80) / 100;
	timeout.tv_usec = 0;

	DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->client,
	    rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp);

	call_stat = rfs4_do_callback(dsp->client, &cb4_args, &cb4_res, timeout);

	/* the "done" probe gets NULL when the result carries no ops */
	rec_resp = (cb4_res.array_len == 0) ? NULL :
	    &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall;

	DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->client,
	    rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp);

	/* a failed RPC or a non-OK compound status revokes the delegation */
	if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) {
		rfs4_revoke_deleg(dsp);
	}

	rfs4freeargres(&cb4_args, &cb4_res);
}

/*
 * Argument block handed to each do_recall() thread: the delegation to
 * recall, the routine that performs the recall, and the truncate flag
 * passed to that routine.  do_recall() frees the block.
 */
struct recall_arg {
	rfs4_deleg_state_t *dsp;
	void (*recall)(rfs4_deleg_state_t *, bool_t trunc);
	bool_t trunc;
};

/*
 * Thread body that recalls a single delegation.
 */
static void
do_recall(struct recall_arg *arg)
{
	rfs4_deleg_state_t *dsp = arg->dsp;
	rfs4_file_t *fp = dsp->finfo;
	callb_cpr_t cpr_info;
	kmutex_t cpr_lock;

	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall");

	/*
	 * It is possible that before this thread starts
	 * the client has sent us a return_delegation, and
	 * if that is the case we do not need to send the
	 * recall callback.
*/
	if (dsp->dtype != OPEN_DELEGATE_NONE) {
		DTRACE_PROBE3(nfss__i__recall,
		    struct recall_arg *, arg,
		    struct rfs4_deleg_state_t *, dsp,
		    struct rfs4_file_t *, fp);

		if (arg->recall)
			(void) (*arg->recall)(dsp, arg->trunc);
	}

	mutex_enter(fp->dinfo->recall_lock);
	/*
	 * Recall count may go negative if the parent thread that is
	 * creating the individual callback threads does not modify
	 * the recall_count field before the callback thread actually
	 * gets a response from the CB_RECALL
	 */
	fp->dinfo->recall_count--;
	if (fp->dinfo->recall_count == 0)
		cv_signal(fp->dinfo->recall_cv);
	mutex_exit(fp->dinfo->recall_lock);

	/* CALLB_CPR_EXIT() is entered with cpr_lock held */
	mutex_enter(&cpr_lock);
	CALLB_CPR_EXIT(&cpr_info);
	mutex_destroy(&cpr_lock);

	rfs4_deleg_state_rele(dsp); /* release the hold for this thread */

	kmem_free(arg, sizeof (struct recall_arg));
}

/*
 * Argument block handed to the do_recall_file() thread: the file whose
 * delegations are to be recalled, the per-delegation recall routine and
 * the truncate flag.  do_recall_file() frees the block.
 */
struct master_recall_args {
	rfs4_file_t *fp;
	void (*recall)(rfs4_deleg_state_t *, bool_t);
	bool_t trunc;
};

/*
 * Thread body that starts one do_recall() thread for every valid
 * delegation on the file and then waits for all of them to finish.
 */
static void
do_recall_file(struct master_recall_args *map)
{
	rfs4_file_t *fp = map->fp;
	rfs4_deleg_state_t *dsp;
	struct recall_arg *arg;
	callb_cpr_t cpr_info;
	kmutex_t cpr_lock;
	int32_t recall_count;

	rfs4_dbe_lock(fp->dbe);

	/* Recall already in progress ?
*/
	mutex_enter(fp->dinfo->recall_lock);
	if (fp->dinfo->recall_count != 0) {
		/* drop the file hold and the argument block, then quit */
		mutex_exit(fp->dinfo->recall_lock);
		rfs4_dbe_rele_nolock(fp->dbe);
		rfs4_dbe_unlock(fp->dbe);
		kmem_free(map, sizeof (struct master_recall_args));
		return;
	}

	mutex_exit(fp->dinfo->recall_lock);

	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "v4RecallFile");

	/* one recall thread per valid delegation on the file's list */
	recall_count = 0;
	for (dsp = fp->delegationlist.next->dsp; dsp != NULL;
	    dsp = dsp->delegationlist.next->dsp) {

		rfs4_dbe_lock(dsp->dbe);
		/*
		 * if this delegation state
		 * is being reaped skip it
		 */
		if (rfs4_dbe_is_invalid(dsp->dbe)) {
			rfs4_dbe_unlock(dsp->dbe);
			continue;
		}

		/* hold for receiving thread */
		rfs4_dbe_hold(dsp->dbe);
		rfs4_dbe_unlock(dsp->dbe);

		/* freed by do_recall() */
		arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP);
		arg->recall = map->recall;
		arg->trunc = map->trunc;
		arg->dsp = dsp;

		recall_count++;

		(void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN,
		    minclsyspri);
	}

	rfs4_dbe_unlock(fp->dbe);

	mutex_enter(fp->dinfo->recall_lock);
	/*
	 * Recall count may go negative if the parent thread that is
	 * creating the individual callback threads does not modify
	 * the recall_count field before the callback thread actually
	 * gets a response from the CB_RECALL
	 */
	fp->dinfo->recall_count += recall_count;
	/* each do_recall() thread decrements the count as it finishes */
	while (fp->dinfo->recall_count)
		cv_wait(fp->dinfo->recall_cv, fp->dinfo->recall_lock);

	mutex_exit(fp->dinfo->recall_lock);

	DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp);
	rfs4_file_rele(fp);
	kmem_free(map, sizeof (struct master_recall_args));
	/* CALLB_CPR_EXIT() is entered with cpr_lock held */
	mutex_enter(&cpr_lock);
	CALLB_CPR_EXIT(&cpr_info);
	mutex_destroy(&cpr_lock);
}

/*
 * Start an asynchronous recall of the delegations on a file.
 */
static void
rfs4_recall_file(rfs4_file_t *fp,
	void
(*recall)(rfs4_deleg_state_t *, bool_t trunc), 1046 bool_t trunc, rfs4_client_t *cp) 1047 { 1048 struct master_recall_args *args; 1049 1050 rfs4_dbe_lock(fp->dbe); 1051 if (fp->dinfo->dtype == OPEN_DELEGATE_NONE) { 1052 rfs4_dbe_unlock(fp->dbe); 1053 return; 1054 } 1055 rfs4_dbe_hold(fp->dbe); /* hold for new thread */ 1056 1057 /* 1058 * Mark the time we started the recall processing. 1059 * If it has been previously recalled, do not reset the 1060 * timer since this is used for the revocation decision. 1061 */ 1062 if (fp->dinfo->time_recalled == 0) 1063 fp->dinfo->time_recalled = gethrestime_sec(); 1064 fp->dinfo->ever_recalled = TRUE; /* used for policy decision */ 1065 /* Client causing recall not always available */ 1066 if (cp) 1067 fp->dinfo->conflicted_client = cp->clientid; 1068 1069 rfs4_dbe_unlock(fp->dbe); 1070 1071 args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP); 1072 args->fp = fp; 1073 args->recall = recall; 1074 args->trunc = trunc; 1075 1076 (void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN, 1077 minclsyspri); 1078 } 1079 1080 void 1081 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp) 1082 { 1083 time_t elapsed1, elapsed2; 1084 1085 if (fp->dinfo->time_recalled != 0) { 1086 elapsed1 = gethrestime_sec() - fp->dinfo->time_recalled; 1087 elapsed2 = gethrestime_sec() - fp->dinfo->time_lastwrite; 1088 /* First check to see if a revocation should occur */ 1089 if (elapsed1 > rfs4_lease_time && 1090 elapsed2 > rfs4_lease_time) { 1091 rfs4_revoke_file(fp); 1092 return; 1093 } 1094 /* 1095 * Next check to see if a recall should be done again 1096 * so quickly. 1097 */ 1098 if (elapsed1 <= ((rfs4_lease_time * 20) / 100)) 1099 return; 1100 } 1101 rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp); 1102 } 1103 1104 /* 1105 * rfs4_check_recall is called from rfs4_do_open to determine if the current 1106 * open conflicts with the delegation. 1107 * Return true if we need recall otherwise false. 
1108 * Assumes entry locks for sp and sp->finfo are held. 1109 */ 1110 bool_t 1111 rfs4_check_recall(rfs4_state_t *sp, uint32_t access) 1112 { 1113 open_delegation_type4 dtype = sp->finfo->dinfo->dtype; 1114 1115 switch (dtype) { 1116 case OPEN_DELEGATE_NONE: 1117 /* Not currently delegated so there is nothing to do */ 1118 return (FALSE); 1119 case OPEN_DELEGATE_READ: 1120 /* 1121 * If the access is only asking for READ then there is 1122 * no conflict and nothing to do. If it is asking 1123 * for write, then there will be conflict and the read 1124 * delegation should be recalled. 1125 */ 1126 if (access == OPEN4_SHARE_ACCESS_READ) 1127 return (FALSE); 1128 else 1129 return (TRUE); 1130 case OPEN_DELEGATE_WRITE: 1131 /* Check to see if this client has the delegation */ 1132 return (rfs4_is_deleg(sp)); 1133 } 1134 1135 return (FALSE); 1136 } 1137 1138 /* 1139 * Return the "best" allowable delegation available given the current 1140 * delegation type and the desired access and deny modes on the file. 1141 * At the point that this routine is called we know that the access and 1142 * deny modes are consistent with the file modes. 1143 */ 1144 static open_delegation_type4 1145 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp) 1146 { 1147 open_delegation_type4 dtype = fp->dinfo->dtype; 1148 uint32_t access = sp->share_access; 1149 uint32_t deny = sp->share_deny; 1150 int readcnt = 0; 1151 int writecnt = 0; 1152 1153 switch (dtype) { 1154 case OPEN_DELEGATE_NONE: 1155 /* 1156 * Determine if more than just this OPEN have the file 1157 * open and if so, no delegation may be provided to 1158 * the client. 1159 */ 1160 if (access & OPEN4_SHARE_ACCESS_WRITE) 1161 writecnt++; 1162 if (access & OPEN4_SHARE_ACCESS_READ) 1163 readcnt++; 1164 1165 if (fp->access_read > readcnt || fp->access_write > writecnt) 1166 return (OPEN_DELEGATE_NONE); 1167 1168 /* 1169 * If the client is going to write, or if the client 1170 * has exclusive access, return a write delegation. 
1171 */ 1172 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1173 (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) 1174 return (OPEN_DELEGATE_WRITE); 1175 /* 1176 * If we don't want to write or we've haven't denied read 1177 * access to others, return a read delegation. 1178 */ 1179 if ((access & ~OPEN4_SHARE_ACCESS_WRITE) || 1180 (deny & ~OPEN4_SHARE_DENY_READ)) 1181 return (OPEN_DELEGATE_READ); 1182 1183 /* Shouldn't get here */ 1184 return (OPEN_DELEGATE_NONE); 1185 1186 case OPEN_DELEGATE_READ: 1187 /* 1188 * If the file is delegated for read but we wan't to 1189 * write or deny others to read then we can't delegate 1190 * the file. We shouldn't get here since the delegation should 1191 * have been recalled already. 1192 */ 1193 if ((access & OPEN4_SHARE_ACCESS_WRITE) || 1194 (deny & OPEN4_SHARE_DENY_READ)) 1195 return (OPEN_DELEGATE_NONE); 1196 return (OPEN_DELEGATE_READ); 1197 1198 case OPEN_DELEGATE_WRITE: 1199 return (OPEN_DELEGATE_WRITE); 1200 } 1201 1202 /* Shouldn't get here */ 1203 return (OPEN_DELEGATE_NONE); 1204 } 1205 1206 /* 1207 * Given the desired delegation type and the "history" of the file 1208 * determine the actual delegation type to return. 1209 */ 1210 static open_delegation_type4 1211 rfs4_delegation_policy(open_delegation_type4 dtype, 1212 rfs4_dinfo_t *dinfo, clientid4 cid) 1213 { 1214 time_t elapsed; 1215 1216 if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE) 1217 return (OPEN_DELEGATE_NONE); 1218 1219 /* 1220 * Has this file/delegation ever been recalled? If not then 1221 * no furhter checks for a delegation race need to be done. 1222 * However if a recall has occurred, then check to see if a 1223 * client has caused its own delegation recall to occur. If 1224 * not, then has a delegation for this file been returned 1225 * recently? If so, then do not assign a new delegation to 1226 * avoid a "delegation race" between the original client and 1227 * the new/conflicting client. 
1228 */ 1229 if (dinfo->ever_recalled == TRUE) { 1230 if (dinfo->conflicted_client != cid) { 1231 elapsed = gethrestime_sec() - dinfo->time_returned; 1232 if (elapsed < rfs4_lease_time) 1233 return (OPEN_DELEGATE_NONE); 1234 } 1235 } 1236 1237 /* Limit the number of read grants */ 1238 if (dtype == OPEN_DELEGATE_READ && 1239 dinfo->rdgrants > MAX_READ_DELEGATIONS) 1240 return (OPEN_DELEGATE_NONE); 1241 1242 /* 1243 * Should consider limiting total number of read/write 1244 * delegations the server will permit. 1245 */ 1246 1247 return (dtype); 1248 } 1249 1250 /* 1251 * Try and grant a delegation for an open give the state. The routine 1252 * returns the delegation type granted. This could be OPEN_DELEGATE_NONE. 1253 * 1254 * The state and associate file entry must be locked 1255 */ 1256 rfs4_deleg_state_t * 1257 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall) 1258 { 1259 rfs4_file_t *fp = sp->finfo; 1260 open_delegation_type4 dtype; 1261 int no_delegation; 1262 1263 ASSERT(rfs4_dbe_islocked(sp->dbe)); 1264 ASSERT(rfs4_dbe_islocked(fp->dbe)); 1265 1266 /* Is the server even providing delegations? */ 1267 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE) 1268 return (NULL); 1269 1270 /* Check to see if delegations have been temporarily disabled */ 1271 mutex_enter(&rfs4_deleg_lock); 1272 no_delegation = rfs4_deleg_disabled; 1273 mutex_exit(&rfs4_deleg_lock); 1274 1275 if (no_delegation) 1276 return (NULL); 1277 1278 /* Don't grant a delegation if a deletion is impending. */ 1279 if (fp->dinfo->hold_grant > 0) { 1280 return (NULL); 1281 } 1282 1283 /* 1284 * Don't grant a delegation if there are any lock manager 1285 * (NFSv2/v3) locks for the file. This is a bit of a hack (e.g., 1286 * if there are only read locks we should be able to grant a 1287 * read-only delegation), but it's good enough for now. 1288 * 1289 * MT safety: the lock manager checks for conflicting delegations 1290 * before processing a lock request. 
That check will block until 1291 * we are done here. So if the lock manager acquires a lock after 1292 * we decide to grant the delegation, the delegation will get 1293 * immediately recalled (if there's a conflict), so we're safe. 1294 */ 1295 if (lm_vp_active(fp->vp)) { 1296 return (NULL); 1297 } 1298 1299 /* 1300 * Based on the type of delegation request passed in, take the 1301 * appropriate action (DELEG_NONE is handled above) 1302 */ 1303 switch (dreq) { 1304 1305 case DELEG_READ: 1306 case DELEG_WRITE: 1307 /* 1308 * The server "must" grant the delegation in this case. 1309 * Client is using open previous 1310 */ 1311 dtype = (open_delegation_type4)dreq; 1312 *recall = 1; 1313 break; 1314 case DELEG_ANY: 1315 /* 1316 * If a valid callback path does not exist, no delegation may 1317 * be granted. 1318 */ 1319 if (sp->owner->client->cbinfo.cb_state != CB_OK) 1320 return (NULL); 1321 1322 /* 1323 * If the original operation which caused time_rm_delayed 1324 * to be set hasn't been retried and completed for one 1325 * full lease period, clear it and allow delegations to 1326 * get granted again. 1327 */ 1328 if (fp->dinfo->time_rm_delayed > 0 && 1329 gethrestime_sec() > 1330 fp->dinfo->time_rm_delayed + rfs4_lease_time) 1331 fp->dinfo->time_rm_delayed = 0; 1332 1333 /* 1334 * If we are waiting for a delegation to be returned then 1335 * don't delegate this file. We do this for correctness as 1336 * well as if the file is being recalled we would likely 1337 * recall this file again. 1338 */ 1339 1340 if (fp->dinfo->time_recalled != 0 || 1341 fp->dinfo->time_rm_delayed != 0) 1342 return (NULL); 1343 1344 /* Get the "best" delegation candidate */ 1345 dtype = rfs4_check_delegation(sp, fp); 1346 1347 if (dtype == OPEN_DELEGATE_NONE) 1348 return (NULL); 1349 1350 /* 1351 * Based on policy and the history of the file get the 1352 * actual delegation. 
1353 */ 1354 dtype = rfs4_delegation_policy(dtype, fp->dinfo, 1355 sp->owner->client->clientid); 1356 1357 if (dtype == OPEN_DELEGATE_NONE) 1358 return (NULL); 1359 break; 1360 default: 1361 return (NULL); 1362 } 1363 1364 /* set the delegation for the state */ 1365 return (rfs4_deleg_state(sp, dtype, recall)); 1366 } 1367 1368 void 1369 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp, 1370 nfsace4 *ace, int recall) 1371 { 1372 open_write_delegation4 *wp; 1373 open_read_delegation4 *rp; 1374 nfs_space_limit4 *spl; 1375 nfsace4 nace; 1376 1377 /* 1378 * We need to allocate a new copy of the who string. 1379 * this string will be freed by the rfs4_op_open dis_resfree 1380 * routine. We need to do this allocation since replays will 1381 * be allocated and rfs4_compound can't tell the difference from 1382 * a replay and an inital open. N.B. if an ace is passed in, it 1383 * the caller's responsibility to free it. 1384 */ 1385 1386 if (ace == NULL) { 1387 /* 1388 * Default is to deny all access, the client will have 1389 * to contact the server. XXX Do we want to actually 1390 * set a deny for every one, or do we simply want to 1391 * construct an entity that will match no one? 
1392 */ 1393 nace.type = ACE4_ACCESS_DENIED_ACE_TYPE; 1394 nace.flag = 0; 1395 nace.access_mask = ACE4_VALID_MASK_BITS; 1396 (void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who); 1397 } else { 1398 nace.type = ace->type; 1399 nace.flag = ace->flag; 1400 nace.access_mask = ace->access_mask; 1401 (void) utf8_copy(&ace->who, &nace.who); 1402 } 1403 1404 dp->delegation_type = dsp->dtype; 1405 1406 switch (dsp->dtype) { 1407 case OPEN_DELEGATE_NONE: 1408 break; 1409 case OPEN_DELEGATE_READ: 1410 rp = &dp->open_delegation4_u.read; 1411 rp->stateid = dsp->delegid.stateid; 1412 rp->recall = (bool_t)recall; 1413 rp->permissions = nace; 1414 break; 1415 case OPEN_DELEGATE_WRITE: 1416 wp = &dp->open_delegation4_u.write; 1417 wp->stateid = dsp->delegid.stateid; 1418 wp->recall = (bool_t)recall; 1419 spl = &wp->space_limit; 1420 spl->limitby = NFS_LIMIT_SIZE; 1421 spl->nfs_space_limit4_u.filesize = 0; 1422 wp->permissions = nace; 1423 break; 1424 } 1425 } 1426 1427 /* 1428 * Check if the file is delegated via the provided file struct. 1429 * Return TRUE if it is delegated. This is intended for use by 1430 * the v4 server. The v2/v3 server code should use rfs4_check_delegated(). 1431 * 1432 * Note that if the file is found to have a delegation, it is 1433 * recalled, unless the clientid of the caller matches the clientid of the 1434 * delegation. If the caller has specified, there is a slight delay 1435 * inserted in the hopes that the delegation will be returned quickly. 1436 */ 1437 bool_t 1438 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp, 1439 bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp) 1440 { 1441 rfs4_deleg_state_t *dsp; 1442 1443 /* Is delegation enabled? */ 1444 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE) 1445 return (FALSE); 1446 1447 /* do we have a delegation on this file? 
*/ 1448 rfs4_dbe_lock(fp->dbe); 1449 if (fp->dinfo->dtype == OPEN_DELEGATE_NONE) { 1450 if (is_rm) 1451 fp->dinfo->hold_grant++; 1452 rfs4_dbe_unlock(fp->dbe); 1453 return (FALSE); 1454 } 1455 /* 1456 * do we have a write delegation on this file or are we 1457 * requesting write access to a file with any type of existing 1458 * delegation? 1459 */ 1460 if (mode == FWRITE || fp->dinfo->dtype == OPEN_DELEGATE_WRITE) { 1461 if (cp != NULL) { 1462 dsp = fp->delegationlist.next->dsp; 1463 if (dsp == NULL) { 1464 rfs4_dbe_unlock(fp->dbe); 1465 return (FALSE); 1466 } 1467 /* 1468 * Does the requestor already own the delegation? 1469 */ 1470 if (dsp->client->clientid == *(cp)) { 1471 rfs4_dbe_unlock(fp->dbe); 1472 return (FALSE); 1473 } 1474 } 1475 1476 rfs4_dbe_unlock(fp->dbe); 1477 rfs4_recall_deleg(fp, trunc, NULL); 1478 1479 if (!do_delay) { 1480 rfs4_dbe_lock(fp->dbe); 1481 fp->dinfo->time_rm_delayed = gethrestime_sec(); 1482 rfs4_dbe_unlock(fp->dbe); 1483 return (TRUE); 1484 } 1485 1486 delay(NFS4_DELEGATION_CONFLICT_DELAY); 1487 1488 rfs4_dbe_lock(fp->dbe); 1489 if (fp->dinfo->dtype != OPEN_DELEGATE_NONE) { 1490 fp->dinfo->time_rm_delayed = gethrestime_sec(); 1491 rfs4_dbe_unlock(fp->dbe); 1492 return (TRUE); 1493 } 1494 } 1495 if (is_rm) 1496 fp->dinfo->hold_grant++; 1497 rfs4_dbe_unlock(fp->dbe); 1498 return (FALSE); 1499 } 1500 1501 /* 1502 * Check if the file is delegated in the case of a v2 or v3 access. 1503 * Return TRUE if it is delegated which in turn means that v2 should 1504 * drop the request and in the case of v3 JUKEBOX should be returned. 1505 */ 1506 bool_t 1507 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc) 1508 { 1509 rfs4_file_t *fp; 1510 bool_t create = FALSE; 1511 bool_t rc = FALSE; 1512 1513 rfs4_hold_deleg_policy(); 1514 1515 /* Is delegation enabled? 
*/ 1516 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) { 1517 fp = rfs4_findfile(vp, NULL, &create); 1518 if (fp != NULL) { 1519 if (rfs4_check_delegated_byfp(mode, fp, trunc, 1520 TRUE, FALSE, NULL)) { 1521 rc = TRUE; 1522 } 1523 rfs4_file_rele(fp); 1524 } 1525 } 1526 rfs4_rele_deleg_policy(); 1527 return (rc); 1528 } 1529 1530 /* 1531 * Release a hold on the hold_grant counter which 1532 * prevents delegation from being granted while a remove 1533 * or a rename is in progress. 1534 */ 1535 void 1536 rfs4_clear_dont_grant(rfs4_file_t *fp) 1537 { 1538 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE) 1539 return; 1540 rfs4_dbe_lock(fp->dbe); 1541 ASSERT(fp->dinfo->hold_grant > 0); 1542 fp->dinfo->hold_grant--; 1543 fp->dinfo->time_rm_delayed = 0; 1544 rfs4_dbe_unlock(fp->dbe); 1545 } 1546 1547 /* 1548 * State support for delegation. 1549 * Set the state delegation type for this state; 1550 * This routine is called from open via rfs4_grant_delegation and the entry 1551 * locks on sp and sp->finfo are assumed. 
1552 */ 1553 static rfs4_deleg_state_t * 1554 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall) 1555 { 1556 rfs4_file_t *fp = sp->finfo; 1557 bool_t create = TRUE; 1558 rfs4_deleg_state_t *dsp; 1559 vnode_t *vp; 1560 int open_prev = *recall; 1561 int ret; 1562 int fflags = 0; 1563 1564 ASSERT(rfs4_dbe_islocked(sp->dbe)); 1565 ASSERT(rfs4_dbe_islocked(fp->dbe)); 1566 1567 /* Shouldn't happen */ 1568 if (fp->dinfo->recall_count != 0 || 1569 (fp->dinfo->dtype == OPEN_DELEGATE_READ && 1570 dtype != OPEN_DELEGATE_READ)) { 1571 return (NULL); 1572 } 1573 1574 /* Unlock to avoid deadlock */ 1575 rfs4_dbe_unlock(fp->dbe); 1576 rfs4_dbe_unlock(sp->dbe); 1577 1578 dsp = rfs4_finddeleg(sp, &create); 1579 1580 rfs4_dbe_lock(sp->dbe); 1581 rfs4_dbe_lock(fp->dbe); 1582 1583 if (dsp == NULL) 1584 return (NULL); 1585 1586 /* 1587 * It is possible that since we dropped the lock 1588 * in order to call finddeleg, the rfs4_file_t 1589 * was marked such that we should not grant a 1590 * delegation, if so bail out. 1591 */ 1592 if (fp->dinfo->hold_grant > 0) { 1593 rfs4_deleg_state_rele(dsp); 1594 return (NULL); 1595 } 1596 1597 if (create == FALSE) { 1598 if (sp->owner->client == dsp->client && 1599 dsp->dtype == dtype) { 1600 return (dsp); 1601 } else { 1602 rfs4_deleg_state_rele(dsp); 1603 return (NULL); 1604 } 1605 } 1606 1607 /* 1608 * Check that this file has not been delegated to another 1609 * client 1610 */ 1611 if (fp->dinfo->recall_count != 0 || 1612 fp->dinfo->dtype == OPEN_DELEGATE_WRITE || 1613 (fp->dinfo->dtype == OPEN_DELEGATE_READ && 1614 dtype != OPEN_DELEGATE_READ)) { 1615 rfs4_deleg_state_rele(dsp); 1616 return (NULL); 1617 } 1618 1619 vp = fp->vp; 1620 /* vnevent_support returns 0 if file system supports vnevents */ 1621 if (vnevent_support(vp, NULL)) { 1622 rfs4_deleg_state_rele(dsp); 1623 return (NULL); 1624 } 1625 1626 /* Calculate the fflags for this OPEN. 
*/ 1627 if (sp->share_access & OPEN4_SHARE_ACCESS_READ) 1628 fflags |= FREAD; 1629 if (sp->share_access & OPEN4_SHARE_ACCESS_WRITE) 1630 fflags |= FWRITE; 1631 1632 *recall = 0; 1633 /* 1634 * Before granting a delegation we need to know if anyone else has 1635 * opened the file in a conflicting mode. However, first we need to 1636 * know how we opened the file to check the counts properly. 1637 */ 1638 if (dtype == OPEN_DELEGATE_READ) { 1639 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1640 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1641 vn_is_mapped(vp, V_WRITE)) { 1642 if (open_prev) { 1643 *recall = 1; 1644 } else { 1645 rfs4_deleg_state_rele(dsp); 1646 return (NULL); 1647 } 1648 } 1649 ret = fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ, 1650 rfs4_mon_hold, rfs4_mon_rele); 1651 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1652 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1653 vn_is_mapped(vp, V_WRITE)) { 1654 if (open_prev) { 1655 *recall = 1; 1656 } else { 1657 (void) fem_uninstall(vp, deleg_rdops, 1658 (void *)fp); 1659 rfs4_deleg_state_rele(dsp); 1660 return (NULL); 1661 } 1662 } 1663 /* 1664 * Because a client can hold onto a delegation after the 1665 * file has been closed, we need to keep track of the 1666 * access to this file. Otherwise the CIFS server would 1667 * not know about the client accessing the file and could 1668 * inappropriately grant an OPLOCK. 1669 * fem_install() returns EBUSY when asked to install a 1670 * OPUNIQ monitor more than once. Therefore, check the 1671 * return code because we only want this done once. 
1672 */ 1673 if (ret == 0) 1674 vn_open_upgrade(vp, FREAD); 1675 } else { /* WRITE */ 1676 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1677 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1678 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) || 1679 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) || 1680 vn_is_mapped(vp, V_RDORWR)) { 1681 if (open_prev) { 1682 *recall = 1; 1683 } else { 1684 rfs4_deleg_state_rele(dsp); 1685 return (NULL); 1686 } 1687 } 1688 ret = fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ, 1689 rfs4_mon_hold, rfs4_mon_rele); 1690 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) || 1691 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) || 1692 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) || 1693 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) || 1694 vn_is_mapped(vp, V_RDORWR)) { 1695 if (open_prev) { 1696 *recall = 1; 1697 } else { 1698 (void) fem_uninstall(vp, deleg_wrops, 1699 (void *)fp); 1700 rfs4_deleg_state_rele(dsp); 1701 return (NULL); 1702 } 1703 } 1704 /* 1705 * Because a client can hold onto a delegation after the 1706 * file has been closed, we need to keep track of the 1707 * access to this file. Otherwise the CIFS server would 1708 * not know about the client accessing the file and could 1709 * inappropriately grant an OPLOCK. 1710 * fem_install() returns EBUSY when asked to install a 1711 * OPUNIQ monitor more than once. Therefore, check the 1712 * return code because we only want this done once. 
1713 */ 1714 if (ret == 0) 1715 vn_open_upgrade(vp, FREAD|FWRITE); 1716 } 1717 /* Place on delegation list for file */ 1718 insque(&dsp->delegationlist, fp->delegationlist.prev); 1719 1720 dsp->dtype = fp->dinfo->dtype = dtype; 1721 1722 /* Update delegation stats for this file */ 1723 fp->dinfo->time_lastgrant = gethrestime_sec(); 1724 1725 /* reset since this is a new delegation */ 1726 fp->dinfo->conflicted_client = 0; 1727 fp->dinfo->ever_recalled = FALSE; 1728 1729 if (dtype == OPEN_DELEGATE_READ) 1730 fp->dinfo->rdgrants++; 1731 else 1732 fp->dinfo->wrgrants++; 1733 1734 return (dsp); 1735 } 1736 1737 /* 1738 * State routine for the server when a delegation is returned. 1739 */ 1740 void 1741 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked) 1742 { 1743 rfs4_file_t *fp = dsp->finfo; 1744 open_delegation_type4 dtypewas; 1745 1746 rfs4_dbe_lock(fp->dbe); 1747 /* Remove state from recall list */ 1748 1749 remque(&dsp->delegationlist); 1750 dsp->delegationlist.next = dsp->delegationlist.prev = 1751 &dsp->delegationlist; 1752 1753 if (&fp->delegationlist == fp->delegationlist.next) { 1754 dtypewas = fp->dinfo->dtype; 1755 fp->dinfo->dtype = OPEN_DELEGATE_NONE; 1756 rfs4_dbe_cv_broadcast(fp->dbe); 1757 1758 /* if file system was unshared, the vp will be NULL */ 1759 if (fp->vp != NULL) { 1760 /* 1761 * Once a delegation is no longer held by any client, 1762 * the monitor is uninstalled. At this point, the 1763 * client must send OPEN otw, so we don't need the 1764 * reference on the vnode anymore. The open 1765 * downgrade removes the reference put on earlier. 
1766 */ 1767 if (dtypewas == OPEN_DELEGATE_READ) { 1768 (void) fem_uninstall(fp->vp, deleg_rdops, 1769 (void *)fp); 1770 vn_open_downgrade(fp->vp, FREAD); 1771 } else if (dtypewas == OPEN_DELEGATE_WRITE) { 1772 (void) fem_uninstall(fp->vp, deleg_wrops, 1773 (void *)fp); 1774 vn_open_downgrade(fp->vp, FREAD|FWRITE); 1775 } 1776 } 1777 } 1778 1779 switch (dsp->dtype) { 1780 case OPEN_DELEGATE_READ: 1781 fp->dinfo->rdgrants--; 1782 break; 1783 case OPEN_DELEGATE_WRITE: 1784 fp->dinfo->wrgrants--; 1785 break; 1786 default: 1787 break; 1788 } 1789 1790 /* used in the policy decision */ 1791 fp->dinfo->time_returned = gethrestime_sec(); 1792 1793 /* 1794 * reset the time_recalled field so future delegations are not 1795 * accidentally revoked 1796 */ 1797 if ((fp->dinfo->rdgrants + fp->dinfo->wrgrants) == 0) 1798 fp->dinfo->time_recalled = 0; 1799 1800 rfs4_dbe_unlock(fp->dbe); 1801 1802 rfs4_dbe_lock(dsp->dbe); 1803 1804 dsp->dtype = OPEN_DELEGATE_NONE; 1805 1806 if (revoked == TRUE) 1807 dsp->time_revoked = gethrestime_sec(); 1808 1809 rfs4_dbe_invalidate(dsp->dbe); 1810 1811 rfs4_dbe_unlock(dsp->dbe); 1812 1813 if (revoked == TRUE) { 1814 rfs4_dbe_lock(dsp->client->dbe); 1815 dsp->client->deleg_revoked++; /* observability */ 1816 rfs4_dbe_unlock(dsp->client->dbe); 1817 } 1818 } 1819 1820 static void 1821 rfs4_revoke_deleg(rfs4_deleg_state_t *dsp) 1822 { 1823 rfs4_return_deleg(dsp, TRUE); 1824 } 1825 1826 static void 1827 rfs4_revoke_file(rfs4_file_t *fp) 1828 { 1829 rfs4_deleg_state_t *dsp; 1830 1831 /* 1832 * The lock for rfs4_file_t must be held when traversing the 1833 * delegation list but that lock needs to be released to call 1834 * rfs4_revoke_deleg() 1835 * This for loop is set up to check the list for being empty, 1836 * and locking the rfs4_file_t struct on init and end 1837 */ 1838 for (rfs4_dbe_lock(fp->dbe); 1839 &fp->delegationlist != fp->delegationlist.next; 1840 rfs4_dbe_lock(fp->dbe)) { 1841 1842 dsp = fp->delegationlist.next->dsp; 1843 
rfs4_dbe_hold(dsp->dbe); 1844 rfs4_dbe_unlock(fp->dbe); 1845 rfs4_revoke_deleg(dsp); 1846 rfs4_deleg_state_rele(dsp); 1847 } 1848 rfs4_dbe_unlock(fp->dbe); 1849 } 1850 1851 /* 1852 * A delegation is assumed to be present on the file associated with 1853 * "state". Check to see if the delegation matches is associated with 1854 * the same client as referenced by "state". If it is not, TRUE is 1855 * returned. If the delegation DOES match the client (or no 1856 * delegation is present), return FALSE. 1857 * Assume the state entry and file entry are locked. 1858 */ 1859 bool_t 1860 rfs4_is_deleg(rfs4_state_t *state) 1861 { 1862 rfs4_deleg_state_t *dsp; 1863 rfs4_file_t *fp = state->finfo; 1864 rfs4_client_t *cp = state->owner->client; 1865 1866 ASSERT(rfs4_dbe_islocked(fp->dbe)); 1867 for (dsp = fp->delegationlist.next->dsp; dsp != NULL; 1868 dsp = dsp->delegationlist.next->dsp) { 1869 if (cp != dsp->client) { 1870 return (TRUE); 1871 } 1872 } 1873 return (FALSE); 1874 } 1875 1876 void 1877 rfs4_disable_delegation(void) 1878 { 1879 mutex_enter(&rfs4_deleg_lock); 1880 rfs4_deleg_disabled++; 1881 mutex_exit(&rfs4_deleg_lock); 1882 } 1883 1884 void 1885 rfs4_enable_delegation(void) 1886 { 1887 mutex_enter(&rfs4_deleg_lock); 1888 ASSERT(rfs4_deleg_disabled > 0); 1889 rfs4_deleg_disabled--; 1890 mutex_exit(&rfs4_deleg_lock); 1891 } 1892 1893 void 1894 rfs4_mon_hold(void *arg) 1895 { 1896 rfs4_file_t *fp = arg; 1897 1898 rfs4_dbe_hold(fp->dbe); 1899 } 1900 1901 void 1902 rfs4_mon_rele(void *arg) 1903 { 1904 rfs4_file_t *fp = arg; 1905 1906 rfs4_dbe_rele_nolock(fp->dbe); 1907 } 1908