/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/kstat.h>
#include <sys/mkdev.h>
#include <sys/mount.h>
#include <sys/statvfs.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/utsname.h>
#include <sys/bootconf.h>
#include <sys/modctl.h>
#include <sys/acl.h>
#include <sys/flock.h>
#include <sys/kstr.h>
#include <sys/stropts.h>
#include <sys/strsubr.h>
#include <sys/atomic.h>
#include <sys/disp.h>
#include <sys/policy.h>
#include <sys/list.h>
#include <sys/zone.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/rpcsec_gss.h>
#include <rpc/clnt.h>
#include <rpc/xdr.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/mount.h>
#include <nfs/nfs_acl.h>

#include <fs/fs_subr.h>

#include <nfs/nfs4.h>
#include <nfs/rnode4.h>
#include <nfs/nfs4_clnt.h>
#include <nfs/nfssys.h>

#ifdef DEBUG
/*
 * These are "special" state IDs and file handles that
 * match any delegation state ID or file handle.  This
 * is for testing purposes only.
 */

stateid4 nfs4_deleg_any = { 0x7FFFFFF0 };
char nfs4_deleg_fh[] = "\0377\0376\0375\0374";
nfs_fh4 nfs4_deleg_anyfh = { sizeof (nfs4_deleg_fh)-1, nfs4_deleg_fh };
nfsstat4 cb4_getattr_fail = NFS4_OK;
nfsstat4 cb4_recall_fail = NFS4_OK;

int nfs4_callback_debug;
int nfs4_recall_debug;
int nfs4_drat_debug;

#endif

#define CB_NOTE(x)  NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE, x))
#define CB_WARN(x)  NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x))
#define CB_WARN1(x, y)  NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x, y))

enum nfs4_delegreturn_policy nfs4_delegreturn_policy = INACTIVE;

static zone_key_t nfs4_callback_zone_key;

/*
 * NFS4_MAPSIZE is the number of bytes we are willing to consume
 * for the block allocation map when the server grants a NFS_LIMIT_BLOCK
 * style delegation.
 */

#define NFS4_MAPSIZE    8192
#define NFS4_MAPWORDS   NFS4_MAPSIZE/sizeof (uint_t)
#define NbPW            (NBBY*sizeof (uint_t))

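/*
 * With the values above, and the usual 32-bit uint_t, the map works
 * out to NFS4_MAPWORDS (8192 / 4 == 2048) words of NbPW (8 * 4 == 32)
 * bits each, i.e. it can track 64K block-sized chunks of a file.
 */
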
static int nfs4_num_prognums = 1024;
static SVC_CALLOUT_TABLE nfs4_cb_sct;

struct nfs4_dnode {
    list_node_t linkage;
    rnode4_t    *rnodep;
    int     flags;      /* Flags for nfs4delegreturn_impl() */
};

static const struct nfs4_callback_stats nfs4_callback_stats_tmpl = {
    { "delegations",    KSTAT_DATA_UINT64 },
    { "cb_getattr",     KSTAT_DATA_UINT64 },
    { "cb_recall",      KSTAT_DATA_UINT64 },
    { "cb_null",        KSTAT_DATA_UINT64 },
    { "cb_dispatch",    KSTAT_DATA_UINT64 },
    { "delegaccept_r",  KSTAT_DATA_UINT64 },
    { "delegaccept_rw", KSTAT_DATA_UINT64 },
    { "delegreturn",    KSTAT_DATA_UINT64 },
    { "callbacks",      KSTAT_DATA_UINT64 },
    { "claim_cur",      KSTAT_DATA_UINT64 },
    { "claim_cur_ok",   KSTAT_DATA_UINT64 },
    { "recall_trunc",   KSTAT_DATA_UINT64 },
    { "recall_failed",  KSTAT_DATA_UINT64 },
    { "return_limit_write", KSTAT_DATA_UINT64 },
    { "return_limit_addmap", KSTAT_DATA_UINT64 },
    { "deleg_recover",  KSTAT_DATA_UINT64 },
    { "cb_illegal",     KSTAT_DATA_UINT64 }
};

struct nfs4_cb_port {
    list_node_t linkage;    /* linkage into per-zone port list */
    char        netid[KNC_STRSIZE];
    char        uaddr[KNC_STRSIZE];
    char        protofmly[KNC_STRSIZE];
    char        proto[KNC_STRSIZE];
};

static int cb_getattr_bytes;

struct cb_recall_pass {
    rnode4_t    *rp;
    int     flags;      /* Flags for nfs4delegreturn_impl() */
    bool_t      truncate;
};

static nfs4_open_stream_t *get_next_deleg_stream(rnode4_t *, int);
static void nfs4delegreturn_thread(struct cb_recall_pass *);
static int deleg_reopen(vnode_t *, bool_t *, struct nfs4_callback_globals *,
    int);
static void nfs4_dlistadd(rnode4_t *, struct nfs4_callback_globals *, int);
static void nfs4_dlistclean_impl(struct nfs4_callback_globals *, int);
static int nfs4delegreturn_impl(rnode4_t *, int,
    struct nfs4_callback_globals *);
static void nfs4delegreturn_cleanup_impl(rnode4_t *, nfs4_server_t *,
    struct nfs4_callback_globals *);

static void
cb_getattr(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
    struct compound_state *cs, struct nfs4_callback_globals *ncg)
{
    CB_GETATTR4args *args = &argop->nfs_cb_argop4_u.opcbgetattr;
    CB_GETATTR4res *resp = &resop->nfs_cb_resop4_u.opcbgetattr;
    rnode4_t *rp;
    vnode_t *vp;
    bool_t found = FALSE;
    struct nfs4_server *sp;
    struct fattr4 *fap;
    rpc_inline_t *fdata;
    long mapcnt;
    fattr4_change change;
    fattr4_size size;
    uint_t rflag;

    ncg->nfs4_callback_stats.cb_getattr.value.ui64++;

#ifdef DEBUG
    /*
     * error injection hook: set cb4_getattr_fail global to
     * the NFS4 protocol error to be returned
     */
    if (cb4_getattr_fail != NFS4_OK) {
        *cs->statusp = resp->status = cb4_getattr_fail;
        return;
    }
#endif

    resp->obj_attributes.attrmask = 0;

    mutex_enter(&ncg->nfs4_cb_lock);
    sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
    mutex_exit(&ncg->nfs4_cb_lock);

    if (nfs4_server_vlock(sp, 0) == FALSE) {

        CB_WARN("cb_getattr: cannot find server\n");

        *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
        return;
    }

    /*
     * In cb_compound, callback_ident was validated against rq_prog,
     * but we couldn't verify that it was set to the value we provided
     * at setclientid time (because we didn't have server struct yet).
     * Now we have the server struct, but don't have callback_ident
     * handy.  So, validate the server struct program number against
     * the req RPC's prog number.  At this point, we know the RPC prog
     * num is valid (else we wouldn't be here); however, we don't know
     * that it was the prog number we supplied to this server at
     * setclientid time.  If the prog numbers aren't equivalent, then
     * log the problem and fail the request because either cbserv
     * and/or cbclient are confused.  This will probably never happen.
     */
    if (sp->s_program != req->rq_prog) {
#ifdef DEBUG
        zcmn_err(getzoneid(), CE_WARN,
            "cb_getattr: wrong server program number srv=%d req=%d\n",
            sp->s_program, req->rq_prog);
#else
        zcmn_err(getzoneid(), CE_WARN,
            "cb_getattr: wrong server program number\n");
#endif
        mutex_exit(&sp->s_lock);
        nfs4_server_rele(sp);
        *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
        return;
    }

    /*
     * Search the delegation list for a matching file handle;
     * mutex on sp prevents the list from changing.
     */

    rp = list_head(&sp->s_deleg_list);
    for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
        nfs4_fhandle_t fhandle;

        sfh4_copyval(rp->r_fh, &fhandle);

        if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
            bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
            fhandle.fh_len) == 0)) {

            found = TRUE;
            break;
        }
#ifdef DEBUG
        if (nfs4_deleg_anyfh.nfs_fh4_len == args->fh.nfs_fh4_len &&
            bcmp(nfs4_deleg_anyfh.nfs_fh4_val, args->fh.nfs_fh4_val,
            args->fh.nfs_fh4_len) == 0) {

            found = TRUE;
            break;
        }
#endif
    }

    /*
     * VN_HOLD the vnode before releasing s_lock to guarantee
     * we have a valid vnode reference.
     */
    if (found == TRUE) {
        vp = RTOV4(rp);
        VN_HOLD(vp);
    }

    mutex_exit(&sp->s_lock);
    nfs4_server_rele(sp);

    if (found == FALSE) {

        CB_WARN("cb_getattr: bad fhandle\n");

        *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
        return;
    }

    /*
     * Figure out which attributes the server wants.  We only
     * offer FATTR4_CHANGE & FATTR4_SIZE; ignore the rest.
     */
    fdata = kmem_alloc(cb_getattr_bytes, KM_SLEEP);

    /*
     * Don't actually need to create XDR to encode these
     * simple data structures.
     * xdrmem_create(&xdr, fdata, cb_getattr_bytes, XDR_ENCODE);
     */
    fap = &resp->obj_attributes;

    fap->attrmask = 0;
    /* attrlist4_len starts at 0 and increases as attrs are processed */
    fap->attrlist4 = (char *)fdata;
    fap->attrlist4_len = 0;

    /* don't supply attrs if request was zero */
    if (args->attr_request != 0) {
        if (args->attr_request & FATTR4_CHANGE_MASK) {
            /*
             * If the file is mmapped, then increment the change
             * attribute and return it.  This will guarantee that
             * the server will perceive that the file has changed
             * if there is any chance that the client application
             * has changed it.  Otherwise, just return the change
             * attribute as it has been updated by nfs4write_deleg.
             */

            mutex_enter(&rp->r_statelock);
            mapcnt = rp->r_mapcnt;
            rflag = rp->r_flags;
            mutex_exit(&rp->r_statelock);

            mutex_enter(&rp->r_statev4_lock);
            /*
             * If object mapped, then always return new change.
             * Otherwise, return change if object has dirty
             * pages.  If object doesn't have any dirty pages,
             * then all changes have been pushed to server, so
             * reset change to grant change.
             */
            if (mapcnt)
                rp->r_deleg_change++;
            else if (! (rflag & R4DIRTY))
                rp->r_deleg_change = rp->r_deleg_change_grant;
            change = rp->r_deleg_change;
            mutex_exit(&rp->r_statev4_lock);

            /*
             * Use inline XDR code directly, we know that we
             * are going to a memory buffer and it has enough
             * space so it cannot fail.
             */
            IXDR_PUT_U_HYPER(fdata, change);
            fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
            fap->attrmask |= FATTR4_CHANGE_MASK;
        }

        if (args->attr_request & FATTR4_SIZE_MASK) {
            /*
             * Use an atomic add of 0 to fetch a consistent view
             * of r_size; this avoids having to take rw_lock
             * which could cause a deadlock.
             */
            size = atomic_add_64_nv((uint64_t *)&rp->r_size, 0);

            /*
             * Use inline XDR code directly, we know that we
             * are going to a memory buffer and it has enough
             * space so it cannot fail.
             */
            IXDR_PUT_U_HYPER(fdata, size);
            fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
            fap->attrmask |= FATTR4_SIZE_MASK;
        }
    }

    VN_RELE(vp);

    *cs->statusp = resp->status = NFS4_OK;
}

static void
cb_getattr_free(nfs_cb_resop4 *resop)
{
    if (resop->nfs_cb_resop4_u.opcbgetattr.obj_attributes.attrlist4)
        kmem_free(resop->nfs_cb_resop4_u.opcbgetattr.
            obj_attributes.attrlist4, cb_getattr_bytes);
}

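/*
 * Note that the largest reply cb_getattr ever builds is bounded by
 * cb_getattr_bytes (sized in nfs4_callback_init() below): two XDR
 * units each for the change and size attributes, 16 bytes in all,
 * so the single KM_SLEEP allocation above always suffices.
 */
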
static void
cb_recall(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
    struct compound_state *cs, struct nfs4_callback_globals *ncg)
{
    CB_RECALL4args *args = &argop->nfs_cb_argop4_u.opcbrecall;
    CB_RECALL4res *resp = &resop->nfs_cb_resop4_u.opcbrecall;
    rnode4_t *rp;
    vnode_t *vp;
    struct nfs4_server *sp;
    bool_t found = FALSE;

    ncg->nfs4_callback_stats.cb_recall.value.ui64++;

    ASSERT(req->rq_prog >= NFS4_CALLBACK);
    ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums);

#ifdef DEBUG
    /*
     * error injection hook: set cb4_recall_fail global to
     * the NFS4 protocol error to be returned
     */
    if (cb4_recall_fail != NFS4_OK) {
        *cs->statusp = resp->status = cb4_recall_fail;
        return;
    }
#endif

    mutex_enter(&ncg->nfs4_cb_lock);
    sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
    mutex_exit(&ncg->nfs4_cb_lock);

    if (nfs4_server_vlock(sp, 0) == FALSE) {

        CB_WARN("cb_recall: cannot find server\n");

        *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
        return;
    }

    /*
     * Search the delegation list for a matching file handle
     * AND stateid; mutex on sp prevents the list from changing.
     */

    rp = list_head(&sp->s_deleg_list);
    for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
        mutex_enter(&rp->r_statev4_lock);

        /* check both state id and file handle! */

        if ((bcmp(&rp->r_deleg_stateid, &args->stateid,
            sizeof (stateid4)) == 0)) {
            nfs4_fhandle_t fhandle;

            sfh4_copyval(rp->r_fh, &fhandle);
            if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
                bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
                fhandle.fh_len) == 0)) {

                found = TRUE;
                break;
            } else {
#ifdef DEBUG
                CB_WARN("cb_recall: stateid OK, bad fh");
#endif
            }
        }
#ifdef DEBUG
        if (bcmp(&args->stateid, &nfs4_deleg_any,
            sizeof (stateid4)) == 0) {

            found = TRUE;
            break;
        }
#endif
        mutex_exit(&rp->r_statev4_lock);
    }

    /*
     * VN_HOLD the vnode before releasing s_lock to guarantee
     * we have a valid vnode reference.  The async thread will
     * release the hold when it's done.
     */
    if (found == TRUE) {
        mutex_exit(&rp->r_statev4_lock);
        vp = RTOV4(rp);
        VN_HOLD(vp);
    }
    mutex_exit(&sp->s_lock);
    nfs4_server_rele(sp);

    if (found == FALSE) {

        CB_WARN("cb_recall: bad stateid\n");

        *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
        return;
    }

    /* Fire up a thread to do the delegreturn */
    nfs4delegreturn_async(rp, NFS4_DR_RECALL|NFS4_DR_REOPEN,
        args->truncate);

    *cs->statusp = resp->status = 0;
}

/* ARGSUSED */
static void
cb_recall_free(nfs_cb_resop4 *resop)
{
    /* nothing to do here, cb_recall doesn't kmem_alloc */
}

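/*
 * Note that cb_recall replies to the server as soon as the async
 * delegreturn thread has been fired up; the delegation itself is
 * returned (and any dirty data pushed back) after the reply is sent,
 * so the recall callback is never blocked behind page-out traffic.
 */
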
/*
 * This function handles the CB_NULL proc call from an NFSv4 Server.
 *
 * We take note that the server has sent a CB_NULL for later processing
 * in the recovery logic.  It is noted so we may pause slightly after the
 * setclientid and before reopening files.  The pause is to allow the
 * NFSv4 Server time to receive the CB_NULL reply and adjust any of
 * its internal structures such that it has the opportunity to grant
 * delegations to reopened files.
 *
 */

/* ARGSUSED */
static void
cb_null(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
    struct nfs4_callback_globals *ncg)
{
    struct nfs4_server *sp;

    ncg->nfs4_callback_stats.cb_null.value.ui64++;

    ASSERT(req->rq_prog >= NFS4_CALLBACK);
    ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums);

    mutex_enter(&ncg->nfs4_cb_lock);
    sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
    mutex_exit(&ncg->nfs4_cb_lock);

    if (nfs4_server_vlock(sp, 0) != FALSE) {
        sp->s_flags |= N4S_CB_PINGED;
        cv_broadcast(&sp->wait_cb_null);
        mutex_exit(&sp->s_lock);
        nfs4_server_rele(sp);
    }
}

/*
 * cb_illegal   args: void
 *              res : status (NFS4ERR_OP_CB_ILLEGAL)
 */
/* ARGSUSED */
static void
cb_illegal(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
    struct compound_state *cs, struct nfs4_callback_globals *ncg)
{
    CB_ILLEGAL4res *resp = &resop->nfs_cb_resop4_u.opcbillegal;

    ncg->nfs4_callback_stats.cb_illegal.value.ui64++;
    resop->resop = OP_CB_ILLEGAL;
    *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
}

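/*
 * Since cb_illegal sets a non-OK status, cb_compound (below) will stop
 * processing any remaining ops in the compound and truncate the
 * results array to the ops actually executed.
 */
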
static void
cb_compound(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
    struct nfs4_callback_globals *ncg)
{
    uint_t i;
    struct compound_state cs;
    nfs_cb_argop4 *argop;
    nfs_cb_resop4 *resop, *new_res;
    uint_t op;

    bzero(&cs, sizeof (cs));
    cs.statusp = &resp->status;
    cs.cont = TRUE;

    /*
     * Form a reply tag by copying over the request tag.
     */
    resp->tag.utf8string_len = args->tag.utf8string_len;
    resp->tag.utf8string_val = kmem_alloc(resp->tag.utf8string_len,
        KM_SLEEP);
    bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
        args->tag.utf8string_len);

    /*
     * XXX for now, minorversion should be zero
     */
    if (args->minorversion != CB4_MINORVERSION) {
        resp->array_len = 0;
        resp->array = NULL;
        resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
        return;
    }

#ifdef DEBUG
    /*
     * Verify callback_ident.  It doesn't really matter if it's wrong
     * because we don't really use callback_ident -- we use the prog
     * number of the RPC request instead.  In this case, just print a
     * DEBUG console message to reveal brokenness of cbclient (at
     * bkoff/cthon).
     */
    if (args->callback_ident != req->rq_prog)
        zcmn_err(getzoneid(), CE_WARN,
            "cb_compound: cb_client using wrong "
            "callback_ident(%d), should be %d",
            args->callback_ident, req->rq_prog);
#endif

    resp->array_len = args->array_len;
    resp->array = kmem_zalloc(args->array_len * sizeof (nfs_cb_resop4),
        KM_SLEEP);

    for (i = 0; i < args->array_len && cs.cont; i++) {

        argop = &args->array[i];
        resop = &resp->array[i];
        resop->resop = argop->argop;
        op = (uint_t)resop->resop;

        switch (op) {

        case OP_CB_GETATTR:

            cb_getattr(argop, resop, req, &cs, ncg);
            break;

        case OP_CB_RECALL:

            cb_recall(argop, resop, req, &cs, ncg);
            break;

        case OP_CB_ILLEGAL:

            /* fall through */

        default:
            /*
             * Handle OP_CB_ILLEGAL and any undefined opcode.
             * Currently, the XDR code will return BADXDR
             * if cb op doesn't decode to legal value, so
             * it really only handles OP_CB_ILLEGAL.
             */
            op = OP_CB_ILLEGAL;
            cb_illegal(argop, resop, req, &cs, ncg);
        }

        if (*cs.statusp != NFS4_OK)
            cs.cont = FALSE;

        /*
         * If not at last op, and if we are to stop, then
         * compact the results array.
         */
        if ((i + 1) < args->array_len && !cs.cont) {

            new_res = kmem_alloc(
                (i+1) * sizeof (nfs_cb_resop4), KM_SLEEP);
            bcopy(resp->array,
                new_res, (i+1) * sizeof (nfs_cb_resop4));
            kmem_free(resp->array,
                args->array_len * sizeof (nfs_cb_resop4));

            resp->array_len = i + 1;
            resp->array = new_res;
        }
    }

}

static void
cb_compound_free(CB_COMPOUND4res *resp)
{
    uint_t i, op;
    nfs_cb_resop4 *resop;

    if (resp->tag.utf8string_val) {
        UTF8STRING_FREE(resp->tag)
    }

    for (i = 0; i < resp->array_len; i++) {

        resop = &resp->array[i];
        op = (uint_t)resop->resop;

        switch (op) {

        case OP_CB_GETATTR:

            cb_getattr_free(resop);
            break;

        case OP_CB_RECALL:

            cb_recall_free(resop);
            break;

        default:
            break;
        }
    }

    if (resp->array != NULL) {
        kmem_free(resp->array,
            resp->array_len * sizeof (nfs_cb_resop4));
    }
}

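/*
 * cb_dispatch is the service dispatch routine registered (via
 * nfs4_cb_sct) for every callback program number.  It decodes the
 * arguments, hands CB_NULL and CB_COMPOUND off to the handlers above,
 * sends the reply, and frees both the arguments and the results.
 */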
static void
cb_dispatch(struct svc_req *req, SVCXPRT *xprt)
{
    CB_COMPOUND4args args;
    CB_COMPOUND4res res;
    struct nfs4_callback_globals *ncg;

    bool_t (*xdr_args)(), (*xdr_res)();
    void (*proc)(CB_COMPOUND4args *, CB_COMPOUND4res *, struct svc_req *,
        struct nfs4_callback_globals *);
    void (*freeproc)(CB_COMPOUND4res *);

    ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
    ASSERT(ncg != NULL);

    ncg->nfs4_callback_stats.cb_dispatch.value.ui64++;

    switch (req->rq_proc) {
    case CB_NULL:
        xdr_args = xdr_void;
        xdr_res = xdr_void;
        proc = cb_null;
        freeproc = NULL;
        break;

    case CB_COMPOUND:
        xdr_args = xdr_CB_COMPOUND4args_clnt;
        xdr_res = xdr_CB_COMPOUND4res;
        proc = cb_compound;
        freeproc = cb_compound_free;
        break;

    default:
        CB_WARN("cb_dispatch: no proc\n");
        svcerr_noproc(xprt);
        return;
    }

    args.tag.utf8string_val = NULL;
    args.array = NULL;

    if (!SVC_GETARGS(xprt, xdr_args, (caddr_t)&args)) {

        CB_WARN("cb_dispatch: cannot getargs\n");
        svcerr_decode(xprt);
        return;
    }

    (*proc)(&args, &res, req, ncg);

    if (svc_sendreply(xprt, xdr_res, (caddr_t)&res) == FALSE) {

        CB_WARN("cb_dispatch: bad sendreply\n");
        svcerr_systemerr(xprt);
    }

    if (freeproc)
        (*freeproc)(&res);

    if (!SVC_FREEARGS(xprt, xdr_args, (caddr_t)&args)) {

        CB_WARN("cb_dispatch: bad freeargs\n");
    }
}

static rpcprog_t
nfs4_getnextprogram(struct nfs4_callback_globals *ncg)
{
    int i, j;

    j = ncg->nfs4_program_hint;
    for (i = 0; i < nfs4_num_prognums; i++, j++) {

        if (j >= nfs4_num_prognums)
            j = 0;

        if (ncg->nfs4prog2server[j] == NULL) {
            ncg->nfs4_program_hint = j+1;
            return (j+NFS4_CALLBACK);
        }
    }

    return (0);
}

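/*
 * Program numbers NFS4_CALLBACK through NFS4_CALLBACK +
 * nfs4_num_prognums - 1 are handed out one per nfs4_server_t, so a
 * zone can talk to at most nfs4_num_prognums (default 1024) distinct
 * servers at once; nfs4_program_hint just makes the common
 * allocate-after-free case cheap instead of a full-table scan.
 */
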
void
nfs4callback_destroy(nfs4_server_t *np)
{
    struct nfs4_callback_globals *ncg;
    int i;

    if (np->s_program == 0)
        return;

    ncg = np->zone_globals;
    i = np->s_program - NFS4_CALLBACK;

    mutex_enter(&ncg->nfs4_cb_lock);

    ASSERT(ncg->nfs4prog2server[i] == np);

    ncg->nfs4prog2server[i] = NULL;

    if (i < ncg->nfs4_program_hint)
        ncg->nfs4_program_hint = i;

    mutex_exit(&ncg->nfs4_cb_lock);
}

/*
 * nfs4_setport - This function saves a netid and universal address for
 * the callback program.  These values will be used during setclientid.
 */
static void
nfs4_setport(char *netid, char *uaddr, char *protofmly, char *proto,
    struct nfs4_callback_globals *ncg)
{
    struct nfs4_cb_port *p;
    bool_t found = FALSE;

    ASSERT(MUTEX_HELD(&ncg->nfs4_cb_lock));

    p = list_head(&ncg->nfs4_cb_ports);
    for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
        if (strcmp(p->netid, netid) == 0) {
            found = TRUE;
            break;
        }
    }
    if (found == TRUE)
        (void) strcpy(p->uaddr, uaddr);
    else {
        p = kmem_alloc(sizeof (*p), KM_SLEEP);

        (void) strcpy(p->uaddr, uaddr);
        (void) strcpy(p->netid, netid);
        (void) strcpy(p->protofmly, protofmly);
        (void) strcpy(p->proto, proto);
        list_insert_head(&ncg->nfs4_cb_ports, p);
    }
}

/*
 * nfs4_cb_args - This function is used to construct the callback
 * portion of the arguments needed for setclientid.
 */

void
nfs4_cb_args(nfs4_server_t *np, struct knetconfig *knc, SETCLIENTID4args *args)
{
    struct nfs4_cb_port *p;
    bool_t found = FALSE;
    rpcprog_t pgm;
    struct nfs4_callback_globals *ncg = np->zone_globals;

    /*
     * This server structure may already have a program number
     * assigned to it.  This happens when the client has to
     * re-issue SETCLIENTID.  Release the old program number so
     * that a fresh one is allocated below.
     */
    if (np->s_program >= NFS4_CALLBACK &&
        np->s_program < NFS4_CALLBACK + nfs4_num_prognums)
        nfs4callback_destroy(np);

    mutex_enter(&ncg->nfs4_cb_lock);

    p = list_head(&ncg->nfs4_cb_ports);
    for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
        if (strcmp(p->protofmly, knc->knc_protofmly) == 0 &&
            strcmp(p->proto, knc->knc_proto) == 0) {
            found = TRUE;
            break;
        }
    }

    if (found == FALSE) {

        NFS4_DEBUG(nfs4_callback_debug,
            (CE_WARN, "nfs4_cb_args: could not find netid for %s/%s\n",
            knc->knc_protofmly, knc->knc_proto));

        args->callback.cb_program = 0;
        args->callback.cb_location.r_netid = NULL;
        args->callback.cb_location.r_addr = NULL;
        args->callback_ident = 0;
        mutex_exit(&ncg->nfs4_cb_lock);
        return;
    }

    if ((pgm = nfs4_getnextprogram(ncg)) == 0) {
        CB_WARN("nfs4_cb_args: out of program numbers\n");

        args->callback.cb_program = 0;
        args->callback.cb_location.r_netid = NULL;
        args->callback.cb_location.r_addr = NULL;
        args->callback_ident = 0;
        mutex_exit(&ncg->nfs4_cb_lock);
        return;
    }

    ncg->nfs4prog2server[pgm-NFS4_CALLBACK] = np;
    args->callback.cb_program = pgm;
    args->callback.cb_location.r_netid = p->netid;
    args->callback.cb_location.r_addr = p->uaddr;
    args->callback_ident = pgm;

    np->s_program = pgm;

    mutex_exit(&ncg->nfs4_cb_lock);
}

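/*
 * Note that cb_program and callback_ident are both set to the same
 * program number above; the server echoes callback_ident back in each
 * CB_COMPOUND, which is what the DEBUG check in cb_compound() compares
 * against the RPC program number.
 */
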
static int
nfs4_dquery(struct nfs4_svc_args *arg, model_t model)
{
    file_t *fp;
    vnode_t *vp;
    rnode4_t *rp;
    int error;
    STRUCT_HANDLE(nfs4_svc_args, uap);

    STRUCT_SET_HANDLE(uap, model, arg);

    if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
        return (EBADF);

    vp = fp->f_vnode;

    if (vp == NULL || vp->v_type != VREG ||
        !vn_matchops(vp, nfs4_vnodeops)) {
        releasef(STRUCT_FGET(uap, fd));
        return (EBADF);
    }

    rp = VTOR4(vp);

    /*
     * I can't convince myself that we need locking here.  The
     * rnode cannot disappear and the value returned is instantly
     * stale anyway, so why bother?
     */

    error = suword32(STRUCT_FGETP(uap, netid), rp->r_deleg_type);
    releasef(STRUCT_FGET(uap, fd));
    return (error);
}


/*
 * NFS4 client system call.  This service does the
 * necessary initialization for the callback program.
 * This is fashioned after the server side interaction
 * between nfsd and the kernel.  On the client, the
 * mount command forks and the child process does the
 * necessary interaction with the kernel.
 *
 * uap->fd is the fd of an open transport provider
 */
int
nfs4_svc(struct nfs4_svc_args *arg, model_t model)
{
    file_t *fp;
    int error;
    int readsize;
    char buf[KNC_STRSIZE], uaddr[KNC_STRSIZE];
    char protofmly[KNC_STRSIZE], proto[KNC_STRSIZE];
    size_t len;
    STRUCT_HANDLE(nfs4_svc_args, uap);
    struct netbuf addrmask;
    int cmd;
    SVCMASTERXPRT *cb_xprt;
    struct nfs4_callback_globals *ncg;

#ifdef lint
    model = model;      /* STRUCT macros don't always refer to it */
#endif

    STRUCT_SET_HANDLE(uap, model, arg);

    if (STRUCT_FGET(uap, cmd) == NFS4_DQUERY)
        return (nfs4_dquery(arg, model));

    if (secpolicy_nfs(CRED()) != 0)
        return (EPERM);

    if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
        return (EBADF);

    /*
     * Set read buffer size to rsize
     * and add room for RPC headers.
     */
    readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
    if (readsize < RPC_MAXDATASIZE)
        readsize = RPC_MAXDATASIZE;

    error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
        KNC_STRSIZE, &len);
    if (error) {
        releasef(STRUCT_FGET(uap, fd));
        return (error);
    }

    cmd = STRUCT_FGET(uap, cmd);

    if (cmd & NFS4_KRPC_START) {
        addrmask.len = STRUCT_FGET(uap, addrmask.len);
        addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
        addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
        error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
            addrmask.len);
        if (error) {
            releasef(STRUCT_FGET(uap, fd));
            kmem_free(addrmask.buf, addrmask.maxlen);
            return (error);
        }
    } else
        addrmask.buf = NULL;

    error = copyinstr((const char *)STRUCT_FGETP(uap, addr), uaddr,
        sizeof (uaddr), &len);
    if (error) {
        releasef(STRUCT_FGET(uap, fd));
        if (addrmask.buf)
            kmem_free(addrmask.buf, addrmask.maxlen);
        return (error);
    }

    error = copyinstr((const char *)STRUCT_FGETP(uap, protofmly),
        protofmly, sizeof (protofmly), &len);
    if (error) {
        releasef(STRUCT_FGET(uap, fd));
        if (addrmask.buf)
            kmem_free(addrmask.buf, addrmask.maxlen);
        return (error);
    }

    error = copyinstr((const char *)STRUCT_FGETP(uap, proto), proto,
        sizeof (proto), &len);
    if (error) {
        releasef(STRUCT_FGET(uap, fd));
        if (addrmask.buf)
            kmem_free(addrmask.buf, addrmask.maxlen);
        return (error);
    }

    ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
    ASSERT(ncg != NULL);

    mutex_enter(&ncg->nfs4_cb_lock);
    if (cmd & NFS4_SETPORT)
        nfs4_setport(buf, uaddr, protofmly, proto, ncg);

    if (cmd & NFS4_KRPC_START) {
        error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &cb_xprt,
            &nfs4_cb_sct, NULL, NFS_CB_SVCPOOL_ID, FALSE);
        if (error) {
            CB_WARN1("nfs4_svc: svc_tli_kcreate failed %d\n",
                error);
            kmem_free(addrmask.buf, addrmask.maxlen);
        }
    }

    mutex_exit(&ncg->nfs4_cb_lock);
    releasef(STRUCT_FGET(uap, fd));
    return (error);
}

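/*
 * Note that cmd may carry NFS4_SETPORT and NFS4_KRPC_START together:
 * NFS4_SETPORT merely records the netid/universal address for later
 * use by nfs4_cb_args(), while NFS4_KRPC_START creates the kRPC
 * endpoint that cb_dispatch() will service.
 */
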
struct nfs4_callback_globals *
nfs4_get_callback_globals(void)
{
    return (zone_getspecific(nfs4_callback_zone_key, nfs_zone()));
}

static void *
nfs4_callback_init_zone(zoneid_t zoneid)
{
    kstat_t *nfs4_callback_kstat;
    struct nfs4_callback_globals *ncg;

    ncg = kmem_zalloc(sizeof (*ncg), KM_SLEEP);

    ncg->nfs4prog2server = kmem_zalloc(nfs4_num_prognums *
        sizeof (struct nfs4_server *), KM_SLEEP);

    /* initialize the dlist */
    mutex_init(&ncg->nfs4_dlist_lock, NULL, MUTEX_DEFAULT, NULL);
    list_create(&ncg->nfs4_dlist, sizeof (struct nfs4_dnode),
        offsetof(struct nfs4_dnode, linkage));

    /* initialize cb_port list */
    mutex_init(&ncg->nfs4_cb_lock, NULL, MUTEX_DEFAULT, NULL);
    list_create(&ncg->nfs4_cb_ports, sizeof (struct nfs4_cb_port),
        offsetof(struct nfs4_cb_port, linkage));

    /* get our own copy of the kstats */
    bcopy(&nfs4_callback_stats_tmpl, &ncg->nfs4_callback_stats,
        sizeof (nfs4_callback_stats_tmpl));
    /* register "nfs:0:nfs4_callback_stats" for this zone */
    if ((nfs4_callback_kstat =
        kstat_create_zone("nfs", 0, "nfs4_callback_stats", "misc",
        KSTAT_TYPE_NAMED,
        sizeof (ncg->nfs4_callback_stats) / sizeof (kstat_named_t),
        KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE,
        zoneid)) != NULL) {
        nfs4_callback_kstat->ks_data = &ncg->nfs4_callback_stats;
        kstat_install(nfs4_callback_kstat);
    }
    return (ncg);
}

static void
nfs4_discard_delegations(struct nfs4_callback_globals *ncg)
{
    nfs4_server_t *sp;
    int i, num_removed;

    /*
     * It's OK here to just run through the registered "programs", as
     * servers without programs won't have any delegations to handle.
     */
    for (i = 0; i < nfs4_num_prognums; i++) {
        rnode4_t *rp;

        mutex_enter(&ncg->nfs4_cb_lock);
        sp = ncg->nfs4prog2server[i];
        mutex_exit(&ncg->nfs4_cb_lock);

        if (nfs4_server_vlock(sp, 1) == FALSE)
            continue;
        num_removed = 0;
        while ((rp = list_head(&sp->s_deleg_list)) != NULL) {
            mutex_enter(&rp->r_statev4_lock);
            if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
                /*
                 * We need to take matters into our own hands,
                 * as nfs4delegreturn_cleanup_impl() won't
                 * remove this from the list.
                 */
                list_remove(&sp->s_deleg_list, rp);
                mutex_exit(&rp->r_statev4_lock);
                nfs4_dec_state_ref_count_nolock(sp,
                    VTOMI4(RTOV4(rp)));
                num_removed++;
                continue;
            }
            mutex_exit(&rp->r_statev4_lock);
            VN_HOLD(RTOV4(rp));
            mutex_exit(&sp->s_lock);
            /*
             * The following will remove the node from the list.
             */
            nfs4delegreturn_cleanup_impl(rp, sp, ncg);
            VN_RELE(RTOV4(rp));
            mutex_enter(&sp->s_lock);
        }
        mutex_exit(&sp->s_lock);
        /* each removed list node reles a reference */
        while (num_removed-- > 0)
            nfs4_server_rele(sp);
        /* remove our reference for nfs4_server_vlock */
        nfs4_server_rele(sp);
    }
}

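/*
 * Note that nfs4_discard_delegations() counts the nodes it removes
 * directly and defers the matching nfs4_server_rele() calls until
 * after s_lock has been dropped, presumably because dropping a
 * reference requires taking the server's lock again.
 */
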
/* ARGSUSED */
static void
nfs4_callback_shutdown_zone(zoneid_t zoneid, void *data)
{
    struct nfs4_callback_globals *ncg = data;

    /*
     * Clean pending delegation return list.
     */
    nfs4_dlistclean_impl(ncg, NFS4_DR_DISCARD);

    /*
     * Discard all delegations.
     */
    nfs4_discard_delegations(ncg);
}

static void
nfs4_callback_fini_zone(zoneid_t zoneid, void *data)
{
    struct nfs4_callback_globals *ncg = data;
    struct nfs4_cb_port *p;
    nfs4_server_t *sp, *next;
    nfs4_server_t freelist;
    int i;

    kstat_delete_byname_zone("nfs", 0, "nfs4_callback_stats", zoneid);

    /*
     * Discard all delegations that may have crept in since we did the
     * _shutdown.
     */
    nfs4_discard_delegations(ncg);
    /*
     * We're completely done with this zone and all associated
     * nfs4_server_t's.  Any remaining nfs4_server_ts should only have one
     * more reference outstanding -- the reference we didn't release in
     * nfs4_renew_lease_thread().
     *
     * Here we need to run through the global nfs4_server_lst as we need to
     * deal with nfs4_server_ts without programs, as they also have threads
     * created for them, and so have outstanding references that we need to
     * release.
     */
    freelist.forw = &freelist;
    freelist.back = &freelist;
    mutex_enter(&nfs4_server_lst_lock);
    sp = nfs4_server_lst.forw;
    while (sp != &nfs4_server_lst) {
        next = sp->forw;
        if (sp->zoneid == zoneid) {
            remque(sp);
            insque(sp, &freelist);
        }
        sp = next;
    }
    mutex_exit(&nfs4_server_lst_lock);

    sp = freelist.forw;
    while (sp != &freelist) {
        next = sp->forw;
        nfs4_server_rele(sp);   /* free the list's reference */
        sp = next;
    }

#ifdef DEBUG
    for (i = 0; i < nfs4_num_prognums; i++) {
        ASSERT(ncg->nfs4prog2server[i] == NULL);
    }
#endif
    kmem_free(ncg->nfs4prog2server, nfs4_num_prognums *
        sizeof (struct nfs4_server *));

    mutex_enter(&ncg->nfs4_cb_lock);
    while ((p = list_head(&ncg->nfs4_cb_ports)) != NULL) {
        list_remove(&ncg->nfs4_cb_ports, p);
        kmem_free(p, sizeof (*p));
    }
    mutex_exit(&ncg->nfs4_cb_lock);
    list_destroy(&ncg->nfs4_cb_ports);
    mutex_destroy(&ncg->nfs4_cb_lock);
    list_destroy(&ncg->nfs4_dlist);
    mutex_destroy(&ncg->nfs4_dlist_lock);
    kmem_free(ncg, sizeof (*ncg));
}

void
nfs4_callback_init(void)
{
    int i;
    SVC_CALLOUT *nfs4_cb_sc;

    /* initialize the callback table */
    nfs4_cb_sc = kmem_alloc(nfs4_num_prognums *
        sizeof (SVC_CALLOUT), KM_SLEEP);

    for (i = 0; i < nfs4_num_prognums; i++) {
        nfs4_cb_sc[i].sc_prog = NFS4_CALLBACK+i;
        nfs4_cb_sc[i].sc_versmin = NFS_CB;
        nfs4_cb_sc[i].sc_versmax = NFS_CB;
        nfs4_cb_sc[i].sc_dispatch = cb_dispatch;
    }

    nfs4_cb_sct.sct_size = nfs4_num_prognums;
    nfs4_cb_sct.sct_free = FALSE;
    nfs4_cb_sct.sct_sc = nfs4_cb_sc;

    /*
     * Compute max bytes required for dynamically allocated parts
     * of cb_getattr reply.  Only size and change are supported now.
     * If CB_GETATTR is changed to reply with additional attrs,
     * additional sizes must be added below.
     *
     * fattr4_change + fattr4_size == uint64_t + uint64_t
     */
    cb_getattr_bytes = 2 * BYTES_PER_XDR_UNIT + 2 * BYTES_PER_XDR_UNIT;

    zone_key_create(&nfs4_callback_zone_key, nfs4_callback_init_zone,
        nfs4_callback_shutdown_zone, nfs4_callback_fini_zone);
}

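/*
 * A single SVC_CALLOUT_TABLE covering all nfs4_num_prognums program
 * numbers is built once here; every entry points at cb_dispatch, so
 * one service pool (NFS_CB_SVCPOOL_ID) can field callbacks for every
 * server the zone talks to.
 */
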
void
nfs4_callback_fini(void)
{
}

/*
 * NB: This function can be called from the *wrong* zone (ie, the zone that
 * 'rp' belongs to and the caller's zone may not be the same).  This can happen
 * if the zone is going away and we get called from nfs4_async_inactive().  In
 * this case the globals will be NULL and we won't update the counters, which
 * doesn't matter as the zone is going away anyhow.
 */
static void
nfs4delegreturn_cleanup_impl(rnode4_t *rp, nfs4_server_t *np,
    struct nfs4_callback_globals *ncg)
{
    mntinfo4_t *mi = VTOMI4(RTOV4(rp));
    boolean_t need_rele = B_FALSE;

    /*
     * Caller must be holding mi_recovlock in read mode
     * to call here.  This is provided by start_op.
     * Delegation management requires grabbing s_lock
     * first and then r_statev4_lock.
     */

    if (np == NULL) {
        np = find_nfs4_server_all(mi, 1);
        ASSERT(np != NULL);
        need_rele = B_TRUE;
    } else {
        mutex_enter(&np->s_lock);
    }

    mutex_enter(&rp->r_statev4_lock);

    if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
        mutex_exit(&rp->r_statev4_lock);
        mutex_exit(&np->s_lock);
        if (need_rele)
            nfs4_server_rele(np);
        return;
    }

    /*
     * Free the cred originally held when
     * the delegation was granted.  Caller must
     * hold this cred if it wants to use it after
     * this call.
     */
    crfree(rp->r_deleg_cred);
    rp->r_deleg_cred = NULL;
    rp->r_deleg_type = OPEN_DELEGATE_NONE;
    rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
    rp->r_deleg_needs_recall = FALSE;
    rp->r_deleg_return_pending = FALSE;

    /*
     * Remove the rnode from the server's list and
     * update the ref counts.
     */
    list_remove(&np->s_deleg_list, rp);
    mutex_exit(&rp->r_statev4_lock);
    nfs4_dec_state_ref_count_nolock(np, mi);
    mutex_exit(&np->s_lock);
    /* removed list node removes a reference */
    nfs4_server_rele(np);
    if (need_rele)
        nfs4_server_rele(np);
    if (ncg != NULL)
        ncg->nfs4_callback_stats.delegations.value.ui64--;
}

void
nfs4delegreturn_cleanup(rnode4_t *rp, nfs4_server_t *np)
{
    struct nfs4_callback_globals *ncg;

    if (np != NULL) {
        ncg = np->zone_globals;
    } else if (nfs_zone() == VTOMI4(RTOV4(rp))->mi_zone) {
        ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
        ASSERT(ncg != NULL);
    } else {
        /*
         * Request coming from the wrong zone.
         */
        ASSERT(getzoneid() == GLOBAL_ZONEID);
        ncg = NULL;
    }

    nfs4delegreturn_cleanup_impl(rp, np, ncg);
}

static void
nfs4delegreturn_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp,
    cred_t *cr, vnode_t *vp)
{
    if (error != ETIMEDOUT && error != EINTR &&
        !NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) {
        lost_rqstp->lr_op = 0;
        return;
    }

    NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
        "nfs4delegreturn_save_lost_rqst: error %d", error));

    lost_rqstp->lr_op = OP_DELEGRETURN;
    /*
     * The vp is held and rele'd via the recovery code.
     * See nfs4_save_lost_rqst.
     */
    lost_rqstp->lr_vp = vp;
    lost_rqstp->lr_dvp = NULL;
    lost_rqstp->lr_oop = NULL;
    lost_rqstp->lr_osp = NULL;
    lost_rqstp->lr_lop = NULL;
    lost_rqstp->lr_cr = cr;
    lost_rqstp->lr_flk = NULL;
    lost_rqstp->lr_putfirst = FALSE;
}

static void
nfs4delegreturn_otw(rnode4_t *rp, cred_t *cr, nfs4_error_t *ep)
{
    COMPOUND4args_clnt args;
    COMPOUND4res_clnt res;
    nfs_argop4 argops[3];
    nfs4_ga_res_t *garp = NULL;
    hrtime_t t;
    int numops;
    int doqueue = 1;

    args.ctag = TAG_DELEGRETURN;

    numops = 3;     /* PUTFH, GETATTR, DELEGRETURN */

    args.array = argops;
    args.array_len = numops;

    argops[0].argop = OP_CPUTFH;
    argops[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;

    argops[1].argop = OP_GETATTR;
    argops[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
    argops[1].nfs_argop4_u.opgetattr.mi = VTOMI4(RTOV4(rp));

    argops[2].argop = OP_DELEGRETURN;
    argops[2].nfs_argop4_u.opdelegreturn.deleg_stateid =
        rp->r_deleg_stateid;

    t = gethrtime();
    rfs4call(VTOMI4(RTOV4(rp)), &args, &res, cr, &doqueue, 0, ep);

    if (ep->error)
        return;

    if (res.status == NFS4_OK) {
        garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res;
        nfs4_attr_cache(RTOV4(rp), garp, t, cr, TRUE, NULL);

    }
    (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
}

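/*
 * Note that the GETATTR is sequenced before the DELEGRETURN in the
 * compound above, so the attribute cache can be refreshed with the
 * server's view of the file while the delegation is still in effect.
 */
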
int
nfs4_do_delegreturn(rnode4_t *rp, int flags, cred_t *cr,
    struct nfs4_callback_globals *ncg)
{
    vnode_t *vp = RTOV4(rp);
    mntinfo4_t *mi = VTOMI4(vp);
    nfs4_lost_rqst_t lost_rqst;
    nfs4_recov_state_t recov_state;
    bool_t needrecov = FALSE, recovonly, done = FALSE;
    nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };

    ncg->nfs4_callback_stats.delegreturn.value.ui64++;

    while (!done) {
        e.error = nfs4_start_fop(mi, vp, NULL, OH_DELEGRETURN,
            &recov_state, &recovonly);

        if (e.error) {
            if (flags & NFS4_DR_FORCE) {
                (void) nfs_rw_enter_sig(&mi->mi_recovlock,
                    RW_READER, 0);
                nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
                nfs_rw_exit(&mi->mi_recovlock);
            }
            break;
        }

        /*
         * Check to see if the delegation has already been
         * returned by the recovery thread.  The state of
         * the delegation cannot change at this point due
         * to start_fop and the r_deleg_recall_lock.
         */
        if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
            e.error = 0;
            nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
            break;
        }

        if (recovonly) {
            /*
             * Delegation will be returned via the
             * recovery framework.  Build a lost request
             * structure, start recovery and get out.
             */
            nfs4_error_init(&e, EINTR);
            nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
                cr, vp);
            (void) nfs4_start_recovery(&e, mi, vp,
                NULL, &rp->r_deleg_stateid,
                lost_rqst.lr_op == OP_DELEGRETURN ?
                &lost_rqst : NULL, OP_DELEGRETURN, NULL);
            nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
            break;
        }

        nfs4delegreturn_otw(rp, cr, &e);

        /*
         * Ignore some errors on delegreturn; no point in marking
         * the file dead on a state destroying operation.
         */
        if (e.error == 0 && (nfs4_recov_marks_dead(e.stat) ||
            e.stat == NFS4ERR_BADHANDLE ||
            e.stat == NFS4ERR_STALE))
            needrecov = FALSE;
        else
            needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);

        if (needrecov) {
            nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
                cr, vp);
            (void) nfs4_start_recovery(&e, mi, vp,
                NULL, &rp->r_deleg_stateid,
                lost_rqst.lr_op == OP_DELEGRETURN ?
                &lost_rqst : NULL, OP_DELEGRETURN, NULL);
        } else {
            nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
            done = TRUE;
        }

        nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
    }
    return (e.error);
}

/*
 * nfs4_resend_delegreturn - used to drive the delegreturn
 * operation via the recovery thread.
 */
void
nfs4_resend_delegreturn(nfs4_lost_rqst_t *lorp, nfs4_error_t *ep,
    nfs4_server_t *np)
{
    rnode4_t *rp = VTOR4(lorp->lr_vp);

    /* If the file failed recovery, just quit. */
    mutex_enter(&rp->r_statelock);
    if (rp->r_flags & R4RECOVERR) {
        ep->error = EIO;
    }
    mutex_exit(&rp->r_statelock);

    if (!ep->error)
        nfs4delegreturn_otw(rp, lorp->lr_cr, ep);

    /*
     * If recovery is now needed, then return the error
     * and status and let the recovery thread handle it,
     * including re-driving another delegreturn.  Otherwise,
     * just give up and clean up the delegation.
     */
    if (nfs4_needs_recovery(ep, TRUE, lorp->lr_vp->v_vfsp))
        return;

    if (rp->r_deleg_type != OPEN_DELEGATE_NONE)
        nfs4delegreturn_cleanup(rp, np);

    nfs4_error_zinit(ep);
}

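/*
 * Together, nfs4delegreturn_save_lost_rqst() and
 * nfs4_resend_delegreturn() form the lost-request path: when a
 * DELEGRETURN fails in a way that requires recovery, the saved
 * request is queued for the recovery thread, which later re-drives
 * the over-the-wire call above.
 */
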
/*
 * nfs4delegreturn - general function to return a delegation.
 *
 * NFS4_DR_FORCE - return the delegation even if start_op fails
 * NFS4_DR_PUSH - push modified data back to the server via VOP_PUTPAGE
 * NFS4_DR_DISCARD - discard the delegation w/o delegreturn
 * NFS4_DR_DID_OP - calling function already did nfs4_start_op
 * NFS4_DR_RECALL - delegreturn initiated via CB_RECALL
 * NFS4_DR_REOPEN - do file reopens, if applicable
 */
static int
nfs4delegreturn_impl(rnode4_t *rp, int flags, struct nfs4_callback_globals *ncg)
{
    int error = 0;
    cred_t *cr = NULL;
    vnode_t *vp;
    bool_t needrecov = FALSE;
    bool_t rw_entered = FALSE;
    bool_t do_reopen;

    vp = RTOV4(rp);

    /*
     * If NFS4_DR_DISCARD is set by itself, take a short-cut and
     * discard without doing an otw DELEGRETURN.  This may only be used
     * by the recovery thread because it bypasses the synchronization
     * with r_deleg_recall_lock and mi->mi_recovlock.
     */
    if (flags == NFS4_DR_DISCARD) {
        nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
        return (0);
    }

    if (flags & NFS4_DR_DID_OP) {
        /*
         * Caller had already done start_op, which means the
         * r_deleg_recall_lock is already held in READ mode
         * so we cannot take it in write mode.  Return the
         * delegation asynchronously.
         *
         * Remove the NFS4_DR_DID_OP flag so we don't
         * get stuck looping through here.
         */
        VN_HOLD(vp);
        nfs4delegreturn_async(rp, (flags & ~NFS4_DR_DID_OP), FALSE);
        return (0);
    }

    /*
     * Verify we still have a delegation and crhold the credential.
     */
    mutex_enter(&rp->r_statev4_lock);
    if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
        mutex_exit(&rp->r_statev4_lock);
        goto out;
    }
    cr = rp->r_deleg_cred;
    ASSERT(cr != NULL);
    crhold(cr);
    mutex_exit(&rp->r_statev4_lock);

    /*
     * Push the modified data back to the server synchronously
     * before doing DELEGRETURN.
     */
    if (flags & NFS4_DR_PUSH)
        (void) VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);

    /*
     * Take r_deleg_recall_lock in WRITE mode, this will prevent
     * nfs4_is_otw_open_necessary from trying to use the delegation
     * while the DELEGRETURN is in progress.
     */
    (void) nfs_rw_enter_sig(&rp->r_deleg_recall_lock, RW_WRITER, FALSE);

    rw_entered = TRUE;

    if (rp->r_deleg_type == OPEN_DELEGATE_NONE)
        goto out;

    if (flags & NFS4_DR_REOPEN) {
        /*
         * If R4RECOVERRP is already set, then skip re-opening
         * the delegation open streams and go straight to doing
         * delegreturn.  (XXX if the file has failed recovery, then
         * the delegreturn attempt is likely to be futile.)
         */
        mutex_enter(&rp->r_statelock);
        do_reopen = !(rp->r_flags & R4RECOVERRP);
        mutex_exit(&rp->r_statelock);

        if (do_reopen) {
            error = deleg_reopen(vp, &needrecov, ncg, flags);
            if (error != 0) {
                if ((flags & (NFS4_DR_FORCE | NFS4_DR_RECALL))
                    == 0)
                    goto out;
            } else if (needrecov) {
                if ((flags & NFS4_DR_FORCE) == 0)
                    goto out;
            }
        }
    }

    if (flags & NFS4_DR_DISCARD) {
        mntinfo4_t *mi = VTOMI4(RTOV4(rp));

        mutex_enter(&rp->r_statelock);
        /*
         * deleg_return_pending is cleared inside of delegation_accept
         * when a delegation is accepted.  if this flag has been
         * cleared, then a new delegation has overwritten the one we
         * were about to throw away.
         */
        if (!rp->r_deleg_return_pending) {
            mutex_exit(&rp->r_statelock);
            goto out;
        }
        mutex_exit(&rp->r_statelock);
        (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE);
        nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
        nfs_rw_exit(&mi->mi_recovlock);
    } else {
        error = nfs4_do_delegreturn(rp, flags, cr, ncg);
    }

out:
    if (cr)
        crfree(cr);
    if (rw_entered)
        nfs_rw_exit(&rp->r_deleg_recall_lock);
    return (error);
}

int
nfs4delegreturn(rnode4_t *rp, int flags)
{
    struct nfs4_callback_globals *ncg;

    ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
    ASSERT(ncg != NULL);

    return (nfs4delegreturn_impl(rp, flags, ncg));
}

void
nfs4delegreturn_async(rnode4_t *rp, int flags, bool_t trunc)
{
    struct cb_recall_pass *pp;

    pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
    pp->rp = rp;
    pp->flags = flags;
    pp->truncate = trunc;

    /*
     * Fire up a thread to do the actual delegreturn.
     * Caller must guarantee that the rnode doesn't
     * vanish (by calling VN_HOLD).
     */

    (void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
        minclsyspri);
}

static void
delegreturn_all_thread(rpcprog_t *pp)
{
    nfs4_server_t *np;
    bool_t found = FALSE;
    rpcprog_t prog;
    rnode4_t *rp;
    vnode_t *vp;
    zoneid_t zoneid = getzoneid();
    struct nfs4_callback_globals *ncg;

    NFS4_DEBUG(nfs4_drat_debug,
        (CE_NOTE, "delegreturn_all_thread: prog %d\n", *pp));

    prog = *pp;
    kmem_free(pp, sizeof (*pp));
    pp = NULL;

    mutex_enter(&nfs4_server_lst_lock);
    for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
        if (np->zoneid == zoneid && np->s_program == prog) {
            mutex_enter(&np->s_lock);
            found = TRUE;
            break;
        }
    }
    mutex_exit(&nfs4_server_lst_lock);

    /*
     * It's possible that the nfs4_server which was using this
     * program number has vanished since this thread is async.
     * If so, just return.  Your work here is finished, my friend.
     */
    if (!found)
        goto out;

    ncg = np->zone_globals;
    while ((rp = list_head(&np->s_deleg_list)) != NULL) {
        vp = RTOV4(rp);
        VN_HOLD(vp);
        mutex_exit(&np->s_lock);
        (void) nfs4delegreturn_impl(rp, NFS4_DR_PUSH|NFS4_DR_REOPEN,
            ncg);
        VN_RELE(vp);

        /* retake the s_lock for next trip through the loop */
        mutex_enter(&np->s_lock);
    }
    mutex_exit(&np->s_lock);
out:
    NFS4_DEBUG(nfs4_drat_debug,
        (CE_NOTE, "delegreturn_all_thread: complete\n"));
    zthread_exit();
}

void
nfs4_delegreturn_all(nfs4_server_t *sp)
{
    rpcprog_t pro, *pp;

    mutex_enter(&sp->s_lock);

    /* Check to see if the delegation list is empty */

    if (list_head(&sp->s_deleg_list) == NULL) {
        mutex_exit(&sp->s_lock);
        return;
    }
    /*
     * Grab the program number; the async thread will use this
     * to find the nfs4_server.
     */
    pro = sp->s_program;
    mutex_exit(&sp->s_lock);
    pp = kmem_alloc(sizeof (rpcprog_t), KM_SLEEP);
    *pp = pro;
    (void) zthread_create(NULL, 0, delegreturn_all_thread, pp, 0,
        minclsyspri);
}

/*
 * Discard any delegations
 *
 * Iterate over the server's s_deleg_list and
 * for matching mount-point rnodes discard
 * the delegation.
 */
void
nfs4_deleg_discard(mntinfo4_t *mi, nfs4_server_t *sp)
{
    rnode4_t *rp, *next;
    mntinfo4_t *r_mi;
    struct nfs4_callback_globals *ncg;

    ASSERT(mutex_owned(&sp->s_lock));
    ncg = sp->zone_globals;

    for (rp = list_head(&sp->s_deleg_list); rp != NULL; rp = next) {
        r_mi = VTOMI4(RTOV4(rp));
        next = list_next(&sp->s_deleg_list, rp);

        if (r_mi != mi) {
            /*
             * Skip if this rnode is not on the
             * same mount-point
             */
            continue;
        }

        ASSERT(rp->r_deleg_type == OPEN_DELEGATE_READ);

#ifdef DEBUG
        if (nfs4_client_recov_debug) {
            zprintf(getzoneid(),
                "nfs4_deleg_discard: matched rnode %p "
                "-- discarding delegation\n", (void *)rp);
        }
#endif
        mutex_enter(&rp->r_statev4_lock);
        /*
         * Free the cred originally held when the delegation
         * was granted.  Also need to decrement the refcnt
         * on this server for each delegation we discard
         */
        if (rp->r_deleg_cred)
            crfree(rp->r_deleg_cred);
        rp->r_deleg_cred = NULL;
        rp->r_deleg_type = OPEN_DELEGATE_NONE;
        rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
        rp->r_deleg_needs_recall = FALSE;
        ASSERT(sp->s_refcnt > 1);
        sp->s_refcnt--;
        list_remove(&sp->s_deleg_list, rp);
        mutex_exit(&rp->r_statev4_lock);
        nfs4_dec_state_ref_count_nolock(sp, mi);
        ncg->nfs4_callback_stats.delegations.value.ui64--;
    }
}

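/*
 * Note the ASSERT above: only read delegations are expected on this
 * discard path, presumably because a write delegation may be covering
 * dirty data and so must go through the regular delegreturn path,
 * which pushes pages back before the state is torn down.
 */
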
/*
 * Reopen any open streams that were covered by the given file's
 * delegation.
 * Returns zero or an errno value.  If there was no error, *recovp
 * indicates whether recovery was initiated.
 */

static int
deleg_reopen(vnode_t *vp, bool_t *recovp, struct nfs4_callback_globals *ncg,
    int flags)
{
    nfs4_open_stream_t *osp;
    nfs4_recov_state_t recov_state;
    bool_t needrecov = FALSE;
    mntinfo4_t *mi;
    rnode4_t *rp;
    nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
    int claimnull;

    mi = VTOMI4(vp);
    rp = VTOR4(vp);

    recov_state.rs_flags = 0;
    recov_state.rs_num_retry_despite_err = 0;

retry:
    if ((e.error = nfs4_start_op(mi, vp, NULL, &recov_state)) != 0) {
        return (e.error);
    }

    /*
     * if we mean to discard the delegation, it must be BAD, so don't
     * use it when doing the reopen or it will fail too.
     */
    claimnull = (flags & NFS4_DR_DISCARD);
    /*
     * Loop through the open streams for this rnode to find
     * all of the ones created using the delegation state ID.
     * Each of these needs to be re-opened.
     */

    while ((osp = get_next_deleg_stream(rp, claimnull)) != NULL) {

        if (claimnull) {
            nfs4_reopen(vp, osp, &e, CLAIM_NULL, FALSE, FALSE);
        } else {
            ncg->nfs4_callback_stats.claim_cur.value.ui64++;

            nfs4_reopen(vp, osp, &e, CLAIM_DELEGATE_CUR, FALSE,
                FALSE);
            if (e.error == 0 && e.stat == NFS4_OK)
                ncg->nfs4_callback_stats.
                    claim_cur_ok.value.ui64++;
        }

        if (e.error == EAGAIN) {
            nfs4_end_op(mi, vp, NULL, &recov_state, TRUE);
            goto retry;
        }

        /*
         * if error is EINTR, ETIMEDOUT, or NFS4_FRC_UNMT_ERR, then
         * recovery has already been started inside of nfs4_reopen.
         */
        if (e.error == EINTR || e.error == ETIMEDOUT ||
            NFS4_FRC_UNMT_ERR(e.error, vp->v_vfsp)) {
            open_stream_rele(osp, rp);
            break;
        }

        needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);

        if (e.error != 0 && !needrecov) {
            /*
             * Recovery is not possible, but don't give up yet;
             * we'd still like to do delegreturn after
             * reopening as many streams as possible.
             * Continue processing the open streams.
             */

            ncg->nfs4_callback_stats.recall_failed.value.ui64++;

        } else if (needrecov) {
            /*
             * Start recovery and bail out.  The recovery
             * thread will take it from here.
             */
            (void) nfs4_start_recovery(&e, mi, vp, NULL, NULL,
                NULL, OP_OPEN, NULL);
            open_stream_rele(osp, rp);
            *recovp = TRUE;
            break;
        }

        open_stream_rele(osp, rp);
    }

    nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);

    return (e.error);
}

/*
 * get_next_deleg_stream - returns the next open stream which
 * represents a delegation for this rnode.  In order to assure
 * forward progress, the caller must guarantee that each open
 * stream returned is changed so that a future call won't return
 * it again.
 *
 * There are several ways for the open stream to change.  If the open
 * stream is !os_delegation, then we aren't interested in it.  Also, if
 * either os_failed_reopen or !os_valid, then don't return the osp.
 *
 * If claimnull is false (doing reopen CLAIM_DELEGATE_CUR) then return
 * the osp if it is an os_delegation open stream.  Also, if the rnode still
 * has r_deleg_return_pending, then return the os_delegation osp.  Lastly,
 * if the rnode's r_deleg_stateid is different from the osp's open_stateid,
 * then return the osp.
 *
 * We have already taken the 'r_deleg_recall_lock' as WRITER, which
 * prevents new OPENs from going OTW (as start_fop takes this
 * lock in READ mode); thus, no new open streams can be created
 * (which inherently means no new delegation open streams are
 * being created).
 */

static nfs4_open_stream_t *
get_next_deleg_stream(rnode4_t *rp, int claimnull)
{
    nfs4_open_stream_t  *osp;

    ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_WRITER));

    /*
     * Search through the list of open streams looking for
     * one that was created while holding the delegation.
     */
    mutex_enter(&rp->r_os_lock);
    for (osp = list_head(&rp->r_open_streams); osp != NULL;
        osp = list_next(&rp->r_open_streams, osp)) {
        mutex_enter(&osp->os_sync_lock);
        if (!osp->os_delegation || osp->os_failed_reopen ||
            !osp->os_valid) {
            mutex_exit(&osp->os_sync_lock);
            continue;
        }
        if (!claimnull || rp->r_deleg_return_pending ||
            !stateid4_cmp(&osp->open_stateid, &rp->r_deleg_stateid)) {
            osp->os_ref_count++;
            mutex_exit(&osp->os_sync_lock);
            mutex_exit(&rp->r_os_lock);
            return (osp);
        }
        mutex_exit(&osp->os_sync_lock);
    }
    mutex_exit(&rp->r_os_lock);

    return (NULL);
}

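/*
 * The osp comes back from get_next_deleg_stream() with os_ref_count
 * already bumped; every exit path in deleg_reopen() above balances
 * that with open_stream_rele().
 */
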

static void
nfs4delegreturn_thread(struct cb_recall_pass *args)
{
	rnode4_t *rp;
	vnode_t *vp;
	cred_t *cr;
	int dtype, error, flags;
	bool_t rdirty, rip;
	kmutex_t cpr_lock;
	callb_cpr_t cpr_info;
	struct nfs4_callback_globals *ncg;

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);

	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,
	    "nfsv4delegRtn");

	rp = args->rp;
	vp = RTOV4(rp);

	mutex_enter(&rp->r_statev4_lock);
	if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
		mutex_exit(&rp->r_statev4_lock);
		goto out;
	}
	mutex_exit(&rp->r_statev4_lock);

	/*
	 * Take the read-write lock in read mode to prevent other
	 * threads from modifying the data during the recall.  This
	 * doesn't affect mmappers.
	 */
	(void) nfs_rw_enter_sig(&rp->r_rwlock, RW_READER, FALSE);

	/* Proceed with delegreturn */

	mutex_enter(&rp->r_statev4_lock);
	if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
		mutex_exit(&rp->r_statev4_lock);
		nfs_rw_exit(&rp->r_rwlock);
		goto out;
	}
	dtype = rp->r_deleg_type;
	cr = rp->r_deleg_cred;
	ASSERT(cr != NULL);
	crhold(cr);
	mutex_exit(&rp->r_statev4_lock);

	flags = args->flags;

	/*
	 * If the file is being truncated at the server, then throw
	 * away all of the pages; it doesn't matter what flavor of
	 * delegation we have.
	 */

	if (args->truncate) {
		ncg->nfs4_callback_stats.recall_trunc.value.ui64++;
		nfs4_invalidate_pages(vp, 0, cr);
	} else if (dtype == OPEN_DELEGATE_WRITE) {

		mutex_enter(&rp->r_statelock);
		rdirty = rp->r_flags & R4DIRTY;
		mutex_exit(&rp->r_statelock);

		if (rdirty) {
			error = VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);

			if (error)
				CB_WARN1("nfs4delegreturn_thread:"
				    " VOP_PUTPAGE: %d\n", error);
		}
		/* Turn off NFS4_DR_PUSH because we just did that above. */
		flags &= ~NFS4_DR_PUSH;
	}

	mutex_enter(&rp->r_statelock);
	rip = rp->r_flags & R4RECOVERRP;
	mutex_exit(&rp->r_statelock);

	/* If a failed recovery is indicated, discard the pages */

	if (rip) {

		error = VOP_PUTPAGE(vp, 0, 0, B_INVAL, cr, NULL);

		if (error)
			CB_WARN1("nfs4delegreturn_thread: VOP_PUTPAGE: %d\n",
			    error);
	}

	/*
	 * Pass the flags to nfs4delegreturn_impl, but be sure not to pass
	 * NFS4_DR_DID_OP, which just calls nfs4delegreturn_async again.
	 */
	flags &= ~NFS4_DR_DID_OP;

	(void) nfs4delegreturn_impl(rp, flags, ncg);

	nfs_rw_exit(&rp->r_rwlock);
	crfree(cr);
out:
	kmem_free(args, sizeof (struct cb_recall_pass));
	VN_RELE(vp);
	mutex_enter(&cpr_lock);
	CALLB_CPR_EXIT(&cpr_info);
	mutex_destroy(&cpr_lock);
	zthread_exit();
}
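
/*
 * nfs4delegreturn_thread() runs as a zone thread created via
 * zthread_create() (see nfs4delegabandon() below for one such caller).
 * The creator is expected to hold the vnode on the thread's behalf;
 * the VN_RELE() at the "out" label drops that hold.  The CPR setup
 * above keeps the thread safe across suspend/resume, and the args
 * structure is always freed here, whether or not a delegreturn was
 * actually performed.
 */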

/*
 * This function assumes that its caller is either doing recovery
 * (and therefore cannot call nfs4_start_op) or has already called
 * nfs4_start_op().
 */
void
nfs4_delegation_accept(rnode4_t *rp, open_claim_type4 claim, OPEN4res *res,
    nfs4_ga_res_t *garp, cred_t *cr)
{
	open_read_delegation4 *orp;
	open_write_delegation4 *owp;
	nfs4_server_t *np;
	bool_t already = FALSE;
	bool_t recall = FALSE;
	bool_t valid_garp = TRUE;
	bool_t delegation_granted = FALSE;
	bool_t dr_needed = FALSE;
	bool_t recov;
	int dr_flags = 0;
	long mapcnt;
	uint_t rflag;
	mntinfo4_t *mi;
	struct nfs4_callback_globals *ncg;
	open_delegation_type4 odt;

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	mi = VTOMI4(RTOV4(rp));

	/*
	 * Accept a delegation granted to the client via an OPEN.
	 * Set the delegation fields in the rnode and insert the
	 * rnode onto the list anchored in the nfs4_server_t.  The
	 * proper locking order requires the nfs4_server_t first,
	 * even though it may not be needed in all cases.
	 *
	 * NB: find_nfs4_server returns with s_lock held.
	 */

	if ((np = find_nfs4_server(mi)) == NULL)
		return;

	/* grab the statelock too, for examining r_mapcnt */
	mutex_enter(&rp->r_statelock);
	mutex_enter(&rp->r_statev4_lock);

	if (rp->r_deleg_type == OPEN_DELEGATE_READ ||
	    rp->r_deleg_type == OPEN_DELEGATE_WRITE)
		already = TRUE;

	odt = res->delegation.delegation_type;

	if (odt == OPEN_DELEGATE_READ) {

		rp->r_deleg_type = res->delegation.delegation_type;
		orp = &res->delegation.open_delegation4_u.read;
		rp->r_deleg_stateid = orp->stateid;
		rp->r_deleg_perms = orp->permissions;
		if (claim == CLAIM_PREVIOUS)
			if ((recall = orp->recall) != 0)
				dr_needed = TRUE;

		delegation_granted = TRUE;

		ncg->nfs4_callback_stats.delegations.value.ui64++;
		ncg->nfs4_callback_stats.delegaccept_r.value.ui64++;

	} else if (odt == OPEN_DELEGATE_WRITE) {

		rp->r_deleg_type = res->delegation.delegation_type;
		owp = &res->delegation.open_delegation4_u.write;
		rp->r_deleg_stateid = owp->stateid;
		rp->r_deleg_perms = owp->permissions;
		rp->r_deleg_limit = owp->space_limit;
		if (claim == CLAIM_PREVIOUS)
			if ((recall = owp->recall) != 0)
				dr_needed = TRUE;

		delegation_granted = TRUE;

		if (garp == NULL || !garp->n4g_change_valid) {
			valid_garp = FALSE;
			rp->r_deleg_change = 0;
			rp->r_deleg_change_grant = 0;
		} else {
			rp->r_deleg_change = garp->n4g_change;
			rp->r_deleg_change_grant = garp->n4g_change;
		}
		mapcnt = rp->r_mapcnt;
		rflag = rp->r_flags;

		/*
		 * Update the delegation change attribute if
		 * there are mappers or the file is dirty.  This
		 * might be the case during recovery after server
		 * reboot.
		 */
		if (mapcnt > 0 || rflag & R4DIRTY)
			rp->r_deleg_change++;

		NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
		    "nfs4_delegation_accept: r_deleg_change: 0x%x\n",
		    (int)(rp->r_deleg_change >> 32)));
		NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
		    "nfs4_delegation_accept: r_deleg_change_grant: 0x%x\n",
		    (int)(rp->r_deleg_change_grant >> 32)));

		ncg->nfs4_callback_stats.delegations.value.ui64++;
		ncg->nfs4_callback_stats.delegaccept_rw.value.ui64++;
	} else if (already) {
		/*
		 * No delegation granted.  If the rnode currently
		 * has one, then consider it tainted and return it.
		 */
		dr_needed = TRUE;
	}

	if (delegation_granted) {
		/* Add the rnode to the list. */
		if (!already) {
			crhold(cr);
			rp->r_deleg_cred = cr;

			ASSERT(mutex_owned(&np->s_lock));
			list_insert_head(&np->s_deleg_list, rp);
			/* added list node gets a reference */
			np->s_refcnt++;
			nfs4_inc_state_ref_count_nolock(np, mi);
		}
		rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
	}

	/*
	 * We've now safely accepted the delegation, if any.  Drop the
	 * locks and figure out what post-processing is needed.  We'd
	 * like to retain r_statev4_lock, but nfs4_server_rele takes
	 * s_lock which would be a lock ordering violation.
	 */
	mutex_exit(&rp->r_statev4_lock);
	mutex_exit(&rp->r_statelock);
	mutex_exit(&np->s_lock);
	nfs4_server_rele(np);
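
	/*
	 * From this point the rnode's delegation fields can change
	 * again, since r_statev4_lock has been dropped; that is why
	 * the lock is re-acquired below before r_deleg_return_pending
	 * is examined and before nfs4_dlistadd() (which asserts that
	 * the lock is held) may be called.
	 */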

	/*
	 * Check to see if we are in recovery.  Remember that
	 * this function is protected by start_op, so a recovery
	 * cannot begin until we are out of here.
	 */
	mutex_enter(&mi->mi_lock);
	recov = mi->mi_recovflags & MI4_RECOV_ACTIV;
	mutex_exit(&mi->mi_lock);

	mutex_enter(&rp->r_statev4_lock);

	if (nfs4_delegreturn_policy == IMMEDIATE || !valid_garp)
		dr_needed = TRUE;

	if (dr_needed && rp->r_deleg_return_pending == FALSE) {
		if (recov) {
			/*
			 * We cannot call delegreturn from inside
			 * of recovery or VOP_PUTPAGE will hang
			 * due to the nfs4_start_fop call in
			 * nfs4write.  Use dlistadd to add the
			 * rnode to the list of rnodes needing
			 * cleaning.  We do not need to do reopen
			 * here because recov_openfiles will do it.
			 * In the non-recall case, just discard the
			 * delegation as it is no longer valid.
			 */
			if (recall)
				dr_flags = NFS4_DR_PUSH;
			else
				dr_flags = NFS4_DR_PUSH|NFS4_DR_DISCARD;

			nfs4_dlistadd(rp, ncg, dr_flags);
			dr_flags = 0;
		} else {
			/*
			 * Push the modified data back to the server,
			 * reopen any delegation open streams, and return
			 * the delegation.  Drop the statev4_lock first!
			 */
			dr_flags = NFS4_DR_PUSH|NFS4_DR_DID_OP|NFS4_DR_REOPEN;
		}
	}
	mutex_exit(&rp->r_statev4_lock);
	if (dr_flags)
		(void) nfs4delegreturn_impl(rp, dr_flags, ncg);
}

/*
 * nfs4delegabandon - Abandon the delegation on an rnode4.  This code
 * is called when the client receives EXPIRED, BAD_STATEID, OLD_STATEID
 * or BADSEQID and the recovery code is unable to recover.  Push any
 * dirty data back to the server and return the delegation (if any).
 */

void
nfs4delegabandon(rnode4_t *rp)
{
	vnode_t *vp;
	struct cb_recall_pass *pp;
	open_delegation_type4 dt;

	mutex_enter(&rp->r_statev4_lock);
	dt = rp->r_deleg_type;
	mutex_exit(&rp->r_statev4_lock);

	if (dt == OPEN_DELEGATE_NONE)
		return;

	vp = RTOV4(rp);
	VN_HOLD(vp);

	pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
	pp->rp = rp;
	/*
	 * Recovery on the file has failed and we want to return
	 * the delegation.  We don't want to reopen files and
	 * nfs4delegreturn_thread() figures out what to do about
	 * the data.  The only thing to do is attempt to return
	 * the delegation.
	 */
	pp->flags = 0;
	pp->truncate = FALSE;

	/*
	 * Fire up a thread to do the delegreturn; this is
	 * necessary because we could be inside a GETPAGE or
	 * PUTPAGE and we cannot do another one.
	 */

	(void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
	    minclsyspri);
}
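
/*
 * wait_for_recall1() below is the per-vnode helper for
 * wait_for_recall(): for regular files it takes r_deleg_recall_lock
 * in read mode and records the hold in rsp->rs_flags, so that
 * nfs4_end_op_recall() can later release exactly the locks that were
 * taken.  A hypothetical caller brackets an operation on up to two
 * vnodes like this:
 *
 *	if ((error = wait_for_recall(vp1, vp2, op, &recov_state)) != 0)
 *		return (error);
 *	(... perform the operation ...)
 *	nfs4_end_op_recall(vp1, vp2, &recov_state);
 */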

static int
wait_for_recall1(vnode_t *vp, nfs4_op_hint_t op, nfs4_recov_state_t *rsp,
    int flg)
{
	rnode4_t *rp;
	int error = 0;

#ifdef lint
	op = op;
#endif

	if (vp && vp->v_type == VREG) {
		rp = VTOR4(vp);

		/*
		 * Take r_deleg_recall_lock in read mode to synchronize
		 * with delegreturn.
		 */
		error = nfs_rw_enter_sig(&rp->r_deleg_recall_lock,
		    RW_READER, INTR4(vp));

		if (error == 0)
			rsp->rs_flags |= flg;

	}
	return (error);
}

void
nfs4_end_op_recall(vnode_t *vp1, vnode_t *vp2, nfs4_recov_state_t *rsp)
{
	NFS4_DEBUG(nfs4_recall_debug,
	    (CE_NOTE, "nfs4_end_op_recall: 0x%p, 0x%p\n",
	    (void *)vp1, (void *)vp2));

	if (vp2 && rsp->rs_flags & NFS4_RS_RECALL_HELD2)
		nfs_rw_exit(&VTOR4(vp2)->r_deleg_recall_lock);
	if (vp1 && rsp->rs_flags & NFS4_RS_RECALL_HELD1)
		nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
}

int
wait_for_recall(vnode_t *vp1, vnode_t *vp2, nfs4_op_hint_t op,
    nfs4_recov_state_t *rsp)
{
	int error;

	NFS4_DEBUG(nfs4_recall_debug,
	    (CE_NOTE, "wait_for_recall: 0x%p, 0x%p\n",
	    (void *)vp1, (void *)vp2));

	rsp->rs_flags &= ~(NFS4_RS_RECALL_HELD1|NFS4_RS_RECALL_HELD2);

	if ((error = wait_for_recall1(vp1, op, rsp, NFS4_RS_RECALL_HELD1)) != 0)
		return (error);

	if ((error = wait_for_recall1(vp2, op, rsp, NFS4_RS_RECALL_HELD2))
	    != 0) {
		if (rsp->rs_flags & NFS4_RS_RECALL_HELD1) {
			nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
			rsp->rs_flags &= ~NFS4_RS_RECALL_HELD1;
		}

		return (error);
	}

	return (0);
}

/*
 * nfs4_dlistadd - Add this rnode to a list of rnodes to be
 * DELEGRETURN'd at the end of recovery.
 */

static void
nfs4_dlistadd(rnode4_t *rp, struct nfs4_callback_globals *ncg, int flags)
{
	struct nfs4_dnode *dp;

	ASSERT(mutex_owned(&rp->r_statev4_lock));
	/*
	 * Mark the delegation as having a return pending.
	 * This will prevent the use of the delegation stateID
	 * by read, write, setattr and open.
	 */
	rp->r_deleg_return_pending = TRUE;
	dp = kmem_alloc(sizeof (*dp), KM_SLEEP);
	VN_HOLD(RTOV4(rp));
	dp->rnodep = rp;
	dp->flags = flags;
	mutex_enter(&ncg->nfs4_dlist_lock);
	list_insert_head(&ncg->nfs4_dlist, dp);
#ifdef DEBUG
	ncg->nfs4_dlistadd_c++;
#endif
	mutex_exit(&ncg->nfs4_dlist_lock);
}

/*
 * nfs4_dlistclean_impl - Do DELEGRETURN for each rnode on the list
 * of files awaiting cleaning.  If the override_flags are non-zero,
 * then use them rather than the flags that were set when the rnode
 * was added to the dlist.
 */
static void
nfs4_dlistclean_impl(struct nfs4_callback_globals *ncg, int override_flags)
{
	rnode4_t *rp;
	struct nfs4_dnode *dp;
	int flags;

	ASSERT(override_flags == 0 || override_flags == NFS4_DR_DISCARD);

	mutex_enter(&ncg->nfs4_dlist_lock);
	while ((dp = list_head(&ncg->nfs4_dlist)) != NULL) {
#ifdef DEBUG
		ncg->nfs4_dlistclean_c++;
#endif
		list_remove(&ncg->nfs4_dlist, dp);
		mutex_exit(&ncg->nfs4_dlist_lock);
		rp = dp->rnodep;
		flags = (override_flags != 0) ? override_flags : dp->flags;
		kmem_free(dp, sizeof (*dp));
		(void) nfs4delegreturn_impl(rp, flags, ncg);
		VN_RELE(RTOV4(rp));
		mutex_enter(&ncg->nfs4_dlist_lock);
	}
	mutex_exit(&ncg->nfs4_dlist_lock);
}

void
nfs4_dlistclean(void)
{
	struct nfs4_callback_globals *ncg;

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	nfs4_dlistclean_impl(ncg, 0);
}