/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/kstat.h>
#include <sys/mkdev.h>
#include <sys/mount.h>
#include <sys/statvfs.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/utsname.h>
#include <sys/bootconf.h>
#include <sys/modctl.h>
#include <sys/acl.h>
#include <sys/flock.h>
#include <sys/kstr.h>
#include <sys/stropts.h>
#include <sys/strsubr.h>
#include <sys/atomic.h>
#include <sys/disp.h>
#include <sys/policy.h>
#include <sys/list.h>
#include <sys/zone.h>

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/rpcsec_gss.h>
#include <rpc/clnt.h>
#include <rpc/xdr.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/mount.h>
#include <nfs/nfs_acl.h>

#include <fs/fs_subr.h>

#include <nfs/nfs4.h>
#include <nfs/rnode4.h>
#include <nfs/nfs4_clnt.h>
#include <nfs/nfssys.h>

#ifdef DEBUG
/*
 * These are "special" state IDs and file handles that
 * match any delegation state ID or file handle.  This
 * is for testing purposes only.
 */

stateid4 nfs4_deleg_any = { 0x7FFFFFF0 };
char nfs4_deleg_fh[] = "\0377\0376\0375\0374";
nfs_fh4 nfs4_deleg_anyfh = { sizeof (nfs4_deleg_fh)-1, nfs4_deleg_fh };
nfsstat4 cb4_getattr_fail = NFS4_OK;
nfsstat4 cb4_recall_fail = NFS4_OK;

int nfs4_callback_debug;
int nfs4_recall_debug;
int nfs4_drat_debug;

#endif

#define	CB_NOTE(x)	NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE, x))
#define	CB_WARN(x)	NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x))
#define	CB_WARN1(x, y)	NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x, y))

enum nfs4_delegreturn_policy nfs4_delegreturn_policy = INACTIVE;

static zone_key_t nfs4_callback_zone_key;

/*
 * NFS4_MAPSIZE is the number of bytes we are willing to consume
 * for the block allocation map when the server grants a NFS_LIMIT_BLOCK
 * style delegation.
 */

#define	NFS4_MAPSIZE	8192
#define	NFS4_MAPWORDS	NFS4_MAPSIZE/sizeof (uint_t)
#define	NbPW		(NBBY*sizeof (uint_t))
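/*
 * For reference, a worked sizing example (editorial note, assuming the
 * usual 32-bit uint_t; not part of the original code):
 *
 *	NFS4_MAPSIZE  = 8192 bytes
 *	NFS4_MAPWORDS = 8192 / sizeof (uint_t) = 2048 words
 *	NbPW          = NBBY * sizeof (uint_t) = 8 * 4 = 32 bits per word
 *
 * so the map can track NFS4_MAPWORDS * NbPW = 65536 file blocks per
 * NFS_LIMIT_BLOCK delegation.
 */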
static int nfs4_num_prognums = 1024;
static SVC_CALLOUT_TABLE nfs4_cb_sct;

struct nfs4_dnode {
	list_node_t	linkage;
	rnode4_t	*rnodep;
	int		flags;		/* Flags for nfs4delegreturn_impl() */
};

static const struct nfs4_callback_stats nfs4_callback_stats_tmpl = {
	{ "delegations",	KSTAT_DATA_UINT64 },
	{ "cb_getattr",		KSTAT_DATA_UINT64 },
	{ "cb_recall",		KSTAT_DATA_UINT64 },
	{ "cb_null",		KSTAT_DATA_UINT64 },
	{ "cb_dispatch",	KSTAT_DATA_UINT64 },
	{ "delegaccept_r",	KSTAT_DATA_UINT64 },
	{ "delegaccept_rw",	KSTAT_DATA_UINT64 },
	{ "delegreturn",	KSTAT_DATA_UINT64 },
	{ "callbacks",		KSTAT_DATA_UINT64 },
	{ "claim_cur",		KSTAT_DATA_UINT64 },
	{ "claim_cur_ok",	KSTAT_DATA_UINT64 },
	{ "recall_trunc",	KSTAT_DATA_UINT64 },
	{ "recall_failed",	KSTAT_DATA_UINT64 },
	{ "return_limit_write",	KSTAT_DATA_UINT64 },
	{ "return_limit_addmap", KSTAT_DATA_UINT64 },
	{ "deleg_recover",	KSTAT_DATA_UINT64 },
	{ "cb_illegal",		KSTAT_DATA_UINT64 }
};

struct nfs4_cb_port {
	list_node_t	linkage;	/* linkage into per-zone port list */
	char		netid[KNC_STRSIZE];
	char		uaddr[KNC_STRSIZE];
	char		protofmly[KNC_STRSIZE];
	char		proto[KNC_STRSIZE];
};

static int cb_getattr_bytes;

struct cb_recall_pass {
	rnode4_t	*rp;
	int		flags;		/* Flags for nfs4delegreturn_impl() */
	bool_t		truncate;
};

static nfs4_open_stream_t *get_next_deleg_stream(rnode4_t *, int);
static void nfs4delegreturn_thread(struct cb_recall_pass *);
static int deleg_reopen(vnode_t *, bool_t *, struct nfs4_callback_globals *,
    int);
static void nfs4_dlistadd(rnode4_t *, struct nfs4_callback_globals *, int);
static void nfs4_dlistclean_impl(struct nfs4_callback_globals *, int);
static int nfs4delegreturn_impl(rnode4_t *, int,
    struct nfs4_callback_globals *);
static void nfs4delegreturn_cleanup_impl(rnode4_t *, nfs4_server_t *,
    struct nfs4_callback_globals *);
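/*
 * Editorial sketch of the program-number scheme used throughout this
 * file: every nfs4_server_t is assigned its own callback RPC program
 * number in the range [NFS4_CALLBACK, NFS4_CALLBACK + nfs4_num_prognums),
 * and the per-zone nfs4prog2server[] array maps a program number back to
 * its server.  A callback handler recovers its server with, in essence:
 *
 *	mutex_enter(&ncg->nfs4_cb_lock);
 *	sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
 *	mutex_exit(&ncg->nfs4_cb_lock);
 *
 * which is exactly the lookup that cb_getattr(), cb_recall() and
 * cb_null() perform below.
 */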
static void
cb_getattr(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
    struct compound_state *cs, struct nfs4_callback_globals *ncg)
{
	CB_GETATTR4args *args = &argop->nfs_cb_argop4_u.opcbgetattr;
	CB_GETATTR4res *resp = &resop->nfs_cb_resop4_u.opcbgetattr;
	rnode4_t *rp;
	vnode_t *vp;
	bool_t found = FALSE;
	struct nfs4_server *sp;
	struct fattr4 *fap;
	rpc_inline_t *fdata;
	long mapcnt;
	fattr4_change change;
	fattr4_size size;
	uint_t rflag;

	ncg->nfs4_callback_stats.cb_getattr.value.ui64++;

#ifdef DEBUG
	/*
	 * error injection hook: set cb4_getattr_fail global to the
	 * NFS4 protocol error to be returned
	 */
	if (cb4_getattr_fail != NFS4_OK) {
		*cs->statusp = resp->status = cb4_getattr_fail;
		return;
	}
#endif

	resp->obj_attributes.attrmask = 0;

	mutex_enter(&ncg->nfs4_cb_lock);
	sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
	mutex_exit(&ncg->nfs4_cb_lock);

	if (nfs4_server_vlock(sp, 0) == FALSE) {

		CB_WARN("cb_getattr: cannot find server\n");

		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
		return;
	}

	/*
	 * In cb_compound, callback_ident was validated against rq_prog,
	 * but we couldn't verify that it was set to the value we provided
	 * at setclientid time (because we didn't have the server struct
	 * yet).  Now we have the server struct, but don't have
	 * callback_ident handy.  So, validate the server struct's program
	 * number against the RPC request's prog number.  At this point, we
	 * know the RPC prog num is valid (else we wouldn't be here);
	 * however, we don't know that it was the prog number we supplied
	 * to this server at setclientid time.  If the prog numbers aren't
	 * equivalent, then log the problem and fail the request because
	 * either cbserv and/or cbclient are confused.  This will probably
	 * never happen.
	 */
	if (sp->s_program != req->rq_prog) {
#ifdef DEBUG
		zcmn_err(getzoneid(), CE_WARN,
		    "cb_getattr: wrong server program number srv=%d req=%d\n",
		    sp->s_program, req->rq_prog);
#else
		zcmn_err(getzoneid(), CE_WARN,
		    "cb_getattr: wrong server program number\n");
#endif
		mutex_exit(&sp->s_lock);
		nfs4_server_rele(sp);
		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
		return;
	}

	/*
	 * Search the delegation list for a matching file handle;
	 * mutex on sp prevents the list from changing.
	 */

	rp = list_head(&sp->s_deleg_list);
	for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
		nfs4_fhandle_t fhandle;

		sfh4_copyval(rp->r_fh, &fhandle);

		if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
		    bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
		    fhandle.fh_len) == 0)) {

			found = TRUE;
			break;
		}
#ifdef DEBUG
		if (nfs4_deleg_anyfh.nfs_fh4_len == args->fh.nfs_fh4_len &&
		    bcmp(nfs4_deleg_anyfh.nfs_fh4_val, args->fh.nfs_fh4_val,
		    args->fh.nfs_fh4_len) == 0) {

			found = TRUE;
			break;
		}
#endif
	}

	/*
	 * VN_HOLD the vnode before releasing s_lock to guarantee
	 * we have a valid vnode reference.
	 */
	if (found == TRUE) {
		vp = RTOV4(rp);
		VN_HOLD(vp);
	}

	mutex_exit(&sp->s_lock);
	nfs4_server_rele(sp);

	if (found == FALSE) {

		CB_WARN("cb_getattr: bad fhandle\n");

		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
		return;
	}

	/*
	 * Figure out which attributes the server wants.  We only
	 * offer FATTR4_CHANGE & FATTR4_SIZE; ignore the rest.
	 */
	fdata = kmem_alloc(cb_getattr_bytes, KM_SLEEP);

	/*
	 * Don't actually need to create XDR to encode these
	 * simple data structures.
	 * xdrmem_create(&xdr, fdata, cb_getattr_bytes, XDR_ENCODE);
	 */
	fap = &resp->obj_attributes;

	fap->attrmask = 0;
	/* attrlist4_len starts at 0 and increases as attrs are processed */
	fap->attrlist4 = (char *)fdata;
	fap->attrlist4_len = 0;

	/* don't supply attrs if request was zero */
	if (args->attr_request != 0) {
		if (args->attr_request & FATTR4_CHANGE_MASK) {
			/*
			 * If the file is mmapped, then increment the change
			 * attribute and return it.  This will guarantee that
			 * the server will perceive that the file has changed
			 * if there is any chance that the client application
			 * has changed it.  Otherwise, just return the change
			 * attribute as it has been updated by nfs4write_deleg.
			 */

			mutex_enter(&rp->r_statelock);
			mapcnt = rp->r_mapcnt;
			rflag = rp->r_flags;
			mutex_exit(&rp->r_statelock);

			mutex_enter(&rp->r_statev4_lock);
			/*
			 * If object mapped, then always return new change.
			 * Otherwise, return change if object has dirty
			 * pages.  If object doesn't have any dirty pages,
			 * then all changes have been pushed to server, so
			 * reset change to grant change.
			 */
			if (mapcnt)
				rp->r_deleg_change++;
			else if (! (rflag & R4DIRTY))
				rp->r_deleg_change = rp->r_deleg_change_grant;
			change = rp->r_deleg_change;
			mutex_exit(&rp->r_statev4_lock);

			/*
			 * Use inline XDR code directly, we know that we
			 * are going to a memory buffer and it has enough
			 * space so it cannot fail.
			 */
			IXDR_PUT_U_HYPER(fdata, change);
			fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
			fap->attrmask |= FATTR4_CHANGE_MASK;
		}

		if (args->attr_request & FATTR4_SIZE_MASK) {
			/*
			 * Use an atomic add of 0 to fetch a consistent view
			 * of r_size; this avoids having to take rw_lock
			 * which could cause a deadlock.
			 */
			size = atomic_add_64_nv((uint64_t *)&rp->r_size, 0);

			/*
			 * Use inline XDR code directly, we know that we
			 * are going to a memory buffer and it has enough
			 * space so it cannot fail.
			 */
			IXDR_PUT_U_HYPER(fdata, size);
			fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
			fap->attrmask |= FATTR4_SIZE_MASK;
		}
	}

	VN_RELE(vp);

	*cs->statusp = resp->status = NFS4_OK;
}
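/*
 * Editorial byte-accounting example for the reply built above: each
 * IXDR_PUT_U_HYPER() call emits one 64-bit value as two 32-bit XDR
 * units, so a reply carrying both FATTR4_CHANGE and FATTR4_SIZE uses
 *
 *	2 attrs * 2 * BYTES_PER_XDR_UNIT = 2 * 8 = 16 bytes,
 *
 * which is precisely the cb_getattr_bytes budget computed in
 * nfs4_callback_init() below.
 */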
static void
cb_getattr_free(nfs_cb_resop4 *resop)
{
	if (resop->nfs_cb_resop4_u.opcbgetattr.obj_attributes.attrlist4)
		kmem_free(resop->nfs_cb_resop4_u.opcbgetattr.
		    obj_attributes.attrlist4, cb_getattr_bytes);
}

static void
cb_recall(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
    struct compound_state *cs, struct nfs4_callback_globals *ncg)
{
	CB_RECALL4args *args = &argop->nfs_cb_argop4_u.opcbrecall;
	CB_RECALL4res *resp = &resop->nfs_cb_resop4_u.opcbrecall;
	rnode4_t *rp;
	vnode_t *vp;
	struct nfs4_server *sp;
	bool_t found = FALSE;

	ncg->nfs4_callback_stats.cb_recall.value.ui64++;

	ASSERT(req->rq_prog >= NFS4_CALLBACK);
	ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums);

#ifdef DEBUG
	/*
	 * error injection hook: set cb4_recall_fail global to the
	 * NFS4 protocol error to be returned
	 */
	if (cb4_recall_fail != NFS4_OK) {
		*cs->statusp = resp->status = cb4_recall_fail;
		return;
	}
#endif

	mutex_enter(&ncg->nfs4_cb_lock);
	sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
	mutex_exit(&ncg->nfs4_cb_lock);

	if (nfs4_server_vlock(sp, 0) == FALSE) {

		CB_WARN("cb_recall: cannot find server\n");

		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
		return;
	}

	/*
	 * Search the delegation list for a matching file handle
	 * AND stateid; mutex on sp prevents the list from changing.
	 */

	rp = list_head(&sp->s_deleg_list);
	for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
		mutex_enter(&rp->r_statev4_lock);

		/* check both state id and file handle! */

		if ((bcmp(&rp->r_deleg_stateid, &args->stateid,
		    sizeof (stateid4)) == 0)) {
			nfs4_fhandle_t fhandle;

			sfh4_copyval(rp->r_fh, &fhandle);
			if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
			    bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
			    fhandle.fh_len) == 0)) {

				found = TRUE;
				break;
			} else {
#ifdef DEBUG
				CB_WARN("cb_recall: stateid OK, bad fh");
#endif
			}
		}
#ifdef DEBUG
		if (bcmp(&args->stateid, &nfs4_deleg_any,
		    sizeof (stateid4)) == 0) {

			found = TRUE;
			break;
		}
#endif
		mutex_exit(&rp->r_statev4_lock);
	}

	/*
	 * VN_HOLD the vnode before releasing s_lock to guarantee
	 * we have a valid vnode reference.  The async thread will
	 * release the hold when it's done.
	 */
	if (found == TRUE) {
		mutex_exit(&rp->r_statev4_lock);
		vp = RTOV4(rp);
		VN_HOLD(vp);
	}
	mutex_exit(&sp->s_lock);
	nfs4_server_rele(sp);

	if (found == FALSE) {

		CB_WARN("cb_recall: bad stateid\n");

		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
		return;
	}

	/* Fire up a thread to do the delegreturn */
	nfs4delegreturn_async(rp, NFS4_DR_RECALL|NFS4_DR_REOPEN,
	    args->truncate);

	*cs->statusp = resp->status = 0;
}

/* ARGSUSED */
static void
cb_recall_free(nfs_cb_resop4 *resop)
{
	/* nothing to do here, cb_recall doesn't kmem_alloc */
}
/*
 * This function handles the CB_NULL proc call from an NFSv4 Server.
 *
 * We take note that the server has sent a CB_NULL for later processing
 * in the recovery logic.  It is noted so we may pause slightly after the
 * setclientid and before reopening files.  The pause is to allow the
 * NFSv4 Server time to receive the CB_NULL reply and adjust any of
 * its internal structures such that it has the opportunity to grant
 * delegations to reopened files.
 */

/* ARGSUSED */
static void
cb_null(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
    struct nfs4_callback_globals *ncg)
{
	struct nfs4_server *sp;

	ncg->nfs4_callback_stats.cb_null.value.ui64++;

	ASSERT(req->rq_prog >= NFS4_CALLBACK);
	ASSERT(req->rq_prog < NFS4_CALLBACK+nfs4_num_prognums);

	mutex_enter(&ncg->nfs4_cb_lock);
	sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
	mutex_exit(&ncg->nfs4_cb_lock);

	if (nfs4_server_vlock(sp, 0) != FALSE) {
		sp->s_flags |= N4S_CB_PINGED;
		cv_broadcast(&sp->wait_cb_null);
		mutex_exit(&sp->s_lock);
		nfs4_server_rele(sp);
	}
}

/*
 * cb_illegal	args: void
 *		res : status (NFS4ERR_OP_CB_ILLEGAL)
 */
/* ARGSUSED */
static void
cb_illegal(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
    struct compound_state *cs, struct nfs4_callback_globals *ncg)
{
	CB_ILLEGAL4res *resp = &resop->nfs_cb_resop4_u.opcbillegal;

	ncg->nfs4_callback_stats.cb_illegal.value.ui64++;
	resop->resop = OP_CB_ILLEGAL;
	*cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
}
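/*
 * Editorial sketch (not part of the build) of the other half of the
 * CB_NULL handshake described above: a thread that has just issued
 * SETCLIENTID can wait for the server's ping using the same s_lock /
 * wait_cb_null / N4S_CB_PINGED protocol that cb_null() signals.  The
 * unbounded cv_wait() is a simplification; a real caller would bound
 * the wait:
 *
 *	mutex_enter(&sp->s_lock);
 *	while (!(sp->s_flags & N4S_CB_PINGED))
 *		cv_wait(&sp->wait_cb_null, &sp->s_lock);
 *	mutex_exit(&sp->s_lock);
 */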
static void
cb_compound(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
    struct nfs4_callback_globals *ncg)
{
	uint_t i;
	struct compound_state cs;
	nfs_cb_argop4 *argop;
	nfs_cb_resop4 *resop, *new_res;
	uint_t op;

	bzero(&cs, sizeof (cs));
	cs.statusp = &resp->status;
	cs.cont = TRUE;

	/*
	 * Form a reply tag by copying over the request tag.
	 */
	resp->tag.utf8string_len = args->tag.utf8string_len;
	if (args->tag.utf8string_len != 0) {
		resp->tag.utf8string_val =
		    kmem_alloc(resp->tag.utf8string_len, KM_SLEEP);
		bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
		    args->tag.utf8string_len);
	} else {
		resp->tag.utf8string_val = NULL;
	}

	/*
	 * XXX for now, minorversion should be zero
	 */
	if (args->minorversion != CB4_MINORVERSION) {
		resp->array_len = 0;
		resp->array = NULL;
		resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
		return;
	}

#ifdef DEBUG
	/*
	 * Verify callback_ident.  It doesn't really matter if it's wrong
	 * because we don't really use callback_ident -- we use the prog
	 * number of the RPC request instead.  In this case, just print a
	 * DEBUG console message to reveal brokenness of the cbclient (at
	 * bkoff/cthon).
	 */
	if (args->callback_ident != req->rq_prog)
		zcmn_err(getzoneid(), CE_WARN,
		    "cb_compound: cb_client using wrong "
		    "callback_ident(%d), should be %d",
		    args->callback_ident, req->rq_prog);
#endif

	resp->array_len = args->array_len;
	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_cb_resop4),
	    KM_SLEEP);

	for (i = 0; i < args->array_len && cs.cont; i++) {

		argop = &args->array[i];
		resop = &resp->array[i];
		resop->resop = argop->argop;
		op = (uint_t)resop->resop;

		switch (op) {

		case OP_CB_GETATTR:

			cb_getattr(argop, resop, req, &cs, ncg);
			break;

		case OP_CB_RECALL:

			cb_recall(argop, resop, req, &cs, ncg);
			break;

		case OP_CB_ILLEGAL:

			/* fall through */

		default:
			/*
			 * Handle OP_CB_ILLEGAL and any undefined opcode.
			 * Currently, the XDR code will return BADXDR
			 * if cb op doesn't decode to legal value, so
			 * it really only handles OP_CB_ILLEGAL.
			 */
			op = OP_CB_ILLEGAL;
			cb_illegal(argop, resop, req, &cs, ncg);
		}

		if (*cs.statusp != NFS4_OK)
			cs.cont = FALSE;

		/*
		 * If not at last op, and if we are to stop, then
		 * compact the results array.
		 */
		if ((i + 1) < args->array_len && !cs.cont) {

			new_res = kmem_alloc(
			    (i+1) * sizeof (nfs_cb_resop4), KM_SLEEP);
			bcopy(resp->array,
			    new_res, (i+1) * sizeof (nfs_cb_resop4));
			kmem_free(resp->array,
			    args->array_len * sizeof (nfs_cb_resop4));

			resp->array_len = i + 1;
			resp->array = new_res;
		}
	}
}

static void
cb_compound_free(CB_COMPOUND4res *resp)
{
	uint_t i, op;
	nfs_cb_resop4 *resop;

	if (resp->tag.utf8string_val) {
		UTF8STRING_FREE(resp->tag)
	}

	for (i = 0; i < resp->array_len; i++) {

		resop = &resp->array[i];
		op = (uint_t)resop->resop;

		switch (op) {

		case OP_CB_GETATTR:

			cb_getattr_free(resop);
			break;

		case OP_CB_RECALL:

			cb_recall_free(resop);
			break;

		default:
			break;
		}
	}

	if (resp->array != NULL) {
		kmem_free(resp->array,
		    resp->array_len * sizeof (nfs_cb_resop4));
	}
}
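/*
 * Editorial worked example of the compaction above: for a five-op
 * compound whose third op (i == 2) fails, cs.cont goes FALSE and the
 * reply array is shrunk from five entries to i + 1 == 3 -- the two ops
 * that succeeded plus the op that failed -- matching the COMPOUND rule
 * that processing stops at the first error.
 */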
static void
cb_dispatch(struct svc_req *req, SVCXPRT *xprt)
{
	CB_COMPOUND4args args;
	CB_COMPOUND4res res;
	struct nfs4_callback_globals *ncg;

	bool_t (*xdr_args)(), (*xdr_res)();
	void (*proc)(CB_COMPOUND4args *, CB_COMPOUND4res *, struct svc_req *,
	    struct nfs4_callback_globals *);
	void (*freeproc)(CB_COMPOUND4res *);

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	ncg->nfs4_callback_stats.cb_dispatch.value.ui64++;

	switch (req->rq_proc) {
	case CB_NULL:
		xdr_args = xdr_void;
		xdr_res = xdr_void;
		proc = cb_null;
		freeproc = NULL;
		break;

	case CB_COMPOUND:
		xdr_args = xdr_CB_COMPOUND4args_clnt;
		xdr_res = xdr_CB_COMPOUND4res;
		proc = cb_compound;
		freeproc = cb_compound_free;
		break;

	default:
		CB_WARN("cb_dispatch: no proc\n");
		svcerr_noproc(xprt);
		return;
	}

	args.tag.utf8string_val = NULL;
	args.array = NULL;

	if (!SVC_GETARGS(xprt, xdr_args, (caddr_t)&args)) {

		CB_WARN("cb_dispatch: cannot getargs\n");
		svcerr_decode(xprt);
		return;
	}

	(*proc)(&args, &res, req, ncg);

	if (svc_sendreply(xprt, xdr_res, (caddr_t)&res) == FALSE) {

		CB_WARN("cb_dispatch: bad sendreply\n");
		svcerr_systemerr(xprt);
	}

	if (freeproc)
		(*freeproc)(&res);

	if (!SVC_FREEARGS(xprt, xdr_args, (caddr_t)&args)) {

		CB_WARN("cb_dispatch: bad freeargs\n");
	}
}

static rpcprog_t
nfs4_getnextprogram(struct nfs4_callback_globals *ncg)
{
	int i, j;

	j = ncg->nfs4_program_hint;
	for (i = 0; i < nfs4_num_prognums; i++, j++) {

		if (j >= nfs4_num_prognums)
			j = 0;

		if (ncg->nfs4prog2server[j] == NULL) {
			ncg->nfs4_program_hint = j+1;
			return (j+NFS4_CALLBACK);
		}
	}

	return (0);
}
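/*
 * Editorial example of the allocation rotor above: with
 * nfs4_num_prognums == 1024 and nfs4_program_hint == 3, the scan visits
 * slots 3, 4, ..., 1023, 0, 1, 2 and returns the first free slot j as
 * program number NFS4_CALLBACK + j, leaving the hint at j + 1.  A
 * return value of 0 means every slot is in use.
 */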
void
nfs4callback_destroy(nfs4_server_t *np)
{
	struct nfs4_callback_globals *ncg;
	int i;

	if (np->s_program == 0)
		return;

	ncg = np->zone_globals;
	i = np->s_program - NFS4_CALLBACK;

	mutex_enter(&ncg->nfs4_cb_lock);

	ASSERT(ncg->nfs4prog2server[i] == np);

	ncg->nfs4prog2server[i] = NULL;

	if (i < ncg->nfs4_program_hint)
		ncg->nfs4_program_hint = i;

	mutex_exit(&ncg->nfs4_cb_lock);
}

/*
 * nfs4_setport - This function saves a netid and universal address for
 * the callback program.  These values will be used during setclientid.
 */
static void
nfs4_setport(char *netid, char *uaddr, char *protofmly, char *proto,
    struct nfs4_callback_globals *ncg)
{
	struct nfs4_cb_port *p;
	bool_t found = FALSE;

	ASSERT(MUTEX_HELD(&ncg->nfs4_cb_lock));

	p = list_head(&ncg->nfs4_cb_ports);
	for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
		if (strcmp(p->netid, netid) == 0) {
			found = TRUE;
			break;
		}
	}
	if (found == TRUE)
		(void) strcpy(p->uaddr, uaddr);
	else {
		p = kmem_alloc(sizeof (*p), KM_SLEEP);

		(void) strcpy(p->uaddr, uaddr);
		(void) strcpy(p->netid, netid);
		(void) strcpy(p->protofmly, protofmly);
		(void) strcpy(p->proto, proto);
		list_insert_head(&ncg->nfs4_cb_ports, p);
	}
}

/*
 * nfs4_cb_args - This function is used to construct the callback
 * portion of the arguments needed for setclientid.
 */

void
nfs4_cb_args(nfs4_server_t *np, struct knetconfig *knc, SETCLIENTID4args *args)
{
	struct nfs4_cb_port *p;
	bool_t found = FALSE;
	rpcprog_t pgm;
	struct nfs4_callback_globals *ncg = np->zone_globals;

	/*
	 * This server structure may already have a program number
	 * assigned to it.  This happens when the client has to
	 * re-issue SETCLIENTID.  Just re-use the information.
	 */
	if (np->s_program >= NFS4_CALLBACK &&
	    np->s_program < NFS4_CALLBACK + nfs4_num_prognums)
		nfs4callback_destroy(np);

	mutex_enter(&ncg->nfs4_cb_lock);

	p = list_head(&ncg->nfs4_cb_ports);
	for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
		if (strcmp(p->protofmly, knc->knc_protofmly) == 0 &&
		    strcmp(p->proto, knc->knc_proto) == 0) {
			found = TRUE;
			break;
		}
	}

	if (found == FALSE) {

		NFS4_DEBUG(nfs4_callback_debug,
		    (CE_WARN, "nfs4_cb_args: could not find netid for %s/%s\n",
		    knc->knc_protofmly, knc->knc_proto));

		args->callback.cb_program = 0;
		args->callback.cb_location.r_netid = NULL;
		args->callback.cb_location.r_addr = NULL;
		args->callback_ident = 0;
		mutex_exit(&ncg->nfs4_cb_lock);
		return;
	}

	if ((pgm = nfs4_getnextprogram(ncg)) == 0) {
		CB_WARN("nfs4_cb_args: out of program numbers\n");

		args->callback.cb_program = 0;
		args->callback.cb_location.r_netid = NULL;
		args->callback.cb_location.r_addr = NULL;
		args->callback_ident = 0;
		mutex_exit(&ncg->nfs4_cb_lock);
		return;
	}

	ncg->nfs4prog2server[pgm-NFS4_CALLBACK] = np;
	args->callback.cb_program = pgm;
	args->callback.cb_location.r_netid = p->netid;
	args->callback.cb_location.r_addr = p->uaddr;
	args->callback_ident = pgm;

	np->s_program = pgm;

	mutex_exit(&ncg->nfs4_cb_lock);
}
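/*
 * Editorial sketch of the callback arguments nfs4_cb_args() produces on
 * success (the netid and universal address values are illustrative):
 *
 *	args->callback.cb_program          = NFS4_CALLBACK + j
 *	args->callback.cb_location.r_netid = "tcp"
 *	args->callback.cb_location.r_addr  = "10.0.0.5.8.1"
 *	args->callback_ident               = NFS4_CALLBACK + j
 *
 * The server later calls back using that program number, which is how
 * cb_dispatch() (via nfs4prog2server[]) finds the right nfs4_server_t.
 */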
static int
nfs4_dquery(struct nfs4_svc_args *arg, model_t model)
{
	file_t *fp;
	vnode_t *vp;
	rnode4_t *rp;
	int error;
	STRUCT_HANDLE(nfs4_svc_args, uap);

	STRUCT_SET_HANDLE(uap, model, arg);

	if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
		return (EBADF);

	vp = fp->f_vnode;

	if (vp == NULL || vp->v_type != VREG ||
	    !vn_matchops(vp, nfs4_vnodeops)) {
		releasef(STRUCT_FGET(uap, fd));
		return (EBADF);
	}

	rp = VTOR4(vp);

	/*
	 * I can't convince myself that we need locking here.  The
	 * rnode cannot disappear and the value returned is instantly
	 * stale anyway, so why bother?
	 */

	error = suword32(STRUCT_FGETP(uap, netid), rp->r_deleg_type);
	releasef(STRUCT_FGET(uap, fd));
	return (error);
}


/*
 * NFS4 client system call.  This service does the
 * necessary initialization for the callback program.
 * This is fashioned after the server side interaction
 * between nfsd and the kernel.  On the client, the
 * mount command forks and the child process does the
 * necessary interaction with the kernel.
 *
 * uap->fd is the fd of an open transport provider
 */
int
nfs4_svc(struct nfs4_svc_args *arg, model_t model)
{
	file_t *fp;
	int error;
	int readsize;
	char buf[KNC_STRSIZE], uaddr[KNC_STRSIZE];
	char protofmly[KNC_STRSIZE], proto[KNC_STRSIZE];
	size_t len;
	STRUCT_HANDLE(nfs4_svc_args, uap);
	struct netbuf addrmask;
	int cmd;
	SVCMASTERXPRT *cb_xprt;
	struct nfs4_callback_globals *ncg;

#ifdef lint
	model = model;		/* STRUCT macros don't always refer to it */
#endif

	STRUCT_SET_HANDLE(uap, model, arg);

	if (STRUCT_FGET(uap, cmd) == NFS4_DQUERY)
		return (nfs4_dquery(arg, model));

	if (secpolicy_nfs(CRED()) != 0)
		return (EPERM);

	if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
		return (EBADF);

	/*
	 * Set read buffer size to rsize
	 * and add room for RPC headers.
	 */
	readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
	if (readsize < RPC_MAXDATASIZE)
		readsize = RPC_MAXDATASIZE;

	error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
	    KNC_STRSIZE, &len);
	if (error) {
		releasef(STRUCT_FGET(uap, fd));
		return (error);
	}

	cmd = STRUCT_FGET(uap, cmd);

	if (cmd & NFS4_KRPC_START) {
		addrmask.len = STRUCT_FGET(uap, addrmask.len);
		addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
		addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
		error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
		    addrmask.len);
		if (error) {
			releasef(STRUCT_FGET(uap, fd));
			kmem_free(addrmask.buf, addrmask.maxlen);
			return (error);
		}
	} else
		addrmask.buf = NULL;

	error = copyinstr((const char *)STRUCT_FGETP(uap, addr), uaddr,
	    sizeof (uaddr), &len);
	if (error) {
		releasef(STRUCT_FGET(uap, fd));
		if (addrmask.buf)
			kmem_free(addrmask.buf, addrmask.maxlen);
		return (error);
	}

	error = copyinstr((const char *)STRUCT_FGETP(uap, protofmly),
	    protofmly, sizeof (protofmly), &len);
	if (error) {
		releasef(STRUCT_FGET(uap, fd));
		if (addrmask.buf)
			kmem_free(addrmask.buf, addrmask.maxlen);
		return (error);
	}

	error = copyinstr((const char *)STRUCT_FGETP(uap, proto), proto,
	    sizeof (proto), &len);
	if (error) {
		releasef(STRUCT_FGET(uap, fd));
		if (addrmask.buf)
			kmem_free(addrmask.buf, addrmask.maxlen);
		return (error);
	}

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	mutex_enter(&ncg->nfs4_cb_lock);
	if (cmd & NFS4_SETPORT)
		nfs4_setport(buf, uaddr, protofmly, proto, ncg);

	if (cmd & NFS4_KRPC_START) {
		error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &cb_xprt,
		    &nfs4_cb_sct, NULL, NFS_CB_SVCPOOL_ID, FALSE);
		if (error) {
			CB_WARN1("nfs4_svc: svc_tli_kcreate failed %d\n",
			    error);
			kmem_free(addrmask.buf, addrmask.maxlen);
		}
	}

	mutex_exit(&ncg->nfs4_cb_lock);
	releasef(STRUCT_FGET(uap, fd));
	return (error);
}

struct nfs4_callback_globals *
nfs4_get_callback_globals(void)
{
	return (zone_getspecific(nfs4_callback_zone_key, nfs_zone()));
}
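/*
 * Editorial user-land sketch of how this system call is driven (the
 * real helper code lives in the mount command; the field values here
 * are hypothetical).  nsa.fd is an open transport endpoint and
 * nsa.addr its universal address string:
 *
 *	struct nfs4_svc_args nsa;
 *
 *	nsa.fd = fd;
 *	nsa.cmd = NFS4_SETPORT | NFS4_KRPC_START;
 *	nsa.netid = "tcp";
 *	nsa.addr = uaddr;
 *	nsa.protofmly = "inet";
 *	nsa.proto = "tcp";
 *	(void) _nfssys(NFS4_SVC, &nsa);
 */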
static void *
nfs4_callback_init_zone(zoneid_t zoneid)
{
	kstat_t *nfs4_callback_kstat;
	struct nfs4_callback_globals *ncg;

	ncg = kmem_zalloc(sizeof (*ncg), KM_SLEEP);

	ncg->nfs4prog2server = kmem_zalloc(nfs4_num_prognums *
	    sizeof (struct nfs4_server *), KM_SLEEP);

	/* initialize the dlist */
	mutex_init(&ncg->nfs4_dlist_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&ncg->nfs4_dlist, sizeof (struct nfs4_dnode),
	    offsetof(struct nfs4_dnode, linkage));

	/* initialize cb_port list */
	mutex_init(&ncg->nfs4_cb_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&ncg->nfs4_cb_ports, sizeof (struct nfs4_cb_port),
	    offsetof(struct nfs4_cb_port, linkage));

	/* get our own copy of the kstats */
	bcopy(&nfs4_callback_stats_tmpl, &ncg->nfs4_callback_stats,
	    sizeof (nfs4_callback_stats_tmpl));
	/* register "nfs:0:nfs4_callback_stats" for this zone */
	if ((nfs4_callback_kstat =
	    kstat_create_zone("nfs", 0, "nfs4_callback_stats", "misc",
	    KSTAT_TYPE_NAMED,
	    sizeof (ncg->nfs4_callback_stats) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE,
	    zoneid)) != NULL) {
		nfs4_callback_kstat->ks_data = &ncg->nfs4_callback_stats;
		kstat_install(nfs4_callback_kstat);
	}
	return (ncg);
}

static void
nfs4_discard_delegations(struct nfs4_callback_globals *ncg)
{
	nfs4_server_t *sp;
	int i, num_removed;

	/*
	 * It's OK here to just run through the registered "programs", as
	 * servers without programs won't have any delegations to handle.
	 */
	for (i = 0; i < nfs4_num_prognums; i++) {
		rnode4_t *rp;

		mutex_enter(&ncg->nfs4_cb_lock);
		sp = ncg->nfs4prog2server[i];
		mutex_exit(&ncg->nfs4_cb_lock);

		if (nfs4_server_vlock(sp, 1) == FALSE)
			continue;
		num_removed = 0;
		while ((rp = list_head(&sp->s_deleg_list)) != NULL) {
			mutex_enter(&rp->r_statev4_lock);
			if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
				/*
				 * We need to take matters into our own hands,
				 * as nfs4delegreturn_cleanup_impl() won't
				 * remove this from the list.
				 */
				list_remove(&sp->s_deleg_list, rp);
				mutex_exit(&rp->r_statev4_lock);
				nfs4_dec_state_ref_count_nolock(sp,
				    VTOMI4(RTOV4(rp)));
				num_removed++;
				continue;
			}
			mutex_exit(&rp->r_statev4_lock);
			VN_HOLD(RTOV4(rp));
			mutex_exit(&sp->s_lock);
			/*
			 * The following will remove the node from the list.
			 */
			nfs4delegreturn_cleanup_impl(rp, sp, ncg);
			VN_RELE(RTOV4(rp));
			mutex_enter(&sp->s_lock);
		}
		mutex_exit(&sp->s_lock);
		/* each removed list node reles a reference */
		while (num_removed-- > 0)
			nfs4_server_rele(sp);
		/* remove our reference for nfs4_server_vlock */
		nfs4_server_rele(sp);
	}
}

/* ARGSUSED */
static void
nfs4_callback_shutdown_zone(zoneid_t zoneid, void *data)
{
	struct nfs4_callback_globals *ncg = data;

	/*
	 * Clean pending delegation return list.
	 */
	nfs4_dlistclean_impl(ncg, NFS4_DR_DISCARD);

	/*
	 * Discard all delegations.
	 */
	nfs4_discard_delegations(ncg);
}

static void
nfs4_callback_fini_zone(zoneid_t zoneid, void *data)
{
	struct nfs4_callback_globals *ncg = data;
	struct nfs4_cb_port *p;
	nfs4_server_t *sp, *next;
	nfs4_server_t freelist;
	int i;

	kstat_delete_byname_zone("nfs", 0, "nfs4_callback_stats", zoneid);

	/*
	 * Discard all delegations that may have crept in since we did the
	 * _shutdown.
	 */
	nfs4_discard_delegations(ncg);
	/*
	 * We're completely done with this zone and all associated
	 * nfs4_server_t's.  Any remaining nfs4_server_ts should only have
	 * one more reference outstanding -- the reference we didn't
	 * release in nfs4_renew_lease_thread().
	 *
	 * Here we need to run through the global nfs4_server_lst as we
	 * need to deal with nfs4_server_ts without programs, as they also
	 * have threads created for them, and so have outstanding
	 * references that we need to release.
	 */
	freelist.forw = &freelist;
	freelist.back = &freelist;
	mutex_enter(&nfs4_server_lst_lock);
	sp = nfs4_server_lst.forw;
	while (sp != &nfs4_server_lst) {
		next = sp->forw;
		if (sp->zoneid == zoneid) {
			remque(sp);
			insque(sp, &freelist);
		}
		sp = next;
	}
	mutex_exit(&nfs4_server_lst_lock);

	sp = freelist.forw;
	while (sp != &freelist) {
		next = sp->forw;
		nfs4_server_rele(sp);	/* free the list's reference */
		sp = next;
	}

#ifdef DEBUG
	for (i = 0; i < nfs4_num_prognums; i++) {
		ASSERT(ncg->nfs4prog2server[i] == NULL);
	}
#endif
	kmem_free(ncg->nfs4prog2server, nfs4_num_prognums *
	    sizeof (struct nfs4_server *));

	mutex_enter(&ncg->nfs4_cb_lock);
	while ((p = list_head(&ncg->nfs4_cb_ports)) != NULL) {
		list_remove(&ncg->nfs4_cb_ports, p);
		kmem_free(p, sizeof (*p));
	}
	list_destroy(&ncg->nfs4_cb_ports);
	mutex_destroy(&ncg->nfs4_cb_lock);
	list_destroy(&ncg->nfs4_dlist);
	mutex_destroy(&ncg->nfs4_dlist_lock);
	kmem_free(ncg, sizeof (*ncg));
}
void
nfs4_callback_init(void)
{
	int i;
	SVC_CALLOUT *nfs4_cb_sc;

	/* initialize the callback table */
	nfs4_cb_sc = kmem_alloc(nfs4_num_prognums *
	    sizeof (SVC_CALLOUT), KM_SLEEP);

	for (i = 0; i < nfs4_num_prognums; i++) {
		nfs4_cb_sc[i].sc_prog = NFS4_CALLBACK+i;
		nfs4_cb_sc[i].sc_versmin = NFS_CB;
		nfs4_cb_sc[i].sc_versmax = NFS_CB;
		nfs4_cb_sc[i].sc_dispatch = cb_dispatch;
	}

	nfs4_cb_sct.sct_size = nfs4_num_prognums;
	nfs4_cb_sct.sct_free = FALSE;
	nfs4_cb_sct.sct_sc = nfs4_cb_sc;

	/*
	 * Compute max bytes required for dynamically allocated parts
	 * of cb_getattr reply.  Only size and change are supported now.
	 * If CB_GETATTR is changed to reply with additional attrs,
	 * additional sizes must be added below.
	 *
	 * fattr4_change + fattr4_size == uint64_t + uint64_t
	 */
	cb_getattr_bytes = 2 * BYTES_PER_XDR_UNIT + 2 * BYTES_PER_XDR_UNIT;

	zone_key_create(&nfs4_callback_zone_key, nfs4_callback_init_zone,
	    nfs4_callback_shutdown_zone, nfs4_callback_fini_zone);
}

void
nfs4_callback_fini(void)
{
}

/*
 * NB: This function can be called from the *wrong* zone (ie, the zone
 * that 'rp' belongs to and the caller's zone may not be the same).  This
 * can happen if the zone is going away and we get called from
 * nfs4_async_inactive().  In this case the globals will be NULL and we
 * won't update the counters, which doesn't matter as the zone is going
 * away anyhow.
 */
static void
nfs4delegreturn_cleanup_impl(rnode4_t *rp, nfs4_server_t *np,
    struct nfs4_callback_globals *ncg)
{
	mntinfo4_t *mi = VTOMI4(RTOV4(rp));
	boolean_t need_rele = B_FALSE;

	/*
	 * Caller must be holding mi_recovlock in read mode
	 * to call here.  This is provided by start_op.
	 * Delegation management requires grabbing s_lock
	 * first and then r_statev4_lock.
	 */

	if (np == NULL) {
		np = find_nfs4_server_all(mi, 1);
		if (np == NULL)
			return;
		need_rele = B_TRUE;
	} else {
		mutex_enter(&np->s_lock);
	}

	mutex_enter(&rp->r_statev4_lock);

	if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
		mutex_exit(&rp->r_statev4_lock);
		mutex_exit(&np->s_lock);
		if (need_rele)
			nfs4_server_rele(np);
		return;
	}

	/*
	 * Free the cred originally held when
	 * the delegation was granted.  Caller must
	 * hold this cred if it wants to use it after
	 * this call.
	 */
	crfree(rp->r_deleg_cred);
	rp->r_deleg_cred = NULL;
	rp->r_deleg_type = OPEN_DELEGATE_NONE;
	rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
	rp->r_deleg_needs_recall = FALSE;
	rp->r_deleg_return_pending = FALSE;

	/*
	 * Remove the rnode from the server's list and
	 * update the ref counts.
	 */
	list_remove(&np->s_deleg_list, rp);
	mutex_exit(&rp->r_statev4_lock);
	nfs4_dec_state_ref_count_nolock(np, mi);
	mutex_exit(&np->s_lock);
	/* removed list node removes a reference */
	nfs4_server_rele(np);
	if (need_rele)
		nfs4_server_rele(np);
	if (ncg != NULL)
		ncg->nfs4_callback_stats.delegations.value.ui64--;
}
void
nfs4delegreturn_cleanup(rnode4_t *rp, nfs4_server_t *np)
{
	struct nfs4_callback_globals *ncg;

	if (np != NULL) {
		ncg = np->zone_globals;
	} else if (nfs_zone() == VTOMI4(RTOV4(rp))->mi_zone) {
		ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
		ASSERT(ncg != NULL);
	} else {
		/*
		 * Request coming from the wrong zone.
		 */
		ASSERT(getzoneid() == GLOBAL_ZONEID);
		ncg = NULL;
	}

	nfs4delegreturn_cleanup_impl(rp, np, ncg);
}

static void
nfs4delegreturn_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp,
    cred_t *cr, vnode_t *vp)
{
	if (error != ETIMEDOUT && error != EINTR &&
	    !NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) {
		lost_rqstp->lr_op = 0;
		return;
	}

	NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
	    "nfs4delegreturn_save_lost_rqst: error %d", error));

	lost_rqstp->lr_op = OP_DELEGRETURN;
	/*
	 * The vp is held and rele'd via the recovery code.
	 * See nfs4_save_lost_rqst.
	 */
	lost_rqstp->lr_vp = vp;
	lost_rqstp->lr_dvp = NULL;
	lost_rqstp->lr_oop = NULL;
	lost_rqstp->lr_osp = NULL;
	lost_rqstp->lr_lop = NULL;
	lost_rqstp->lr_cr = cr;
	lost_rqstp->lr_flk = NULL;
	lost_rqstp->lr_putfirst = FALSE;
}
static void
nfs4delegreturn_otw(rnode4_t *rp, cred_t *cr, nfs4_error_t *ep)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	nfs_argop4 argops[3];
	nfs4_ga_res_t *garp = NULL;
	hrtime_t t;
	int numops;
	int doqueue = 1;

	args.ctag = TAG_DELEGRETURN;

	numops = 3;		/* PUTFH, GETATTR, DELEGRETURN */

	args.array = argops;
	args.array_len = numops;

	argops[0].argop = OP_CPUTFH;
	argops[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;

	argops[1].argop = OP_GETATTR;
	argops[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
	argops[1].nfs_argop4_u.opgetattr.mi = VTOMI4(RTOV4(rp));

	argops[2].argop = OP_DELEGRETURN;
	argops[2].nfs_argop4_u.opdelegreturn.deleg_stateid =
	    rp->r_deleg_stateid;

	t = gethrtime();
	rfs4call(VTOMI4(RTOV4(rp)), &args, &res, cr, &doqueue, 0, ep);

	if (ep->error)
		return;

	if (res.status == NFS4_OK) {
		garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res;
		nfs4_attr_cache(RTOV4(rp), garp, t, cr, TRUE, NULL);
	}
	xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
}
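/*
 * Editorial note on the compound built above; the over-the-wire op
 * order is
 *
 *	PUTFH(rp->r_fh) -> GETATTR(NFS4_VATTR_MASK) -> DELEGRETURN(stateid)
 *
 * GETATTR is placed before DELEGRETURN so the same round trip fetches
 * attributes to refresh the cache (via nfs4_attr_cache) while the
 * delegation is still held.
 */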
int
nfs4_do_delegreturn(rnode4_t *rp, int flags, cred_t *cr,
    struct nfs4_callback_globals *ncg)
{
	vnode_t *vp = RTOV4(rp);
	mntinfo4_t *mi = VTOMI4(vp);
	nfs4_lost_rqst_t lost_rqst;
	nfs4_recov_state_t recov_state;
	bool_t needrecov = FALSE, recovonly, done = FALSE;
	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };

	ncg->nfs4_callback_stats.delegreturn.value.ui64++;

	while (!done) {
		e.error = nfs4_start_fop(mi, vp, NULL, OH_DELEGRETURN,
		    &recov_state, &recovonly);

		if (e.error) {
			if (flags & NFS4_DR_FORCE) {
				(void) nfs_rw_enter_sig(&mi->mi_recovlock,
				    RW_READER, 0);
				nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
				nfs_rw_exit(&mi->mi_recovlock);
			}
			break;
		}

		/*
		 * Check to see if the delegation has already been
		 * returned by the recovery thread.  The state of
		 * the delegation cannot change at this point due
		 * to start_fop and the r_deleg_recall_lock.
		 */
		if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
			e.error = 0;
			nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
			break;
		}

		if (recovonly) {
			/*
			 * Delegation will be returned via the
			 * recovery framework.  Build a lost request
			 * structure, start recovery and get out.
			 */
			nfs4_error_init(&e, EINTR);
			nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
			    cr, vp);
			(void) nfs4_start_recovery(&e, mi, vp,
			    NULL, &rp->r_deleg_stateid,
			    lost_rqst.lr_op == OP_DELEGRETURN ?
			    &lost_rqst : NULL, OP_DELEGRETURN, NULL,
			    NULL, NULL);
			nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
			break;
		}

		nfs4delegreturn_otw(rp, cr, &e);

		/*
		 * Ignore some errors on delegreturn; no point in marking
		 * the file dead on a state destroying operation.
		 */
		if (e.error == 0 && (nfs4_recov_marks_dead(e.stat) ||
		    e.stat == NFS4ERR_BADHANDLE ||
		    e.stat == NFS4ERR_STALE))
			needrecov = FALSE;
		else
			needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);

		if (needrecov) {
			nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
			    cr, vp);
			(void) nfs4_start_recovery(&e, mi, vp,
			    NULL, &rp->r_deleg_stateid,
			    lost_rqst.lr_op == OP_DELEGRETURN ?
			    &lost_rqst : NULL, OP_DELEGRETURN, NULL,
			    NULL, NULL);
		} else {
			nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
			done = TRUE;
		}

		nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
	}
	return (e.error);
}
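/*
 * Editorial summary of the flag combinations this file itself passes
 * to the delegreturn path (the flags are described ahead of
 * nfs4delegreturn_impl() below):
 *
 *	cb_recall():			NFS4_DR_RECALL | NFS4_DR_REOPEN
 *	delegreturn_all_thread():	NFS4_DR_PUSH | NFS4_DR_REOPEN
 *	zone shutdown / recovery:	NFS4_DR_DISCARD (alone, which
 *					short-circuits the OTW call)
 */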
/*
 * nfs4_resend_delegreturn - used to drive the delegreturn
 * operation via the recovery thread.
 */
void
nfs4_resend_delegreturn(nfs4_lost_rqst_t *lorp, nfs4_error_t *ep,
    nfs4_server_t *np)
{
	rnode4_t *rp = VTOR4(lorp->lr_vp);

	/* If the file failed recovery, just quit. */
	mutex_enter(&rp->r_statelock);
	if (rp->r_flags & R4RECOVERR) {
		ep->error = EIO;
	}
	mutex_exit(&rp->r_statelock);

	if (!ep->error)
		nfs4delegreturn_otw(rp, lorp->lr_cr, ep);

	/*
	 * If recovery is now needed, then return the error
	 * and status and let the recovery thread handle it,
	 * including re-driving another delegreturn.  Otherwise,
	 * just give up and clean up the delegation.
	 */
	if (nfs4_needs_recovery(ep, TRUE, lorp->lr_vp->v_vfsp))
		return;

	if (rp->r_deleg_type != OPEN_DELEGATE_NONE)
		nfs4delegreturn_cleanup(rp, np);

	nfs4_error_zinit(ep);
}

/*
 * nfs4delegreturn - general function to return a delegation.
 *
 * NFS4_DR_FORCE - return the delegation even if start_op fails
 * NFS4_DR_PUSH - push modified data back to the server via VOP_PUTPAGE
 * NFS4_DR_DISCARD - discard the delegation w/o delegreturn
 * NFS4_DR_DID_OP - calling function already did nfs4_start_op
 * NFS4_DR_RECALL - delegreturn initiated via CB_RECALL
 * NFS4_DR_REOPEN - do file reopens, if applicable
 */
static int
nfs4delegreturn_impl(rnode4_t *rp, int flags, struct nfs4_callback_globals *ncg)
{
	int error = 0;
	cred_t *cr = NULL;
	vnode_t *vp;
	bool_t needrecov = FALSE;
	bool_t rw_entered = FALSE;
	bool_t do_reopen;

	vp = RTOV4(rp);

	/*
	 * If NFS4_DR_DISCARD is set by itself, take a short-cut and
	 * discard without doing an otw DELEGRETURN.  This may only be used
	 * by the recovery thread because it bypasses the synchronization
	 * with r_deleg_recall_lock and mi->mi_recovlock.
	 */
	if (flags == NFS4_DR_DISCARD) {
		nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
		return (0);
	}

	if (flags & NFS4_DR_DID_OP) {
		/*
		 * Caller had already done start_op, which means the
		 * r_deleg_recall_lock is already held in READ mode
		 * so we cannot take it in write mode.  Return the
		 * delegation asynchronously.
		 *
		 * Remove the NFS4_DR_DID_OP flag so we don't
		 * get stuck looping through here.
		 */
		VN_HOLD(vp);
		nfs4delegreturn_async(rp, (flags & ~NFS4_DR_DID_OP), FALSE);
		return (0);
	}

	/*
	 * Verify we still have a delegation and crhold the credential.
	 */
	mutex_enter(&rp->r_statev4_lock);
	if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
		mutex_exit(&rp->r_statev4_lock);
		goto out;
	}
	cr = rp->r_deleg_cred;
	ASSERT(cr != NULL);
	crhold(cr);
	mutex_exit(&rp->r_statev4_lock);

	/*
	 * Push the modified data back to the server synchronously
	 * before doing DELEGRETURN.
	 */
	if (flags & NFS4_DR_PUSH)
		(void) VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);

	/*
	 * Take r_deleg_recall_lock in WRITE mode, this will prevent
	 * nfs4_is_otw_open_necessary from trying to use the delegation
	 * while the DELEGRETURN is in progress.
	 */
	(void) nfs_rw_enter_sig(&rp->r_deleg_recall_lock, RW_WRITER, FALSE);

	rw_entered = TRUE;

	if (rp->r_deleg_type == OPEN_DELEGATE_NONE)
		goto out;

	if (flags & NFS4_DR_REOPEN) {
		/*
		 * If R4RECOVERRP is already set, then skip re-opening
		 * the delegation open streams and go straight to doing
		 * delegreturn.  (XXX if the file has failed recovery,
		 * then the delegreturn attempt is likely to be futile.)
		 */
		mutex_enter(&rp->r_statelock);
		do_reopen = !(rp->r_flags & R4RECOVERRP);
		mutex_exit(&rp->r_statelock);

		if (do_reopen) {
			error = deleg_reopen(vp, &needrecov, ncg, flags);
			if (error != 0) {
				if ((flags & (NFS4_DR_FORCE | NFS4_DR_RECALL))
				    == 0)
					goto out;
			} else if (needrecov) {
				if ((flags & NFS4_DR_FORCE) == 0)
					goto out;
			}
		}
	}

	if (flags & NFS4_DR_DISCARD) {
		mntinfo4_t *mi = VTOMI4(RTOV4(rp));

		mutex_enter(&rp->r_statelock);
		/*
		 * deleg_return_pending is cleared inside of delegation_accept
		 * when a delegation is accepted.  If this flag has been
		 * cleared, then a new delegation has overwritten the one we
		 * were about to throw away.
		 */
		if (!rp->r_deleg_return_pending) {
			mutex_exit(&rp->r_statelock);
			goto out;
		}
		mutex_exit(&rp->r_statelock);
		(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE);
		nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
		nfs_rw_exit(&mi->mi_recovlock);
	} else {
		error = nfs4_do_delegreturn(rp, flags, cr, ncg);
	}

out:
	if (cr)
		crfree(cr);
	if (rw_entered)
		nfs_rw_exit(&rp->r_deleg_recall_lock);
	return (error);
}
int
nfs4delegreturn(rnode4_t *rp, int flags)
{
	struct nfs4_callback_globals *ncg;

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	return (nfs4delegreturn_impl(rp, flags, ncg));
}

void
nfs4delegreturn_async(rnode4_t *rp, int flags, bool_t trunc)
{
	struct cb_recall_pass *pp;

	pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
	pp->rp = rp;
	pp->flags = flags;
	pp->truncate = trunc;

	/*
	 * Fire up a thread to do the actual delegreturn.
	 * Caller must guarantee that the rnode doesn't
	 * vanish (by calling VN_HOLD).
	 */

	(void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
	    minclsyspri);
}

static void
delegreturn_all_thread(rpcprog_t *pp)
{
	nfs4_server_t *np;
	bool_t found = FALSE;
	rpcprog_t prog;
	rnode4_t *rp;
	vnode_t *vp;
	zoneid_t zoneid = getzoneid();
	struct nfs4_callback_globals *ncg;

	NFS4_DEBUG(nfs4_drat_debug,
	    (CE_NOTE, "delegreturn_all_thread: prog %d\n", *pp));

	prog = *pp;
	kmem_free(pp, sizeof (*pp));
	pp = NULL;

	mutex_enter(&nfs4_server_lst_lock);
	for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
		if (np->zoneid == zoneid && np->s_program == prog) {
			mutex_enter(&np->s_lock);
			found = TRUE;
			break;
		}
	}
	mutex_exit(&nfs4_server_lst_lock);

	/*
	 * It's possible that the nfs4_server which was using this
	 * program number has vanished since this thread is async.
	 * If so, just return.  Your work here is finished, my friend.
	 */
	if (!found)
		goto out;

	ncg = np->zone_globals;
	while ((rp = list_head(&np->s_deleg_list)) != NULL) {
		vp = RTOV4(rp);
		VN_HOLD(vp);
		mutex_exit(&np->s_lock);
		(void) nfs4delegreturn_impl(rp, NFS4_DR_PUSH|NFS4_DR_REOPEN,
		    ncg);
		VN_RELE(vp);

		/* retake the s_lock for next trip through the loop */
		mutex_enter(&np->s_lock);
	}
	mutex_exit(&np->s_lock);
out:
	NFS4_DEBUG(nfs4_drat_debug,
	    (CE_NOTE, "delegreturn_all_thread: complete\n"));
	zthread_exit();
}

void
nfs4_delegreturn_all(nfs4_server_t *sp)
{
	rpcprog_t pro, *pp;

	mutex_enter(&sp->s_lock);

	/* Check to see if the delegation list is empty */

	if (list_head(&sp->s_deleg_list) == NULL) {
		mutex_exit(&sp->s_lock);
		return;
	}
	/*
	 * Grab the program number; the async thread will use this
	 * to find the nfs4_server.
	 */
	pro = sp->s_program;
	mutex_exit(&sp->s_lock);
	pp = kmem_alloc(sizeof (rpcprog_t), KM_SLEEP);
	*pp = pro;
	(void) zthread_create(NULL, 0, delegreturn_all_thread, pp, 0,
	    minclsyspri);
}
/*
 * Discard any delegations
 *
 * Iterate over the server's s_deleg_list and
 * for matching mount-point rnodes discard
 * the delegation.
 */
void
nfs4_deleg_discard(mntinfo4_t *mi, nfs4_server_t *sp)
{
	rnode4_t *rp, *next;
	mntinfo4_t *r_mi;
	struct nfs4_callback_globals *ncg;

	ASSERT(mutex_owned(&sp->s_lock));
	ncg = sp->zone_globals;

	for (rp = list_head(&sp->s_deleg_list); rp != NULL; rp = next) {
		r_mi = VTOMI4(RTOV4(rp));
		next = list_next(&sp->s_deleg_list, rp);

		if (r_mi != mi) {
			/*
			 * Skip if this rnode is not on the
			 * same mount-point
			 */
			continue;
		}

		ASSERT(rp->r_deleg_type == OPEN_DELEGATE_READ);

#ifdef DEBUG
		if (nfs4_client_recov_debug) {
			zprintf(getzoneid(),
			    "nfs4_deleg_discard: matched rnode %p "
			    "-- discarding delegation\n", (void *)rp);
		}
#endif
		mutex_enter(&rp->r_statev4_lock);
		/*
		 * Free the cred originally held when the delegation
		 * was granted.  Also need to decrement the refcnt
		 * on this server for each delegation we discard.
		 */
		if (rp->r_deleg_cred)
			crfree(rp->r_deleg_cred);
		rp->r_deleg_cred = NULL;
		rp->r_deleg_type = OPEN_DELEGATE_NONE;
		rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
		rp->r_deleg_needs_recall = FALSE;
		ASSERT(sp->s_refcnt > 1);
		sp->s_refcnt--;
		list_remove(&sp->s_deleg_list, rp);
		mutex_exit(&rp->r_statev4_lock);
		nfs4_dec_state_ref_count_nolock(sp, mi);
		ncg->nfs4_callback_stats.delegations.value.ui64--;
	}
}

/*
 * Reopen any open streams that were covered by the given file's
 * delegation.
 * Returns zero or an errno value.  If there was no error, *recovp
 * indicates whether recovery was initiated.
 */
static int
deleg_reopen(vnode_t *vp, bool_t *recovp, struct nfs4_callback_globals *ncg,
    int flags)
{
	nfs4_open_stream_t *osp;
	nfs4_recov_state_t recov_state;
	bool_t needrecov = FALSE;
	mntinfo4_t *mi;
	rnode4_t *rp;
	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
	int claimnull;

	mi = VTOMI4(vp);
	rp = VTOR4(vp);

	recov_state.rs_flags = 0;
	recov_state.rs_num_retry_despite_err = 0;

retry:
	if ((e.error = nfs4_start_op(mi, vp, NULL, &recov_state)) != 0) {
		return (e.error);
	}

	/*
	 * if we mean to discard the delegation, it must be BAD, so don't
	 * use it when doing the reopen or it will fail too.
	 */
	claimnull = (flags & NFS4_DR_DISCARD);
	/*
	 * Loop through the open streams for this rnode to find
	 * all of the ones created using the delegation state ID.
	 * Each of these needs to be re-opened.
	 */

	while ((osp = get_next_deleg_stream(rp, claimnull)) != NULL) {

		if (claimnull) {
			nfs4_reopen(vp, osp, &e, CLAIM_NULL, FALSE, FALSE);
		} else {
			ncg->nfs4_callback_stats.claim_cur.value.ui64++;

			nfs4_reopen(vp, osp, &e, CLAIM_DELEGATE_CUR, FALSE,
			    FALSE);
			if (e.error == 0 && e.stat == NFS4_OK)
				ncg->nfs4_callback_stats.
				    claim_cur_ok.value.ui64++;
		}

		if (e.error == EAGAIN) {
			open_stream_rele(osp, rp);
			nfs4_end_op(mi, vp, NULL, &recov_state, TRUE);
			goto retry;
		}

		/*
		 * if error is EINTR, ETIMEDOUT, or NFS4_FRC_UNMT_ERR, then
		 * recovery has already been started inside of nfs4_reopen.
		 */
		if (e.error == EINTR || e.error == ETIMEDOUT ||
		    NFS4_FRC_UNMT_ERR(e.error, vp->v_vfsp)) {
			open_stream_rele(osp, rp);
			break;
		}

		needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);

		if (e.error != 0 && !needrecov) {
			/*
			 * Recovery is not possible, but don't give up yet;
			 * we'd still like to do delegreturn after
			 * reopening as many streams as possible.
			 * Continue processing the open streams.
			 */

			ncg->nfs4_callback_stats.recall_failed.value.ui64++;

		} else if (needrecov) {
			/*
			 * Start recovery and bail out.  The recovery
			 * thread will take it from here.
			 */
			(void) nfs4_start_recovery(&e, mi, vp, NULL, NULL,
			    NULL, OP_OPEN, NULL, NULL, NULL);
			open_stream_rele(osp, rp);
			*recovp = TRUE;
			break;
		}

		open_stream_rele(osp, rp);
	}

	nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);

	return (e.error);
}
/*
 * get_next_deleg_stream - returns the next open stream which
 * represents a delegation for this rnode.  In order to assure
 * forward progress, the caller must guarantee that each open
 * stream returned is changed so that a future call won't return
 * it again.
 *
 * There are several ways for the open stream to change.  If the open
 * stream is !os_delegation, then we aren't interested in it.  Also, if
 * either os_failed_reopen or !os_valid, then don't return the osp.
 *
 * If claimnull is false (doing reopen CLAIM_DELEGATE_CUR) then return
 * the osp if it is an os_delegation open stream.  Also, if the rnode
 * still has r_deleg_return_pending, then return the os_delegation osp.
 * Lastly, if the rnode's r_deleg_stateid is different from the osp's
 * open_stateid, then return the osp.
 *
 * We have already taken the 'r_deleg_recall_lock' as WRITER, which
 * prevents new OPENs from going OTW (as start_fop takes this
 * lock in READ mode); thus, no new open streams can be created
 * (which inherently means no new delegation open streams are
 * being created).
 */
static nfs4_open_stream_t *
get_next_deleg_stream(rnode4_t *rp, int claimnull)
{
	nfs4_open_stream_t *osp;

	ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_WRITER));

	/*
	 * Search through the list of open streams looking for
	 * one that was created while holding the delegation.
	 */
	mutex_enter(&rp->r_os_lock);
	for (osp = list_head(&rp->r_open_streams); osp != NULL;
	    osp = list_next(&rp->r_open_streams, osp)) {
		mutex_enter(&osp->os_sync_lock);
		if (!osp->os_delegation || osp->os_failed_reopen ||
		    !osp->os_valid) {
			mutex_exit(&osp->os_sync_lock);
			continue;
		}
		if (!claimnull || rp->r_deleg_return_pending ||
		    !stateid4_cmp(&osp->open_stateid, &rp->r_deleg_stateid)) {
			osp->os_ref_count++;
			mutex_exit(&osp->os_sync_lock);
			mutex_exit(&rp->r_os_lock);
			return (osp);
		}
		mutex_exit(&osp->os_sync_lock);
	}
	mutex_exit(&rp->r_os_lock);

	return (NULL);
}
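/*
 * Editorial note on forward progress: each osp returned above is handed
 * to nfs4_reopen() by deleg_reopen(), and a reopen attempt presumably
 * either replaces osp->open_stateid with a non-delegation stateid or,
 * on failure, sets os_failed_reopen, so a stream that has been
 * processed once fails the checks above and cannot be returned again.
 */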
/*
 * get_next_deleg_stream - returns the next open stream which
 * represents a delegation for this rnode.  In order to ensure
 * forward progress, the caller must guarantee that each open
 * stream returned is changed so that a future call won't return
 * it again.
 *
 * There are several ways for the open stream to change.  If the open
 * stream is !os_delegation, then we aren't interested in it.  Also, if
 * either os_failed_reopen or !os_valid, then don't return the osp.
 *
 * If claimnull is false (doing reopen CLAIM_DELEGATE_CUR) then return
 * the osp if it is an os_delegation open stream.  Also, if the rnode still
 * has r_deleg_return_pending, then return the os_delegation osp.  Lastly,
 * if the rnode's r_deleg_stateid is different from the osp's open_stateid,
 * then return the osp.
 *
 * We have already taken the 'r_deleg_recall_lock' as WRITER, which
 * prevents new OPENs from going OTW (as start_fop takes this
 * lock in READ mode); thus, no new open streams can be created
 * (which inherently means no new delegation open streams are
 * being created).
 */

static nfs4_open_stream_t *
get_next_deleg_stream(rnode4_t *rp, int claimnull)
{
	nfs4_open_stream_t *osp;

	ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_WRITER));

	/*
	 * Search through the list of open streams looking for
	 * one that was created while holding the delegation.
	 */
	mutex_enter(&rp->r_os_lock);
	for (osp = list_head(&rp->r_open_streams); osp != NULL;
	    osp = list_next(&rp->r_open_streams, osp)) {
		mutex_enter(&osp->os_sync_lock);
		if (!osp->os_delegation || osp->os_failed_reopen ||
		    !osp->os_valid) {
			mutex_exit(&osp->os_sync_lock);
			continue;
		}
		if (!claimnull || rp->r_deleg_return_pending ||
		    !stateid4_cmp(&osp->open_stateid, &rp->r_deleg_stateid)) {
			osp->os_ref_count++;
			mutex_exit(&osp->os_sync_lock);
			mutex_exit(&rp->r_os_lock);
			return (osp);
		}
		mutex_exit(&osp->os_sync_lock);
	}
	mutex_exit(&rp->r_os_lock);

	return (NULL);
}

static void
nfs4delegreturn_thread(struct cb_recall_pass *args)
{
	rnode4_t *rp;
	vnode_t *vp;
	cred_t *cr;
	int dtype, error, flags;
	bool_t rdirty, rip;
	kmutex_t cpr_lock;
	callb_cpr_t cpr_info;
	struct nfs4_callback_globals *ncg;

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);

	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,
	    "nfsv4delegRtn");

	rp = args->rp;
	vp = RTOV4(rp);

	mutex_enter(&rp->r_statev4_lock);
	if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
		mutex_exit(&rp->r_statev4_lock);
		goto out;
	}
	mutex_exit(&rp->r_statev4_lock);

	/*
	 * Take the read-write lock in read mode to prevent other
	 * threads from modifying the data during the recall.  This
	 * doesn't affect mmappers.
	 */
	(void) nfs_rw_enter_sig(&rp->r_rwlock, RW_READER, FALSE);

	/* Proceed with delegreturn */

	mutex_enter(&rp->r_statev4_lock);
	if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
		mutex_exit(&rp->r_statev4_lock);
		nfs_rw_exit(&rp->r_rwlock);
		goto out;
	}
	dtype = rp->r_deleg_type;
	cr = rp->r_deleg_cred;
	ASSERT(cr != NULL);
	crhold(cr);
	mutex_exit(&rp->r_statev4_lock);

	flags = args->flags;

	/*
	 * If the file is being truncated at the server, then throw
	 * away all of the pages; it doesn't matter what flavor of
	 * delegation we have.
	 */

	if (args->truncate) {
		ncg->nfs4_callback_stats.recall_trunc.value.ui64++;
		nfs4_invalidate_pages(vp, 0, cr);
	} else if (dtype == OPEN_DELEGATE_WRITE) {

		mutex_enter(&rp->r_statelock);
		rdirty = rp->r_flags & R4DIRTY;
		mutex_exit(&rp->r_statelock);

		if (rdirty) {
			error = VOP_PUTPAGE(vp, 0, 0, 0, cr, NULL);

			if (error)
				CB_WARN1("nfs4delegreturn_thread:"
				    " VOP_PUTPAGE: %d\n", error);
		}
		/* Turn off NFS4_DR_PUSH because we just did that above. */
		flags &= ~NFS4_DR_PUSH;
	}

	mutex_enter(&rp->r_statelock);
	rip = rp->r_flags & R4RECOVERRP;
	mutex_exit(&rp->r_statelock);

	/* If a failed recovery is indicated, discard the pages */

	if (rip) {

		error = VOP_PUTPAGE(vp, 0, 0, B_INVAL, cr, NULL);

		if (error)
			CB_WARN1("nfs4delegreturn_thread: VOP_PUTPAGE: %d\n",
			    error);
	}

	/*
	 * Pass the flags to nfs4delegreturn_impl, but be sure not to pass
	 * NFS4_DR_DID_OP, which just calls nfs4delegreturn_async again.
	 */
	flags &= ~NFS4_DR_DID_OP;

	(void) nfs4delegreturn_impl(rp, flags, ncg);

	nfs_rw_exit(&rp->r_rwlock);
	crfree(cr);
out:
	kmem_free(args, sizeof (struct cb_recall_pass));
	VN_RELE(vp);
	mutex_enter(&cpr_lock);
	CALLB_CPR_EXIT(&cpr_info);
	mutex_destroy(&cpr_lock);
	zthread_exit();
}
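/*
 * The CPR (checkpoint/resume) handshake used above follows the usual
 * pattern for a dedicated kernel thread; a minimal sketch, with the
 * thread's real work elided:
 *
 *	kmutex_t lock;
 *	callb_cpr_t info;
 *
 *	mutex_init(&lock, NULL, MUTEX_DEFAULT, NULL);
 *	CALLB_CPR_INIT(&info, &lock, callb_generic_cpr, "tag");
 *	(do the thread's work)
 *	mutex_enter(&lock);
 *	CALLB_CPR_EXIT(&info);		(this also drops "lock")
 *	mutex_destroy(&lock);
 *	zthread_exit();
 */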
/*
 * This function assumes that its caller is either doing recovery
 * (and therefore cannot call nfs4_start_op) or has already called
 * nfs4_start_op().
 */
void
nfs4_delegation_accept(rnode4_t *rp, open_claim_type4 claim, OPEN4res *res,
    nfs4_ga_res_t *garp, cred_t *cr)
{
	open_read_delegation4 *orp;
	open_write_delegation4 *owp;
	nfs4_server_t *np;
	bool_t already = FALSE;
	bool_t recall = FALSE;
	bool_t valid_garp = TRUE;
	bool_t delegation_granted = FALSE;
	bool_t dr_needed = FALSE;
	bool_t recov;
	int dr_flags = 0;
	long mapcnt;
	uint_t rflag;
	mntinfo4_t *mi;
	struct nfs4_callback_globals *ncg;
	open_delegation_type4 odt;

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	mi = VTOMI4(RTOV4(rp));

	/*
	 * Accept a delegation granted to the client via an OPEN.
	 * Set the delegation fields in the rnode and insert the
	 * rnode onto the list anchored in the nfs4_server_t.  The
	 * proper locking order requires the nfs4_server_t first,
	 * even though it may not be needed in all cases.
	 *
	 * NB: find_nfs4_server returns with s_lock held.
	 */

	if ((np = find_nfs4_server(mi)) == NULL)
		return;

	/* Grab the statelock too, for examining r_mapcnt */
	mutex_enter(&rp->r_statelock);
	mutex_enter(&rp->r_statev4_lock);

	if (rp->r_deleg_type == OPEN_DELEGATE_READ ||
	    rp->r_deleg_type == OPEN_DELEGATE_WRITE)
		already = TRUE;

	odt = res->delegation.delegation_type;

	if (odt == OPEN_DELEGATE_READ) {

		rp->r_deleg_type = res->delegation.delegation_type;
		orp = &res->delegation.open_delegation4_u.read;
		rp->r_deleg_stateid = orp->stateid;
		rp->r_deleg_perms = orp->permissions;
		if (claim == CLAIM_PREVIOUS)
			if ((recall = orp->recall) != 0)
				dr_needed = TRUE;

		delegation_granted = TRUE;

		ncg->nfs4_callback_stats.delegations.value.ui64++;
		ncg->nfs4_callback_stats.delegaccept_r.value.ui64++;

	} else if (odt == OPEN_DELEGATE_WRITE) {

		rp->r_deleg_type = res->delegation.delegation_type;
		owp = &res->delegation.open_delegation4_u.write;
		rp->r_deleg_stateid = owp->stateid;
		rp->r_deleg_perms = owp->permissions;
		rp->r_deleg_limit = owp->space_limit;
		if (claim == CLAIM_PREVIOUS)
			if ((recall = owp->recall) != 0)
				dr_needed = TRUE;

		delegation_granted = TRUE;

		if (garp == NULL || !garp->n4g_change_valid) {
			valid_garp = FALSE;
			rp->r_deleg_change = 0;
			rp->r_deleg_change_grant = 0;
		} else {
			rp->r_deleg_change = garp->n4g_change;
			rp->r_deleg_change_grant = garp->n4g_change;
		}
		mapcnt = rp->r_mapcnt;
		rflag = rp->r_flags;

		/*
		 * Update the delegation change attribute if
		 * there are mappers for the file or the file
		 * is dirty.  This might be the case during
		 * recovery after server reboot.
		 */
		if (mapcnt > 0 || rflag & R4DIRTY)
			rp->r_deleg_change++;

		NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
		    "nfs4_delegation_accept: r_deleg_change: 0x%x\n",
		    (int)(rp->r_deleg_change >> 32)));
		NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
		    "nfs4_delegation_accept: r_deleg_change_grant: 0x%x\n",
		    (int)(rp->r_deleg_change_grant >> 32)));

		ncg->nfs4_callback_stats.delegations.value.ui64++;
		ncg->nfs4_callback_stats.delegaccept_rw.value.ui64++;
	} else if (already) {
		/*
		 * No delegation granted.  If the rnode currently
		 * has one, then consider it tainted and return it.
		 */
		dr_needed = TRUE;
	}

	if (delegation_granted) {
		/* Add the rnode to the list. */
		if (!already) {
			crhold(cr);
			rp->r_deleg_cred = cr;

			ASSERT(mutex_owned(&np->s_lock));
			list_insert_head(&np->s_deleg_list, rp);
			/* The added list node gets a reference */
			np->s_refcnt++;
			nfs4_inc_state_ref_count_nolock(np, mi);
		}
		rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
	}

	/*
	 * We've now safely accepted the delegation, if any.  Drop the
	 * locks and figure out what post-processing is needed.  We'd
	 * like to retain r_statev4_lock, but nfs4_server_rele takes
	 * s_lock which would be a lock ordering violation.
	 */
	mutex_exit(&rp->r_statev4_lock);
	mutex_exit(&rp->r_statelock);
	mutex_exit(&np->s_lock);
	nfs4_server_rele(np);

	/*
	 * Check to see if we are in recovery.  Remember that
	 * this function is protected by start_op, so a recovery
	 * cannot begin until we are out of here.
	 */
	mutex_enter(&mi->mi_lock);
	recov = mi->mi_recovflags & MI4_RECOV_ACTIV;
	mutex_exit(&mi->mi_lock);

	mutex_enter(&rp->r_statev4_lock);

	if (nfs4_delegreturn_policy == IMMEDIATE || !valid_garp)
		dr_needed = TRUE;

	if (dr_needed && rp->r_deleg_return_pending == FALSE) {
		if (recov) {
			/*
			 * We cannot call delegreturn from inside
			 * of recovery or VOP_PUTPAGE will hang
			 * due to the nfs4_start_fop call in
			 * nfs4write.  Use dlistadd to add the
			 * rnode to the list of rnodes needing
			 * cleaning.  We do not need to do reopen
			 * here because recov_openfiles will do it.
			 * In the non-recall case, just discard the
			 * delegation as it is no longer valid.
			 */
			if (recall)
				dr_flags = NFS4_DR_PUSH;
			else
				dr_flags = NFS4_DR_PUSH|NFS4_DR_DISCARD;

			nfs4_dlistadd(rp, ncg, dr_flags);
			dr_flags = 0;
		} else {
			/*
			 * Push the modified data back to the server,
			 * reopen any delegation open streams, and return
			 * the delegation.  Drop the statev4_lock first!
			 */
			dr_flags = NFS4_DR_PUSH|NFS4_DR_DID_OP|NFS4_DR_REOPEN;
		}
	}
	mutex_exit(&rp->r_statev4_lock);
	if (dr_flags)
		(void) nfs4delegreturn_impl(rp, dr_flags, ncg);
}
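/*
 * A worked example of the r_deleg_change adjustment above: if the
 * server grants a write delegation carrying change attribute C while
 * the file still has mappers or dirty pages (as can happen during
 * recovery after a server reboot), r_deleg_change becomes C + 1 while
 * r_deleg_change_grant stays C.  The mismatch appears to record that
 * cached modifications may already be newer than what the granted
 * change attribute describes.
 */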
/*
 * nfs4delegabandon - Abandon the delegation on an rnode4.  This code
 * is called when the client receives EXPIRED, BAD_STATEID, OLD_STATEID
 * or BADSEQID and the recovery code is unable to recover.  Push any
 * dirty data back to the server and return the delegation (if any).
 */

void
nfs4delegabandon(rnode4_t *rp)
{
	vnode_t *vp;
	struct cb_recall_pass *pp;
	open_delegation_type4 dt;

	mutex_enter(&rp->r_statev4_lock);
	dt = rp->r_deleg_type;
	mutex_exit(&rp->r_statev4_lock);

	if (dt == OPEN_DELEGATE_NONE)
		return;

	vp = RTOV4(rp);
	VN_HOLD(vp);

	pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
	pp->rp = rp;
	/*
	 * Recovery on the file has failed and we want to return
	 * the delegation.  We don't want to reopen files;
	 * nfs4delegreturn_thread() will figure out what to do about
	 * the data.  The only thing to do is attempt to return
	 * the delegation.
	 */
	pp->flags = 0;
	pp->truncate = FALSE;

	/*
	 * Fire up a thread to do the delegreturn; this is
	 * necessary because we could be inside a GETPAGE or
	 * PUTPAGE and we cannot do another one.
	 */

	(void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
	    minclsyspri);
}

static int
wait_for_recall1(vnode_t *vp, nfs4_op_hint_t op, nfs4_recov_state_t *rsp,
    int flg)
{
	rnode4_t *rp;
	int error = 0;

#ifdef lint
	op = op;
#endif

	if (vp && vp->v_type == VREG) {
		rp = VTOR4(vp);

		/*
		 * Take r_deleg_recall_lock in read mode to synchronize
		 * with delegreturn.
		 */
		error = nfs_rw_enter_sig(&rp->r_deleg_recall_lock,
		    RW_READER, INTR4(vp));

		if (error == 0)
			rsp->rs_flags |= flg;

	}
	return (error);
}

void
nfs4_end_op_recall(vnode_t *vp1, vnode_t *vp2, nfs4_recov_state_t *rsp)
{
	NFS4_DEBUG(nfs4_recall_debug,
	    (CE_NOTE, "nfs4_end_op_recall: 0x%p, 0x%p\n",
	    (void *)vp1, (void *)vp2));

	if (vp2 && rsp->rs_flags & NFS4_RS_RECALL_HELD2)
		nfs_rw_exit(&VTOR4(vp2)->r_deleg_recall_lock);
	if (vp1 && rsp->rs_flags & NFS4_RS_RECALL_HELD1)
		nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
}

int
wait_for_recall(vnode_t *vp1, vnode_t *vp2, nfs4_op_hint_t op,
    nfs4_recov_state_t *rsp)
{
	int error;

	NFS4_DEBUG(nfs4_recall_debug,
	    (CE_NOTE, "wait_for_recall: 0x%p, 0x%p\n",
	    (void *)vp1, (void *)vp2));

	rsp->rs_flags &= ~(NFS4_RS_RECALL_HELD1|NFS4_RS_RECALL_HELD2);

	if ((error = wait_for_recall1(vp1, op, rsp, NFS4_RS_RECALL_HELD1)) != 0)
		return (error);

	if ((error = wait_for_recall1(vp2, op, rsp, NFS4_RS_RECALL_HELD2))
	    != 0) {
		if (rsp->rs_flags & NFS4_RS_RECALL_HELD1) {
			nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
			rsp->rs_flags &= ~NFS4_RS_RECALL_HELD1;
		}

		return (error);
	}

	return (0);
}
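/*
 * A minimal usage sketch for the pair above (hypothetical caller;
 * OH_OTHER stands in for whatever operation hint applies):
 *
 *	nfs4_recov_state_t rs;
 *
 *	rs.rs_flags = 0;
 *	rs.rs_num_retry_despite_err = 0;
 *	if ((error = wait_for_recall(vp1, vp2, OH_OTHER, &rs)) != 0)
 *		return (error);
 *	(perform the over-the-wire operation)
 *	nfs4_end_op_recall(vp1, vp2, &rs);
 */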
/*
 * nfs4_dlistadd - Add this rnode to a list of rnodes to be
 * DELEGRETURN'd at the end of recovery.
 */

static void
nfs4_dlistadd(rnode4_t *rp, struct nfs4_callback_globals *ncg, int flags)
{
	struct nfs4_dnode *dp;

	ASSERT(mutex_owned(&rp->r_statev4_lock));
	/*
	 * Mark the delegation as having a return pending.
	 * This will prevent the use of the delegation stateID
	 * by read, write, setattr and open.
	 */
	rp->r_deleg_return_pending = TRUE;
	dp = kmem_alloc(sizeof (*dp), KM_SLEEP);
	VN_HOLD(RTOV4(rp));
	dp->rnodep = rp;
	dp->flags = flags;
	mutex_enter(&ncg->nfs4_dlist_lock);
	list_insert_head(&ncg->nfs4_dlist, dp);
#ifdef DEBUG
	ncg->nfs4_dlistadd_c++;
#endif
	mutex_exit(&ncg->nfs4_dlist_lock);
}

/*
 * nfs4_dlistclean_impl - Do DELEGRETURN for each rnode on the list
 * of files awaiting cleaning.  If the override_flags are non-zero
 * then use them rather than the flags that were set when the rnode
 * was added to the dlist.
 */
static void
nfs4_dlistclean_impl(struct nfs4_callback_globals *ncg, int override_flags)
{
	rnode4_t *rp;
	struct nfs4_dnode *dp;
	int flags;

	ASSERT(override_flags == 0 || override_flags == NFS4_DR_DISCARD);

	mutex_enter(&ncg->nfs4_dlist_lock);
	while ((dp = list_head(&ncg->nfs4_dlist)) != NULL) {
#ifdef DEBUG
		ncg->nfs4_dlistclean_c++;
#endif
		list_remove(&ncg->nfs4_dlist, dp);
		mutex_exit(&ncg->nfs4_dlist_lock);
		rp = dp->rnodep;
		flags = (override_flags != 0) ? override_flags : dp->flags;
		kmem_free(dp, sizeof (*dp));
		(void) nfs4delegreturn_impl(rp, flags, ncg);
		VN_RELE(RTOV4(rp));
		mutex_enter(&ncg->nfs4_dlist_lock);
	}
	mutex_exit(&ncg->nfs4_dlist_lock);
}

void
nfs4_dlistclean(void)
{
	struct nfs4_callback_globals *ncg;

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	nfs4_dlistclean_impl(ncg, 0);
}
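/*
 * An illustration of the override contract above: a caller may honor
 * the per-rnode flags recorded by nfs4_dlistadd(),
 *
 *	nfs4_dlistclean_impl(ncg, 0);
 *
 * or force every queued delegation to be discarded rather than
 * returned over the wire (for instance, when the delegation state is
 * known to be unusable):
 *
 *	nfs4_dlistclean_impl(ncg, NFS4_DR_DISCARD);
 *
 * The ASSERT in nfs4_dlistclean_impl permits exactly these two values.
 */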