/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989  AT&T	*/
/*	  All Rights Reserved	*/

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <nfs/nfs4_clnt.h>
#include <nfs/rnode4.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/atomic.h>

static void	nfs4_free_open_owner(nfs4_open_owner_t *, mntinfo4_t *);
static nfs4_open_owner_t *find_freed_open_owner(cred_t *,
    nfs4_oo_hash_bucket_t *, mntinfo4_t *);
static open_delegation_type4 get_dtype(rnode4_t *);

#ifdef DEBUG
int nfs4_client_foo_debug = 0x0;
int nfs4_client_open_dg = 0x0;
/*
 * If this is non-zero, the lockowner and openowner seqid sync primitives
 * will intermittently return errors.
 */
static int seqid_sync_faults = 0;
#endif

stateid4 clnt_special0 = {
	0,
	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
};

stateid4 clnt_special1 = {
	0xffffffff,
	{
		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
		(char)0xff, (char)0xff, (char)0xff, (char)0xff
	}
};

/* finds hash bucket and locks it */
static nfs4_oo_hash_bucket_t *
lock_bucket(cred_t *cr, mntinfo4_t *mi)
{
	nfs4_oo_hash_bucket_t *bucketp;
	uint32_t hash_key;

	hash_key = (uint32_t)(crgetuid(cr) + crgetruid(cr))
	    % NFS4_NUM_OO_BUCKETS;
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, "lock_bucket: "
	    "hash_key %d for cred %p", hash_key, (void*)cr));

	ASSERT(hash_key >= 0 && hash_key < NFS4_NUM_OO_BUCKETS);
	ASSERT(mi != NULL);
	ASSERT(mutex_owned(&mi->mi_lock));

	bucketp = &(mi->mi_oo_list[hash_key]);
	mutex_enter(&bucketp->b_lock);
	return (bucketp);
}

/* unlocks the hash bucket pointed to by bucketp */
static void
unlock_bucket(nfs4_oo_hash_bucket_t *bucketp)
{
	mutex_exit(&bucketp->b_lock);
}

/*
 * Removes the lock owner from the rnode's lock_owners list and frees the
 * corresponding reference.
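 * It is safe to call this for a lock owner that has already been taken
 * off the list; in that case the routine simply returns.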
 */
void
nfs4_rnode_remove_lock_owner(rnode4_t *rp, nfs4_lock_owner_t *lop)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_rnode_remove_lock_owner"));

	mutex_enter(&rp->r_statev4_lock);

	if (lop->lo_next_rnode == NULL) {
		/* already removed from list */
		mutex_exit(&rp->r_statev4_lock);
		return;
	}

	ASSERT(lop->lo_prev_rnode != NULL);

	lop->lo_prev_rnode->lo_next_rnode = lop->lo_next_rnode;
	lop->lo_next_rnode->lo_prev_rnode = lop->lo_prev_rnode;

	lop->lo_next_rnode = lop->lo_prev_rnode = NULL;

	mutex_exit(&rp->r_statev4_lock);

	/*
	 * This would be an appropriate place for
	 * RELEASE_LOCKOWNER.  For now, this is overkill
	 * because in the common case, close is going to
	 * release any lockowners anyway.
	 */
	lock_owner_rele(lop);
}

/*
 * Remove all lock owners from the rnode's lock_owners list.  Frees up
 * their references from the list.
 */

void
nfs4_flush_lock_owners(rnode4_t *rp)
{
	nfs4_lock_owner_t *lop;

	mutex_enter(&rp->r_statev4_lock);
	while (rp->r_lo_head.lo_next_rnode != &rp->r_lo_head) {
		lop = rp->r_lo_head.lo_next_rnode;
		lop->lo_prev_rnode->lo_next_rnode = lop->lo_next_rnode;
		lop->lo_next_rnode->lo_prev_rnode = lop->lo_prev_rnode;
		lop->lo_next_rnode = lop->lo_prev_rnode = NULL;
		lock_owner_rele(lop);
	}
	mutex_exit(&rp->r_statev4_lock);
}

void
nfs4_clear_open_streams(rnode4_t *rp)
{
	nfs4_open_stream_t *osp;

	mutex_enter(&rp->r_os_lock);
	while ((osp = list_head(&rp->r_open_streams)) != NULL) {
		open_owner_rele(osp->os_open_owner);
		list_remove(&rp->r_open_streams, osp);
		mutex_destroy(&osp->os_sync_lock);
		osp->os_open_owner = NULL;
		kmem_free(osp, sizeof (*osp));
	}
	mutex_exit(&rp->r_os_lock);
}

void
open_owner_hold(nfs4_open_owner_t *oop)
{
	mutex_enter(&oop->oo_lock);
	oop->oo_ref_count++;
	mutex_exit(&oop->oo_lock);
}

/*
 * Frees the open owner if the ref count hits zero.
 */
void
open_owner_rele(nfs4_open_owner_t *oop)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "open_owner_rele"));

	mutex_enter(&oop->oo_lock);
	oop->oo_ref_count--;
	if (oop->oo_ref_count == 0) {
		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "open_owner_rele: freeing open owner"));
		oop->oo_valid = 0;
		mutex_exit(&oop->oo_lock);
		/*
		 * Ok, we don't destroy the open owner, nor do we put it on
		 * the mntinfo4's free list just yet.  We are lazy about it
		 * and let callers of find_open_owner() do that to keep
		 * locking simple.
		 */
	} else {
		mutex_exit(&oop->oo_lock);
	}
}

void
open_stream_hold(nfs4_open_stream_t *osp)
{
	mutex_enter(&osp->os_sync_lock);
	osp->os_ref_count++;
	mutex_exit(&osp->os_sync_lock);
}

/*
 * Frees the open stream and removes it from the rnode4's open streams list
 * if the ref count drops to zero.
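 * The caller must not hold 'r_os_lock'; this routine takes it itself
 * when it removes the stream from the rnode's list.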
 */
void
open_stream_rele(nfs4_open_stream_t *osp, rnode4_t *rp)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "open_stream_rele"));

	ASSERT(!mutex_owned(&rp->r_os_lock));

	mutex_enter(&osp->os_sync_lock);
	ASSERT(osp->os_ref_count > 0);
	osp->os_ref_count--;
	if (osp->os_ref_count == 0) {
		nfs4_open_owner_t *tmp_oop;

		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "open_stream_rele: freeing open stream"));
		osp->os_valid = 0;
		tmp_oop = osp->os_open_owner;
		mutex_exit(&osp->os_sync_lock);

		/* now see if we need to destroy the open owner */
		open_owner_rele(tmp_oop);

		mutex_enter(&rp->r_os_lock);
		list_remove(&rp->r_open_streams, osp);
		mutex_exit(&rp->r_os_lock);

		/* free up osp */
		mutex_destroy(&osp->os_sync_lock);
		osp->os_open_owner = NULL;
		kmem_free(osp, sizeof (*osp));
	} else {
		mutex_exit(&osp->os_sync_lock);
	}
}

void
lock_owner_hold(nfs4_lock_owner_t *lop)
{
	mutex_enter(&lop->lo_lock);
	lop->lo_ref_count++;
	mutex_exit(&lop->lo_lock);
}

/*
 * Frees the lock owner if the ref count hits zero and
 * the structure no longer holds any locks.
 */
void
lock_owner_rele(nfs4_lock_owner_t *lop)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "lock_owner_rele"));

	mutex_enter(&lop->lo_lock);
	lop->lo_ref_count--;
	if (lop->lo_ref_count == 0) {
		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "lock_owner_rele: freeing lock owner: "
		    "%x", lop->lo_pid));
		lop->lo_valid = 0;
		/*
		 * If there are no references, the lock_owner should
		 * already be off the rnode's list.
		 */
		ASSERT(lop->lo_next_rnode == NULL);
		ASSERT(lop->lo_prev_rnode == NULL);
		ASSERT(!(lop->lo_flags & NFS4_LOCK_SEQID_INUSE));
		ASSERT(lop->lo_seqid_holder == NULL);
		mutex_exit(&lop->lo_lock);

		/* free up lop */
		cv_destroy(&lop->lo_cv_seqid_sync);
		mutex_destroy(&lop->lo_lock);
		kmem_free(lop, sizeof (*lop));
	} else {
		mutex_exit(&lop->lo_lock);
	}
}

/*
 * This increments the open owner ref count if found.
 * The argument 'just_created' determines whether we are looking for open
 * owners with the 'oo_just_created' flag set or not.
 */
nfs4_open_owner_t *
find_open_owner_nolock(cred_t *cr, int just_created, mntinfo4_t *mi)
{
	nfs4_open_owner_t *oop = NULL, *next_oop;
	nfs4_oo_hash_bucket_t *bucketp;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "find_open_owner: cred %p, just_created %d",
	    (void*)cr, just_created));

	ASSERT(mi != NULL);
	ASSERT(mutex_owned(&mi->mi_lock));

	bucketp = lock_bucket(cr, mi);

	/* got hash bucket, search through open owners */
	for (oop = list_head(&bucketp->b_oo_hash_list); oop != NULL; ) {
		mutex_enter(&oop->oo_lock);
		if (!crcmp(oop->oo_cred, cr) &&
		    (oop->oo_just_created == just_created ||
		    just_created == NFS4_JUST_CREATED)) {
			/* match */
			if (oop->oo_valid == 0) {
				/* reactivate the open owner */
				oop->oo_valid = 1;
				ASSERT(oop->oo_ref_count == 0);
			}
			oop->oo_ref_count++;
			mutex_exit(&oop->oo_lock);
			unlock_bucket(bucketp);
			return (oop);
		}
		next_oop = list_next(&bucketp->b_oo_hash_list, oop);
		if (oop->oo_valid == 0) {
			list_remove(&bucketp->b_oo_hash_list, oop);

			/*
			 * Now we go ahead and put this open owner
			 * on the freed list.  This is our lazy method.
			 */
			nfs4_free_open_owner(oop, mi);
		}

		mutex_exit(&oop->oo_lock);
		oop = next_oop;
	}

	/* search through recently freed open owners */
	oop = find_freed_open_owner(cr, bucketp, mi);

	unlock_bucket(bucketp);

	return (oop);
}

nfs4_open_owner_t *
find_open_owner(cred_t *cr, int just_created, mntinfo4_t *mi)
{
	nfs4_open_owner_t *oop;

	mutex_enter(&mi->mi_lock);
	oop = find_open_owner_nolock(cr, just_created, mi);
	mutex_exit(&mi->mi_lock);

	return (oop);
}

/*
 * This increments osp's ref count if found.
 * Returns with 'os_sync_lock' held.
 */
nfs4_open_stream_t *
find_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp)
{
	nfs4_open_stream_t *osp;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "find_open_stream"));

	mutex_enter(&rp->r_os_lock);
	/* Now, no one can add or delete to rp's open streams list */
	for (osp = list_head(&rp->r_open_streams); osp != NULL;
	    osp = list_next(&rp->r_open_streams, osp)) {
		mutex_enter(&osp->os_sync_lock);
		if (osp->os_open_owner == oop && osp->os_valid != 0) {
			/* match */
			NFS4_DEBUG(nfs4_client_state_debug,
			    (CE_NOTE, "find_open_stream "
			    "got a match"));

			osp->os_ref_count++;
			mutex_exit(&rp->r_os_lock);
			return (osp);
		}
		mutex_exit(&osp->os_sync_lock);
	}

	mutex_exit(&rp->r_os_lock);
	return (NULL);
}

/*
 * Find the lock owner for the given file and process ID.  If "which" is
 * LOWN_VALID_STATEID, require that the lock owner contain a valid stateid
 * from the server.
 *
 * This increments the lock owner's ref count if found.  Returns NULL if
 * there was no match.
 */
nfs4_lock_owner_t *
find_lock_owner(rnode4_t *rp, pid_t pid, lown_which_t which)
{
	nfs4_lock_owner_t *lop, *next_lop;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "find_lock_owner: pid %x, which %d", pid, which));

	ASSERT(which == LOWN_ANY || which == LOWN_VALID_STATEID);

	/* search by pid */
	mutex_enter(&rp->r_statev4_lock);

	lop = rp->r_lo_head.lo_next_rnode;
	while (lop != &rp->r_lo_head) {
		mutex_enter(&lop->lo_lock);
		if (lop->lo_pid == pid && lop->lo_valid != 0 &&
		    !(lop->lo_flags & NFS4_BAD_SEQID_LOCK)) {
			if (which == LOWN_ANY ||
			    lop->lo_just_created != NFS4_JUST_CREATED) {
				/* Found a matching lock owner */
				NFS4_DEBUG(nfs4_client_state_debug,
				    (CE_NOTE, "find_lock_owner: "
				    "got a match"));

				lop->lo_ref_count++;
				mutex_exit(&lop->lo_lock);
				mutex_exit(&rp->r_statev4_lock);
				return (lop);
			}
		}
		next_lop = lop->lo_next_rnode;
		mutex_exit(&lop->lo_lock);
		lop = next_lop;
	}

	mutex_exit(&rp->r_statev4_lock);
	return (NULL);
}

/*
 * This returns the delegation stateid as 'sid'.  Returns 1 if a successful
 * delegation stateid was found, otherwise returns 0.
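 * A write delegation satisfies any operation, while a read delegation
 * only satisfies non-WRITE operations; in either case the delegation is
 * not used if its return is already pending.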
 */

static int
nfs4_get_deleg_stateid(rnode4_t *rp, nfs_opnum4 op, stateid4 *sid)
{
	ASSERT(!mutex_owned(&rp->r_statev4_lock));

	mutex_enter(&rp->r_statev4_lock);
	if (((rp->r_deleg_type == OPEN_DELEGATE_WRITE && op == OP_WRITE) ||
	    (rp->r_deleg_type != OPEN_DELEGATE_NONE && op != OP_WRITE)) &&
	    !rp->r_deleg_return_pending) {

		*sid = rp->r_deleg_stateid;
		mutex_exit(&rp->r_statev4_lock);
		return (1);
	}
	mutex_exit(&rp->r_statev4_lock);
	return (0);
}

/*
 * This returns the lock stateid as 'sid'.  Returns 1 if a successful lock
 * stateid was found, otherwise returns 0.
 */
static int
nfs4_get_lock_stateid(rnode4_t *rp, pid_t pid, stateid4 *sid)
{
	nfs4_lock_owner_t *lop;

	lop = find_lock_owner(rp, pid, LOWN_VALID_STATEID);

	if (lop) {
		/*
		 * Found a matching lock owner, so use a lock
		 * stateid rather than an open stateid.
		 */
		mutex_enter(&lop->lo_lock);
		*sid = lop->lock_stateid;
		mutex_exit(&lop->lo_lock);
		lock_owner_rele(lop);
		return (1);
	}

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_get_lock_stateid: no lop"));
	return (0);
}

/*
 * This returns the open stateid as 'sid'.  Returns 1 if a successful open
 * stateid was found, otherwise returns 0.
 *
 * Once the stateid is returned to the caller, it is no longer protected;
 * so the caller must be prepared to handle OLD/BAD_STATEID where
 * appropriate.
 */
static int
nfs4_get_open_stateid(rnode4_t *rp, cred_t *cr, mntinfo4_t *mi, stateid4 *sid)
{
	nfs4_open_owner_t *oop;
	nfs4_open_stream_t *osp;

	ASSERT(mi != NULL);

	oop = find_open_owner(cr, NFS4_PERM_CREATED, mi);
	if (!oop) {
		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "nfs4_get_open_stateid: no oop"));
		return (0);
	}

	osp = find_open_stream(oop, rp);
	open_owner_rele(oop);
	if (!osp) {
		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "nfs4_get_open_stateid: no osp"));
		return (0);
	}

	if (osp->os_failed_reopen) {
		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "nfs4_get_open_stateid: osp %p failed reopen",
		    (void *)osp));
		mutex_exit(&osp->os_sync_lock);
		open_stream_rele(osp, rp);
		return (0);
	}
	*sid = osp->open_stateid;
	mutex_exit(&osp->os_sync_lock);
	open_stream_rele(osp, rp);
	return (1);
}

/*
 * Returns the delegation stateid if this 'op' is OP_WRITE and the
 * delegation we hold is a write delegation, OR this 'op' is not
 * OP_WRITE and we have a delegation held (read or write), otherwise
 * returns the lock stateid if there is a lock owner, otherwise
 * returns the open stateid if there is an open stream, otherwise
 * returns the special stateid <seqid = 0, other = 0>.
 *
 * Used for WRITE operations.
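 *
 * In other words, the stateid is picked in this priority order:
 *	delegation stateid > lock stateid > open stateid > special stateid
 * and a stateid already recorded in 'sid_tp' (one we have already tried)
 * is skipped.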
 */
stateid4
nfs4_get_w_stateid(cred_t *cr, rnode4_t *rp, pid_t pid, mntinfo4_t *mi,
    nfs_opnum4 op, nfs4_stateid_types_t *sid_tp)
{
	stateid4 sid;

	if (nfs4_get_deleg_stateid(rp, op, &sid)) {
		if (!stateid4_cmp(&sid, &sid_tp->d_sid)) {
			sid_tp->cur_sid_type = DEL_SID;
			return (sid);
		}
	}
	if (nfs4_get_lock_stateid(rp, pid, &sid)) {
		if (!stateid4_cmp(&sid, &sid_tp->l_sid)) {
			sid_tp->cur_sid_type = LOCK_SID;
			return (sid);
		}
	}
	if (nfs4_get_open_stateid(rp, cr, mi, &sid)) {
		if (!stateid4_cmp(&sid, &sid_tp->o_sid)) {
			sid_tp->cur_sid_type = OPEN_SID;
			return (sid);
		}
	}
	bzero(&sid, sizeof (stateid4));
	sid_tp->cur_sid_type = SPEC_SID;
	return (sid);
}

/*
 * Returns the delegation stateid if this 'op' is OP_WRITE and the
 * delegation we hold is a write delegation, OR this 'op' is not
 * OP_WRITE and we have a delegation held (read or write), otherwise
 * returns the lock stateid if there is a lock owner, otherwise
 * returns the open stateid if there is an open stream, otherwise
 * returns the special stateid <seqid = 0, other = 0>.
 *
 * This also updates which stateid we are using in 'sid_tp', skips
 * previously attempted stateids, and skips checking higher priority
 * stateids than the current level as dictated by 'sid_tp->cur_sid_type'
 * for async reads.
 *
 * Used for READ and SETATTR operations.
 */
stateid4
nfs4_get_stateid(cred_t *cr, rnode4_t *rp, pid_t pid, mntinfo4_t *mi,
    nfs_opnum4 op, nfs4_stateid_types_t *sid_tp, bool_t async_read)
{
	stateid4 sid;

	/*
	 * For asynchronous READs, do not attempt to retry from the start of
	 * the stateid priority list, just continue from where you last left
	 * off.
	 */
	if (async_read) {
		switch (sid_tp->cur_sid_type) {
		case NO_SID:
			break;
		case DEL_SID:
			goto lock_stateid;
		case LOCK_SID:
			goto open_stateid;
		case OPEN_SID:
			goto special_stateid;
		case SPEC_SID:
		default:
			cmn_err(CE_PANIC, "nfs4_get_stateid: illegal current "
			    "stateid type %d", sid_tp->cur_sid_type);
		}
	}

	if (nfs4_get_deleg_stateid(rp, op, &sid)) {
		if (!stateid4_cmp(&sid, &sid_tp->d_sid)) {
			sid_tp->cur_sid_type = DEL_SID;
			return (sid);
		}
	}
lock_stateid:
	if (nfs4_get_lock_stateid(rp, pid, &sid)) {
		if (!stateid4_cmp(&sid, &sid_tp->l_sid)) {
			sid_tp->cur_sid_type = LOCK_SID;
			return (sid);
		}
	}
open_stateid:
	if (nfs4_get_open_stateid(rp, cr, mi, &sid)) {
		if (!stateid4_cmp(&sid, &sid_tp->o_sid)) {
			sid_tp->cur_sid_type = OPEN_SID;
			return (sid);
		}
	}
special_stateid:
	bzero(&sid, sizeof (stateid4));
	sid_tp->cur_sid_type = SPEC_SID;
	return (sid);
}

void
nfs4_set_lock_stateid(nfs4_lock_owner_t *lop, stateid4 stateid)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_set_lock_stateid"));

	ASSERT(lop);
	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);

	mutex_enter(&lop->lo_lock);
	lop->lock_stateid = stateid;
	mutex_exit(&lop->lo_lock);
}

/*
 * Sequence number used when a new open owner is needed.
 * This is used so as to not confuse the server.  Since an open owner
 * is based off of cred, a cred could be re-used quickly, and the server
 * may not release all state for a cred.
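 * nfs4_get_new_oo_name() below hands these values out atomically, so
 * every open owner created by this client gets a distinct name.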
 */
static uint64_t open_owner_seq_num = 0;

uint64_t
nfs4_get_new_oo_name(void)
{
	return (atomic_add_64_nv(&open_owner_seq_num, 1));
}

/*
 * Create a new open owner and add it to the open owner hash table.
 */
nfs4_open_owner_t *
create_open_owner(cred_t *cr, mntinfo4_t *mi)
{
	nfs4_open_owner_t *oop;
	nfs4_oo_hash_bucket_t *bucketp;

	oop = kmem_alloc(sizeof (nfs4_open_owner_t), KM_SLEEP);
	/*
	 * Make sure the cred doesn't go away when we put this open owner
	 * on the free list, as well as make crcmp() a valid check.
	 */
	crhold(cr);
	oop->oo_cred = cr;
	mutex_init(&oop->oo_lock, NULL, MUTEX_DEFAULT, NULL);
	oop->oo_ref_count = 1;
	oop->oo_valid = 1;
	oop->oo_just_created = NFS4_JUST_CREATED;
	oop->oo_seqid = 0;
	oop->oo_seqid_inuse = 0;
	oop->oo_last_good_seqid = 0;
	oop->oo_last_good_op = TAG_NONE;
	oop->oo_cred_otw = NULL;
	cv_init(&oop->oo_cv_seqid_sync, NULL, CV_DEFAULT, NULL);

	/*
	 * A Solaris open_owner is <oo_seq_num>
	 */
	oop->oo_name = nfs4_get_new_oo_name();

	/* now add the struct into the cred hash table */
	ASSERT(mutex_owned(&mi->mi_lock));
	bucketp = lock_bucket(cr, mi);
	list_insert_head(&bucketp->b_oo_hash_list, oop);
	unlock_bucket(bucketp);

	return (oop);
}

/*
 * Create a new open stream and add it to the rnode's list.
 * Increments the ref count on oop.
 * Returns with 'os_sync_lock' held.
 */
nfs4_open_stream_t *
create_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp)
{
	nfs4_open_stream_t *osp;

#ifdef DEBUG
	mutex_enter(&oop->oo_lock);
	ASSERT(oop->oo_seqid_inuse);
	mutex_exit(&oop->oo_lock);
#endif

	osp = kmem_alloc(sizeof (nfs4_open_stream_t), KM_SLEEP);
	osp->os_open_ref_count = 1;
	osp->os_mapcnt = 0;
	osp->os_ref_count = 2;
	osp->os_valid = 1;
	osp->os_open_owner = oop;
	osp->os_orig_oo_name = oop->oo_name;
	bzero(&osp->open_stateid, sizeof (stateid4));
	osp->os_share_acc_read = 0;
	osp->os_share_acc_write = 0;
	osp->os_mmap_read = 0;
	osp->os_mmap_write = 0;
	osp->os_share_deny_none = 0;
	osp->os_share_deny_read = 0;
	osp->os_share_deny_write = 0;
	osp->os_delegation = 0;
	osp->os_dc_openacc = 0;
	osp->os_final_close = 0;
	osp->os_pending_close = 0;
	osp->os_failed_reopen = 0;
	osp->os_force_close = 0;
	mutex_init(&osp->os_sync_lock, NULL, MUTEX_DEFAULT, NULL);

	/* open owner gets a reference */
	open_owner_hold(oop);

	/* now add the open stream to rp */
	mutex_enter(&rp->r_os_lock);
	mutex_enter(&osp->os_sync_lock);
	list_insert_head(&rp->r_open_streams, osp);
	mutex_exit(&rp->r_os_lock);

	return (osp);
}

/*
 * Returns an open stream with 'os_sync_lock' held.
 * If the open stream is found (rather than created), its
 * 'os_open_ref_count' is bumped.
 *
 * There is no race with two threads entering this function
 * and creating two open streams for the same <oop, rp> pair.
 * This is because the open seqid sync must be acquired, thus
 * only allowing one thread in at a time.
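 * The caller must hold the open owner's open seqid sync; this is
 * asserted (under DEBUG) at the top of the function.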
 */
nfs4_open_stream_t *
find_or_create_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp,
    int *created_osp)
{
	nfs4_open_stream_t *osp;

#ifdef DEBUG
	mutex_enter(&oop->oo_lock);
	ASSERT(oop->oo_seqid_inuse);
	mutex_exit(&oop->oo_lock);
#endif

	osp = find_open_stream(oop, rp);
	if (!osp) {
		osp = create_open_stream(oop, rp);
		if (osp)
			*created_osp = 1;
	} else {
		*created_osp = 0;
		osp->os_open_ref_count++;
	}

	return (osp);
}

static uint64_t lock_owner_seq_num = 0;

/*
 * Create a new lock owner and add it to the rnode's list.
 * Assumes the rnode's r_statev4_lock is held.
 * The created lock owner has a reference count of 2: one for the list and
 * one for the caller to use.  Returns the lock owner locked down.
 */
nfs4_lock_owner_t *
create_lock_owner(rnode4_t *rp, pid_t pid)
{
	nfs4_lock_owner_t *lop;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "create_lock_owner: pid %x", pid));

	ASSERT(mutex_owned(&rp->r_statev4_lock));

	lop = kmem_alloc(sizeof (nfs4_lock_owner_t), KM_SLEEP);
	lop->lo_ref_count = 2;
	lop->lo_valid = 1;
	bzero(&lop->lock_stateid, sizeof (stateid4));
	lop->lo_pid = pid;
	lop->lock_seqid = 0;
	lop->lo_pending_rqsts = 0;
	lop->lo_just_created = NFS4_JUST_CREATED;
	lop->lo_flags = 0;
	lop->lo_seqid_holder = NULL;

	/*
	 * A Solaris lock_owner is <seq_num><pid>
	 */
	lop->lock_owner_name.ln_seq_num =
	    atomic_add_64_nv(&lock_owner_seq_num, 1);
	lop->lock_owner_name.ln_pid = pid;

	cv_init(&lop->lo_cv_seqid_sync, NULL, CV_DEFAULT, NULL);
	mutex_init(&lop->lo_lock, NULL, MUTEX_DEFAULT, NULL);

	mutex_enter(&lop->lo_lock);

	/* now add the lock owner to rp */
	lop->lo_prev_rnode = &rp->r_lo_head;
	lop->lo_next_rnode = rp->r_lo_head.lo_next_rnode;
	rp->r_lo_head.lo_next_rnode->lo_prev_rnode = lop;
	rp->r_lo_head.lo_next_rnode = lop;

	return (lop);
}

/*
 * This sets the lock seqid of a lock owner.
 */
void
nfs4_set_lock_seqid(seqid4 seqid, nfs4_lock_owner_t *lop)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_set_lock_seqid"));

	ASSERT(lop != NULL);
	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);

	lop->lock_seqid = seqid;
}

static void
nfs4_set_new_lock_owner_args(lock_owner4 *owner, pid_t pid)
{
	nfs4_lo_name_t *cast_namep;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_set_new_lock_owner_args"));

	owner->owner_len = sizeof (*cast_namep);
	owner->owner_val = kmem_alloc(owner->owner_len, KM_SLEEP);
	/*
	 * A Solaris lock_owner is <seq_num><pid>
	 */
	cast_namep = (nfs4_lo_name_t *)owner->owner_val;
	cast_namep->ln_seq_num = atomic_add_64_nv(&lock_owner_seq_num, 1);
	cast_namep->ln_pid = pid;
}

/*
 * Fill in the lock owner args.
 */
void
nfs4_setlockowner_args(lock_owner4 *owner, rnode4_t *rp, pid_t pid)
{
	nfs4_lock_owner_t *lop;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_setlockowner_args"));

	/* This increments lop's ref count */
	lop = find_lock_owner(rp, pid, LOWN_VALID_STATEID);

	if (!lop)
		goto make_up_args;

	mutex_enter(&lop->lo_lock);
	owner->owner_len = sizeof (lop->lock_owner_name);
	owner->owner_val = kmem_alloc(owner->owner_len, KM_SLEEP);
	bcopy(&lop->lock_owner_name, owner->owner_val,
	    owner->owner_len);
	mutex_exit(&lop->lo_lock);
	lock_owner_rele(lop);
	return;

make_up_args:
	nfs4_set_new_lock_owner_args(owner, pid);
}

/*
 * This ends our use of the open owner's open seqid by setting
 * the appropriate flags and issuing a cv_signal to wake up another
 * thread waiting to use the open seqid.
 */

void
nfs4_end_open_seqid_sync(nfs4_open_owner_t *oop)
{
	mutex_enter(&oop->oo_lock);
	ASSERT(oop->oo_seqid_inuse);
	oop->oo_seqid_inuse = 0;
	cv_broadcast(&oop->oo_cv_seqid_sync);
	mutex_exit(&oop->oo_lock);
}

/*
 * This starts our use of the open owner's open seqid by setting
 * the oo_seqid_inuse to true.  We will wait (forever) with a
 * cv_wait() until we are woken up.
 *
 * Return values:
 * 0		no problems
 * EAGAIN	caller should retry (like a recovery retry)
 */
int
nfs4_start_open_seqid_sync(nfs4_open_owner_t *oop, mntinfo4_t *mi)
{
	int error = 0;
#ifdef DEBUG
	static int ops = 0;	/* fault injection */
#endif

#ifdef DEBUG
	if (seqid_sync_faults && curthread != mi->mi_recovthread &&
	    ++ops % 5 == 0)
		return (EAGAIN);
#endif

	mutex_enter(&mi->mi_lock);
	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
	    curthread != mi->mi_recovthread)
		error = EAGAIN;
	mutex_exit(&mi->mi_lock);
	if (error != 0)
		goto done;

	mutex_enter(&oop->oo_lock);

	while (oop->oo_seqid_inuse) {
		NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
		    "nfs4_start_open_seqid_sync waiting on cv"));

		cv_wait(&oop->oo_cv_seqid_sync, &oop->oo_lock);
	}

	oop->oo_seqid_inuse = 1;

	mutex_exit(&oop->oo_lock);

	mutex_enter(&mi->mi_lock);
	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
	    curthread != mi->mi_recovthread)
		error = EAGAIN;
	mutex_exit(&mi->mi_lock);

	if (error == EAGAIN)
		nfs4_end_open_seqid_sync(oop);

	NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
	    "nfs4_start_open_seqid_sync: error=%d", error));

done:
	return (error);
}

#ifdef DEBUG
int bypass_otw[2];
#endif

/*
 * Checks to see if the OPEN OTW is necessary, that is, if it's already
 * been opened with the same access and deny bits we are now asking for.
 * Note, this assumes that vp is backed by an rnode.
 */
int
nfs4_is_otw_open_necessary(nfs4_open_owner_t *oop, int flag, vnode_t *vp,
    int just_been_created, int *errorp, int acc, nfs4_recov_state_t *rsp)
{
	rnode4_t *rp;
	nfs4_open_stream_t *osp;
	open_delegation_type4 dt;

	rp = VTOR4(vp);

	/*
	 * Grab the delegation type.  This function is protected against
	 * the delegation being returned by virtue of start_op (called
	 * by nfs4open_otw) taking the r_deleg_recall_lock in read mode;
	 * delegreturn requires this lock in write mode to proceed.
	 */
	ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_READER));
	dt = get_dtype(rp);

	/* returns with 'os_sync_lock' held */
	osp = find_open_stream(oop, rp);

	if (osp) {
		uint32_t do_otw = 0;

		if (osp->os_failed_reopen) {
			NFS4_DEBUG(nfs4_open_stream_debug, (CE_NOTE,
			    "nfs4_is_otw_open_necessary: os_failed_reopen "
			    "set on osp %p, cr %p, rp %s", (void *)osp,
			    (void *)osp->os_open_owner->oo_cred,
			    rnode4info(rp)));
			do_otw = 1;
		}

		/*
		 * check access/deny bits
		 */
		if (!do_otw && (flag & FREAD))
			if (osp->os_share_acc_read == 0 &&
			    dt == OPEN_DELEGATE_NONE)
				do_otw = 1;

		if (!do_otw && (flag & FWRITE))
			if (osp->os_share_acc_write == 0 &&
			    dt != OPEN_DELEGATE_WRITE)
				do_otw = 1;

		if (!do_otw) {
			NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
			    "nfs4_is_otw_open_necessary: can skip this "
			    "open OTW"));
			if (!just_been_created) {
				osp->os_open_ref_count++;
				if (flag & FREAD)
					osp->os_share_acc_read++;
				if (flag & FWRITE)
					osp->os_share_acc_write++;
				osp->os_share_deny_none++;
			}

			/*
			 * Need to reset this bitfield for the possible case
			 * where we were going to OTW CLOSE the file, got a
			 * non-recoverable error, and before we could retry
			 * the CLOSE, OPENed the file again.
			 */
			ASSERT(osp->os_open_owner->oo_seqid_inuse);
			osp->os_final_close = 0;
			osp->os_force_close = 0;

			mutex_exit(&osp->os_sync_lock);
			open_stream_rele(osp, rp);

#ifdef DEBUG
			bypass_otw[0]++;
#endif

			*errorp = 0;
			return (0);
		}
		mutex_exit(&osp->os_sync_lock);
		open_stream_rele(osp, rp);

	} else if (dt != OPEN_DELEGATE_NONE) {
		/*
		 * Even if there isn't an open_stream yet, we may still be
		 * able to bypass the otw open if the client owns a
		 * delegation.
		 *
		 * If you are asking for WRITE, but I only have
		 * a read delegation, then you still have to go otw.
		 */

		if (flag & FWRITE && dt == OPEN_DELEGATE_READ)
			return (1);

		/*
		 * TODO - evaluate the nfsace4
		 */

		/*
		 * Check the access flags to make sure the caller
		 * had permission.
		 */
		if (flag & FREAD && !(acc & VREAD))
			return (1);

		if (flag & FWRITE && !(acc & VWRITE))
			return (1);

		/*
		 * create_open_stream will add a reference to oop,
		 * this will prevent the open_owner_rele done in
		 * nfs4open_otw from destroying the open_owner.
		 */

		/* returns with 'os_sync_lock' held */
		osp = create_open_stream(oop, rp);
		if (osp == NULL)
			return (1);

		osp->open_stateid = rp->r_deleg_stateid;
		osp->os_delegation = 1;

		if (flag & FREAD)
			osp->os_share_acc_read++;
		if (flag & FWRITE)
			osp->os_share_acc_write++;

		osp->os_share_deny_none++;
		mutex_exit(&osp->os_sync_lock);

		open_stream_rele(osp, rp);

		mutex_enter(&oop->oo_lock);
		oop->oo_just_created = NFS4_PERM_CREATED;
		mutex_exit(&oop->oo_lock);

		ASSERT(rsp != NULL);
		if (rsp->rs_sp != NULL) {
			mutex_enter(&rsp->rs_sp->s_lock);
			nfs4_inc_state_ref_count_nolock(rsp->rs_sp,
			    VTOMI4(vp));
			mutex_exit(&rsp->rs_sp->s_lock);
		}
#ifdef DEBUG
		bypass_otw[1]++;
#endif

		*errorp = 0;
		return (0);
	}

	return (1);
}

static open_delegation_type4
get_dtype(rnode4_t *rp)
{
	open_delegation_type4 dt;

	mutex_enter(&rp->r_statev4_lock);
	ASSERT(!rp->r_deleg_return_inprog);
	if (rp->r_deleg_return_pending)
		dt = OPEN_DELEGATE_NONE;
	else
		dt = rp->r_deleg_type;
	mutex_exit(&rp->r_statev4_lock);

	return (dt);
}

/*
 * Fill in *locker with the lock state arguments for a LOCK call.  If
 * lop->lo_just_created == NFS4_JUST_CREATED, oop and osp must be non-NULL.
 * Caller must already hold the necessary seqid sync lock(s).
 */

void
nfs4_setup_lock_args(nfs4_lock_owner_t *lop, nfs4_open_owner_t *oop,
    nfs4_open_stream_t *osp, clientid4 clientid, locker4 *locker)
{
	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
	if (lop->lo_just_created == NFS4_JUST_CREATED) {
		/* this is a new lock request */
		open_to_lock_owner4 *nown;

		ASSERT(oop != NULL);
		ASSERT(osp != NULL);

		locker->new_lock_owner = TRUE;
		nown = &locker->locker4_u.open_owner;
		nown->open_seqid = nfs4_get_open_seqid(oop) + 1;
		mutex_enter(&osp->os_sync_lock);
		nown->open_stateid = osp->open_stateid;
		mutex_exit(&osp->os_sync_lock);
		nown->lock_seqid = lop->lock_seqid;	/* initial, so no +1 */

		nown->lock_owner.clientid = clientid;
		nown->lock_owner.owner_len = sizeof (lop->lock_owner_name);
		nown->lock_owner.owner_val =
		    kmem_alloc(nown->lock_owner.owner_len, KM_SLEEP);
		bcopy(&lop->lock_owner_name, nown->lock_owner.owner_val,
		    nown->lock_owner.owner_len);
	} else {
		exist_lock_owner4 *eown;
		/* have an existing lock owner */

		locker->new_lock_owner = FALSE;
		eown = &locker->locker4_u.lock_owner;
		mutex_enter(&lop->lo_lock);
		eown->lock_stateid = lop->lock_stateid;
		mutex_exit(&lop->lo_lock);
		eown->lock_seqid = lop->lock_seqid + 1;
	}
}

/*
 * This starts our use of the lock owner's lock seqid by setting
 * the lo_flags to NFS4_LOCK_SEQID_INUSE.  We will wait (forever)
 * with a cv_wait() until we are woken up.
 *
 * Return values:
 * 0		no problems
 * EAGAIN	caller should retry (like a recovery retry)
 */
int
nfs4_start_lock_seqid_sync(nfs4_lock_owner_t *lop, mntinfo4_t *mi)
{
	int error = 0;
#ifdef DEBUG
	static int ops = 0;	/* fault injection */
#endif

#ifdef DEBUG
	if (seqid_sync_faults && curthread != mi->mi_recovthread &&
	    ++ops % 7 == 0)
		return (EAGAIN);
#endif

	mutex_enter(&mi->mi_lock);
	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
	    curthread != mi->mi_recovthread)
		error = EAGAIN;
	mutex_exit(&mi->mi_lock);
	if (error != 0)
		goto done;

	mutex_enter(&lop->lo_lock);

	ASSERT(lop->lo_seqid_holder != curthread);
	while (lop->lo_flags & NFS4_LOCK_SEQID_INUSE) {
		NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
		    "nfs4_start_lock_seqid_sync: waiting on cv"));

		cv_wait(&lop->lo_cv_seqid_sync, &lop->lo_lock);
	}
	NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE, "nfs4_start_lock_seqid_sync: "
	    "NFS4_LOCK_SEQID_INUSE"));

	lop->lo_flags |= NFS4_LOCK_SEQID_INUSE;
	lop->lo_seqid_holder = curthread;
	mutex_exit(&lop->lo_lock);

	mutex_enter(&mi->mi_lock);
	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
	    curthread != mi->mi_recovthread)
		error = EAGAIN;
	mutex_exit(&mi->mi_lock);

	if (error == EAGAIN)
		nfs4_end_lock_seqid_sync(lop);

	NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
	    "nfs4_start_lock_seqid_sync: error=%d", error));

done:
	return (error);
}

/*
 * This ends our use of the lock owner's lock seqid by setting
 * the appropriate flags and issuing a cv_signal to wake up another
 * thread waiting to use the lock seqid.
 */
void
nfs4_end_lock_seqid_sync(nfs4_lock_owner_t *lop)
{
	mutex_enter(&lop->lo_lock);
	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
	ASSERT(lop->lo_seqid_holder == curthread);
	lop->lo_flags &= ~NFS4_LOCK_SEQID_INUSE;
	lop->lo_seqid_holder = NULL;
	cv_broadcast(&lop->lo_cv_seqid_sync);
	mutex_exit(&lop->lo_lock);
}

/*
 * Returns a reference to a lock owner via lopp, which has its lock seqid
 * synchronization started.
 * If the lock owner is in the 'just_created' state, then we return its open
 * owner and open stream and start the open seqid synchronization.
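 * On success the caller is responsible for ending the seqid
 * synchronization(s) and releasing the references returned here.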
 *
 * Return values:
 * NFS4_OK		no problems
 * NFS4ERR_DELAY	there is lost state to recover; caller should retry
 * NFS4ERR_IO		no open stream
 */
nfsstat4
nfs4_find_or_create_lock_owner(pid_t pid, rnode4_t *rp, cred_t *cr,
    nfs4_open_owner_t **oopp, nfs4_open_stream_t **ospp,
    nfs4_lock_owner_t **lopp)
{
	nfs4_lock_owner_t *lop, *next_lop;
	mntinfo4_t *mi;
	int error = 0;
	nfsstat4 stat;

	mi = VTOMI4(RTOV4(rp));

	mutex_enter(&rp->r_statev4_lock);

	lop = rp->r_lo_head.lo_next_rnode;
	while (lop != &rp->r_lo_head) {
		mutex_enter(&lop->lo_lock);
		if (lop->lo_pid == pid && lop->lo_valid != 0) {
			/* Found a matching lock owner */
			NFS4_DEBUG(nfs4_client_state_debug,
			    (CE_NOTE, "nfs4_find_or_create_lock_owner: "
			    "got a match"));
			lop->lo_ref_count++;
			break;
		}
		next_lop = lop->lo_next_rnode;
		mutex_exit(&lop->lo_lock);
		lop = next_lop;
	}

	if (lop == &rp->r_lo_head) {
		/* create temporary lock owner */
		lop = create_lock_owner(rp, pid);
	}
	mutex_exit(&rp->r_statev4_lock);

	/* Have a locked down lock owner struct now */
	if (lop->lo_just_created != NFS4_JUST_CREATED) {
		/* This is an existing lock owner */
		*oopp = NULL;
		*ospp = NULL;
	} else {
		/* Lock owner doesn't exist yet */

		/* First grab open owner seqid synchronization */
		mutex_exit(&lop->lo_lock);
		*oopp = find_open_owner(cr, NFS4_PERM_CREATED, mi);
		if (*oopp == NULL)
			goto kill_new_lop;
		error = nfs4_start_open_seqid_sync(*oopp, mi);
		if (error == EAGAIN) {
			stat = NFS4ERR_DELAY;
			goto failed;
		}
		*ospp = find_open_stream(*oopp, rp);
		if (*ospp == NULL) {
			nfs4_end_open_seqid_sync(*oopp);
			goto kill_new_lop;
		}
		if ((*ospp)->os_failed_reopen) {
			mutex_exit(&(*ospp)->os_sync_lock);
			NFS4_DEBUG((nfs4_open_stream_debug ||
			    nfs4_client_lock_debug), (CE_NOTE,
			    "nfs4_find_or_create_lock_owner: os_failed_reopen;"
			    " osp %p, cr %p, rp %s", (void *)(*ospp),
			    (void *)cr, rnode4info(rp)));
			nfs4_end_open_seqid_sync(*oopp);
			stat = NFS4ERR_IO;
			goto failed;
		}
		mutex_exit(&(*ospp)->os_sync_lock);

		/*
		 * Now see if the lock owner has become permanent while we
		 * had released our lock.
		 */
		mutex_enter(&lop->lo_lock);
		if (lop->lo_just_created != NFS4_JUST_CREATED) {
			nfs4_end_open_seqid_sync(*oopp);
			open_stream_rele(*ospp, rp);
			open_owner_rele(*oopp);
			*oopp = NULL;
			*ospp = NULL;
		}
	}
	mutex_exit(&lop->lo_lock);

	error = nfs4_start_lock_seqid_sync(lop, mi);
	if (error == EAGAIN) {
		if (*oopp != NULL)
			nfs4_end_open_seqid_sync(*oopp);
		stat = NFS4ERR_DELAY;
		goto failed;
	}
	ASSERT(error == 0);

	*lopp = lop;
	return (NFS4_OK);

kill_new_lop:
	/*
	 * A previous CLOSE was attempted but got EINTR, but the application
	 * continued to use the unspecified state file descriptor.  But now
	 * the open stream is gone (which could also destroy the open owner),
	 * hence we can no longer continue.  The calling function should
	 * return EIO to the application.
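	 * nfs4_rnode_remove_lock_owner() below drops the rnode list's
	 * reference; the 'failed' path then drops the caller's reference.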
	 */
	NFS4_DEBUG(nfs4_lost_rqst_debug || nfs4_client_lock_debug,
	    (CE_NOTE, "nfs4_find_or_create_lock_owner: destroy newly created "
	    "lop %p, oop %p, osp %p", (void *)lop, (void *)(*oopp),
	    (void *)(*ospp)));

	nfs4_rnode_remove_lock_owner(rp, lop);
	stat = NFS4ERR_IO;

failed:
	lock_owner_rele(lop);
	if (*oopp) {
		open_owner_rele(*oopp);
		*oopp = NULL;
	}
	if (*ospp) {
		open_stream_rele(*ospp, rp);
		*ospp = NULL;
	}
	return (stat);
}

/*
 * This function grabs a recently freed open owner off of the freed open
 * owner list if there is a match on the cred 'cr'.  It returns NULL if no
 * such match is found.  If a match is found, it resets 'oo_ref_count' and
 * 'oo_valid' back to 1 (sane values) before returning the open owner.
 */
static nfs4_open_owner_t *
find_freed_open_owner(cred_t *cr, nfs4_oo_hash_bucket_t *bucketp,
    mntinfo4_t *mi)
{
	nfs4_open_owner_t *foop;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "find_freed_open_owner: cred %p", (void*)cr));

	ASSERT(mutex_owned(&mi->mi_lock));
	ASSERT(mutex_owned(&bucketp->b_lock));

	/* got hash bucket, search through freed open owners */
	for (foop = list_head(&mi->mi_foo_list); foop != NULL;
	    foop = list_next(&mi->mi_foo_list, foop)) {
		if (!crcmp(foop->oo_cred, cr)) {
			NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
			    "find_freed_open_owner: got a match open owner "
			    "%p", (void *)foop));
			foop->oo_ref_count = 1;
			foop->oo_valid = 1;
			list_remove(&mi->mi_foo_list, foop);
			mi->mi_foo_num--;

			/* now add the struct into the cred hash table */
			list_insert_head(&bucketp->b_oo_hash_list, foop);
			return (foop);
		}
	}

	return (NULL);
}

/*
 * Insert the newly freed 'oop' into the mi's freed oop list,
 * always at the head of the list.  If we've already reached
 * our maximum allowed number of freed open owners (mi_foo_max),
 * then remove the LRU open owner on the list (namely the tail).
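 * The freed list is what lets find_freed_open_owner() revive an open
 * owner for a cred that starts opening files again.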
 */
static void
nfs4_free_open_owner(nfs4_open_owner_t *oop, mntinfo4_t *mi)
{
	nfs4_open_owner_t *lru_foop;

	if (mi->mi_foo_num < mi->mi_foo_max) {
		NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
		    "nfs4_free_open_owner: num free %d, max free %d, "
		    "insert open owner %p for mntinfo4 %p",
		    mi->mi_foo_num, mi->mi_foo_max, (void *)oop,
		    (void *)mi));
		list_insert_head(&mi->mi_foo_list, oop);
		mi->mi_foo_num++;
		return;
	}

	/* need to replace a freed open owner */

	lru_foop = list_tail(&mi->mi_foo_list);

	NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
	    "nfs4_free_open_owner: destroy %p, insert %p",
	    (void *)lru_foop, (void *)oop));

	list_remove(&mi->mi_foo_list, lru_foop);
	nfs4_destroy_open_owner(lru_foop);

	/* head always has latest freed oop */
	list_insert_head(&mi->mi_foo_list, oop);
}

void
nfs4_destroy_open_owner(nfs4_open_owner_t *oop)
{
	ASSERT(oop != NULL);

	crfree(oop->oo_cred);
	if (oop->oo_cred_otw)
		crfree(oop->oo_cred_otw);
	mutex_destroy(&oop->oo_lock);
	cv_destroy(&oop->oo_cv_seqid_sync);
	kmem_free(oop, sizeof (*oop));
}

seqid4
nfs4_get_open_seqid(nfs4_open_owner_t *oop)
{
	ASSERT(oop->oo_seqid_inuse);
	return (oop->oo_seqid);
}

/*
 * This sets the open seqid for an <open owner/mntinfo4> pair.
 */
void
nfs4_set_open_seqid(seqid4 seqid, nfs4_open_owner_t *oop,
    nfs4_tag_type_t tag_type)
{
	ASSERT(oop->oo_seqid_inuse);
	oop->oo_seqid = seqid;
	oop->oo_last_good_seqid = seqid;
	oop->oo_last_good_op = tag_type;
}

/*
 * This bumps the current open seqid for the open owner 'oop'.
 */
void
nfs4_get_and_set_next_open_seqid(nfs4_open_owner_t *oop,
    nfs4_tag_type_t tag_type)
{
	ASSERT(oop->oo_seqid_inuse);
	oop->oo_seqid++;
	oop->oo_last_good_seqid = oop->oo_seqid;
	oop->oo_last_good_op = tag_type;
}

/*
 * If no open owner was provided, this function takes the cred to find an
 * open owner within the given mntinfo4_t.  Either way we return the
 * open owner's OTW credential if it exists; otherwise returns the
 * supplied 'cr'.
 *
 * A hold is put on the returned credential, and it is up to the caller
 * to free the cred.
 */
cred_t *
nfs4_get_otw_cred(cred_t *cr, mntinfo4_t *mi, nfs4_open_owner_t *provided_oop)
{
	cred_t *ret_cr;
	nfs4_open_owner_t *oop = provided_oop;

	if (oop == NULL)
		oop = find_open_owner(cr, NFS4_PERM_CREATED, mi);
	if (oop != NULL) {
		mutex_enter(&oop->oo_lock);
		if (oop->oo_cred_otw)
			ret_cr = oop->oo_cred_otw;
		else
			ret_cr = cr;
		crhold(ret_cr);
		mutex_exit(&oop->oo_lock);
		if (provided_oop == NULL)
			open_owner_rele(oop);
	} else {
		ret_cr = cr;
		crhold(ret_cr);
	}
	return (ret_cr);
}

/*
 * Retrieves the next open stream in the rnode's list if an open stream
 * is provided; otherwise gets the first open stream in the list.
 * The open owner for that open stream is then retrieved, and if its
 * oo_cred_otw exists then it is returned; otherwise the provided 'cr'
 * is returned.  *osp is set to the 'found' open stream.
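 * '*first_time' selects the cred-based fast path below and is cleared
 * before returning; '*last_time' is set once the open stream list has
 * been exhausted.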
 *
 * Note: we don't set *osp to the open stream retrieved via the
 * optimized check since that won't necessarily be at the beginning
 * of the rnode list, and if that osp doesn't work we'd like to
 * check _all_ open streams (starting from the beginning of the
 * rnode list).
 */
cred_t *
nfs4_get_otw_cred_by_osp(rnode4_t *rp, cred_t *cr,
    nfs4_open_stream_t **osp, bool_t *first_time, bool_t *last_time)
{
	nfs4_open_stream_t *next_osp = NULL;
	cred_t *ret_cr;

	ASSERT(cr != NULL);
	/*
	 * As an optimization, try to find the open owner
	 * for the cred provided since that's most likely
	 * to work.
	 */
	if (*first_time) {
		nfs4_open_owner_t *oop;

		oop = find_open_owner(cr, NFS4_PERM_CREATED, VTOMI4(RTOV4(rp)));
		if (oop) {
			next_osp = find_open_stream(oop, rp);
			if (next_osp)
				mutex_exit(&next_osp->os_sync_lock);
			open_owner_rele(oop);
		} else {
			next_osp = NULL;
		}
	} else {
		int delay_rele = 0;

		/* return the next open stream for this rnode */
		mutex_enter(&rp->r_os_lock);
		/* Now, no one can add or delete to rp's open streams list */

		if (*osp) {
			next_osp = list_next(&rp->r_open_streams, *osp);
			/*
			 * Delay the rele of *osp until after we drop
			 * r_os_lock to not deadlock with oo_lock
			 * via an open_stream_rele()->open_owner_rele().
			 */
			delay_rele = 1;
		} else {
			next_osp = list_head(&rp->r_open_streams);
		}
		if (next_osp) {
			nfs4_open_stream_t *tmp_osp;

			/* find the next valid open stream */
			mutex_enter(&next_osp->os_sync_lock);
			while (next_osp && !next_osp->os_valid) {
				tmp_osp =
				    list_next(&rp->r_open_streams, next_osp);
				mutex_exit(&next_osp->os_sync_lock);
				next_osp = tmp_osp;
				if (next_osp)
					mutex_enter(&next_osp->os_sync_lock);
			}
			if (next_osp) {
				next_osp->os_ref_count++;
				mutex_exit(&next_osp->os_sync_lock);
			}
		}
		mutex_exit(&rp->r_os_lock);
		if (delay_rele)
			open_stream_rele(*osp, rp);
	}

	if (next_osp) {
		nfs4_open_owner_t *oop;

		oop = next_osp->os_open_owner;
		mutex_enter(&oop->oo_lock);
		if (oop->oo_cred_otw)
			ret_cr = oop->oo_cred_otw;
		else
			ret_cr = cr;
		crhold(ret_cr);
		mutex_exit(&oop->oo_lock);
		if (*first_time) {
			open_stream_rele(next_osp, rp);
			*osp = NULL;
		} else
			*osp = next_osp;
	} else {
		/* just return the cred provided to us */
		if (*first_time != TRUE)
			*last_time = TRUE;
		*osp = NULL;
		ret_cr = cr;
		crhold(ret_cr);
	}

	if (*first_time)
		*first_time = FALSE;
	return (ret_cr);
}

void
nfs4_init_stateid_types(nfs4_stateid_types_t *sid_tp)
{
	bzero(&sid_tp->d_sid, sizeof (stateid4));
	bzero(&sid_tp->l_sid, sizeof (stateid4));
	bzero(&sid_tp->o_sid, sizeof (stateid4));
	sid_tp->cur_sid_type = NO_SID;
}

void
nfs4_save_stateid(stateid4 *s1, nfs4_stateid_types_t *sid_tp)
{
	NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
	    "nfs4_save_stateid: saved %s stateid",
	    sid_tp->cur_sid_type == DEL_SID ? "delegation" :
	    sid_tp->cur_sid_type == LOCK_SID ? "lock" :
	    sid_tp->cur_sid_type == OPEN_SID ?
"open" : "special")); 1724 1725 switch (sid_tp->cur_sid_type) { 1726 case DEL_SID: 1727 sid_tp->d_sid = *s1; 1728 break; 1729 case LOCK_SID: 1730 sid_tp->l_sid = *s1; 1731 break; 1732 case OPEN_SID: 1733 sid_tp->o_sid = *s1; 1734 break; 1735 case SPEC_SID: 1736 default: 1737 cmn_err(CE_PANIC, "nfs4_save_stateid: illegal " 1738 "stateid type %d", sid_tp->cur_sid_type); 1739 } 1740 } 1741 1742 /* 1743 * We got NFS4ERR_BAD_SEQID. Setup some arguments to pass to recovery. 1744 * Caller is responsible for freeing. 1745 */ 1746 nfs4_bseqid_entry_t * 1747 nfs4_create_bseqid_entry(nfs4_open_owner_t *oop, nfs4_lock_owner_t *lop, 1748 vnode_t *vp, pid_t pid, nfs4_tag_type_t tag, seqid4 seqid) 1749 { 1750 nfs4_bseqid_entry_t *bsep; 1751 1752 bsep = kmem_alloc(sizeof (*bsep), KM_SLEEP); 1753 bsep->bs_oop = oop; 1754 bsep->bs_lop = lop; 1755 bsep->bs_vp = vp; 1756 bsep->bs_pid = pid; 1757 bsep->bs_tag = tag; 1758 bsep->bs_seqid = seqid; 1759 1760 return (bsep); 1761 } 1762 1763 void 1764 nfs4open_dg_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp, 1765 nfs4_open_owner_t *oop, nfs4_open_stream_t *osp, cred_t *cr, 1766 vnode_t *vp, int access_close, int deny_close) 1767 { 1768 lost_rqstp->lr_putfirst = FALSE; 1769 1770 ASSERT(vp != NULL); 1771 if (error == ETIMEDOUT || error == EINTR || 1772 NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) { 1773 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, 1774 "nfs4open_dg_save_lost_rqst: error %d", error)); 1775 1776 lost_rqstp->lr_op = OP_OPEN_DOWNGRADE; 1777 /* 1778 * The vp is held and rele'd via the recovery code. 1779 * See nfs4_save_lost_rqst. 1780 */ 1781 lost_rqstp->lr_vp = vp; 1782 lost_rqstp->lr_dvp = NULL; 1783 lost_rqstp->lr_oop = oop; 1784 lost_rqstp->lr_osp = osp; 1785 lost_rqstp->lr_lop = NULL; 1786 lost_rqstp->lr_cr = cr; 1787 lost_rqstp->lr_flk = NULL; 1788 lost_rqstp->lr_dg_acc = access_close; 1789 lost_rqstp->lr_dg_deny = deny_close; 1790 lost_rqstp->lr_putfirst = FALSE; 1791 } else { 1792 lost_rqstp->lr_op = 0; 1793 } 1794 } 1795 1796 /* 1797 * Change the access and deny bits of an OPEN. 1798 * If recovery is needed, *recov_credpp is set to the cred used OTW, 1799 * a hold is placed on it, and *recov_seqidp is set to the seqid used OTW. 1800 */ 1801 void 1802 nfs4_open_downgrade(int access_close, int deny_close, nfs4_open_owner_t *oop, 1803 nfs4_open_stream_t *osp, vnode_t *vp, cred_t *cr, nfs4_lost_rqst_t *lrp, 1804 nfs4_error_t *ep, cred_t **recov_credpp, seqid4 *recov_seqidp) 1805 { 1806 mntinfo4_t *mi; 1807 int downgrade_acc, downgrade_deny; 1808 int new_acc, new_deny; 1809 COMPOUND4args_clnt args; 1810 COMPOUND4res_clnt res; 1811 OPEN_DOWNGRADE4res *odg_res; 1812 nfs_argop4 argop[3]; 1813 nfs_resop4 *resop; 1814 rnode4_t *rp; 1815 bool_t needrecov = FALSE; 1816 int doqueue = 1; 1817 seqid4 seqid = 0; 1818 cred_t *cred_otw; 1819 hrtime_t t; 1820 1821 ASSERT(mutex_owned(&osp->os_sync_lock)); 1822 #if DEBUG 1823 mutex_enter(&oop->oo_lock); 1824 ASSERT(oop->oo_seqid_inuse); 1825 mutex_exit(&oop->oo_lock); 1826 #endif 1827 1828 1829 if (access_close == 0 && deny_close == 0) { 1830 nfs4_error_zinit(ep); 1831 return; 1832 } 1833 1834 cred_otw = nfs4_get_otw_cred(cr, VTOMI4(vp), oop); 1835 1836 cred_retry: 1837 nfs4_error_zinit(ep); 1838 downgrade_acc = 0; 1839 downgrade_deny = 0; 1840 mi = VTOMI4(vp); 1841 rp = VTOR4(vp); 1842 1843 /* 1844 * Check to see if the open stream got closed before we go OTW, 1845 * now that we have acquired the 'os_sync_lock'. 
	 */
	if (!osp->os_valid) {
		NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
		    " open stream has already been closed, return success"));
		/* error has already been set */
		goto no_args_out;
	}

	/* If the file failed recovery, just quit. */
	mutex_enter(&rp->r_statelock);
	if (rp->r_flags & R4RECOVERR) {
		mutex_exit(&rp->r_statelock);
		ep->error = EIO;
		goto no_args_out;
	}
	mutex_exit(&rp->r_statelock);

	seqid = nfs4_get_open_seqid(oop) + 1;

	NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
	    "access_close %d, acc_read %"PRIu64" acc_write %"PRIu64"",
	    access_close, osp->os_share_acc_read, osp->os_share_acc_write));

	/* If we're closing the last READ, need to downgrade */
	if ((access_close & FREAD) && (osp->os_share_acc_read == 1))
		downgrade_acc |= OPEN4_SHARE_ACCESS_READ;

	/* if we're closing the last WRITE, need to downgrade */
	if ((access_close & FWRITE) && (osp->os_share_acc_write == 1))
		downgrade_acc |= OPEN4_SHARE_ACCESS_WRITE;

	downgrade_deny = OPEN4_SHARE_DENY_NONE;

	new_acc = 0;
	new_deny = 0;

	/* set our new access and deny share bits */
	if ((osp->os_share_acc_read > 0) &&
	    !(downgrade_acc & OPEN4_SHARE_ACCESS_READ))
		new_acc |= OPEN4_SHARE_ACCESS_READ;
	if ((osp->os_share_acc_write > 0) &&
	    !(downgrade_acc & OPEN4_SHARE_ACCESS_WRITE))
		new_acc |= OPEN4_SHARE_ACCESS_WRITE;

	new_deny = OPEN4_SHARE_DENY_NONE;

	NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
	    "downgrade acc 0x%x deny 0x%x", downgrade_acc, downgrade_deny));
	NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
	    "new acc 0x%x deny 0x%x", new_acc, new_deny));

	/*
	 * Check to see if we aren't actually doing any downgrade or
	 * if this is the last 'close' but the file is still mmapped.
	 * Skip this if this is a lost request resend so we don't decrement
	 * the osp's share counts more than once.
	 */
	if (!lrp &&
	    ((downgrade_acc == 0 && downgrade_deny == 0) ||
	    (new_acc == 0 && new_deny == 0))) {
		/*
		 * No downgrade to do, but still need to
		 * update osp's os_share_* counts.
		 */
		NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE,
		    "nfs4_open_downgrade: just lower the osp's count by %s",
		    (access_close & FREAD) && (access_close & FWRITE) ?
		    "read and write" : (access_close & FREAD) ? "read" :
		    (access_close & FWRITE) ?
"write" : "bogus")); 1915 if (access_close & FREAD) 1916 osp->os_share_acc_read--; 1917 if (access_close & FWRITE) 1918 osp->os_share_acc_write--; 1919 osp->os_share_deny_none--; 1920 nfs4_error_zinit(ep); 1921 1922 goto no_args_out; 1923 } 1924 1925 if (osp->os_orig_oo_name != oop->oo_name) { 1926 ep->error = EIO; 1927 goto no_args_out; 1928 } 1929 1930 /* setup the COMPOUND args */ 1931 if (lrp) 1932 args.ctag = TAG_OPEN_DG_LOST; 1933 else 1934 args.ctag = TAG_OPEN_DG; 1935 1936 args.array_len = 3; 1937 args.array = argop; 1938 1939 /* putfh */ 1940 argop[0].argop = OP_CPUTFH; 1941 argop[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh; 1942 1943 argop[1].argop = OP_GETATTR; 1944 argop[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK; 1945 argop[1].nfs_argop4_u.opgetattr.mi = mi; 1946 1947 ASSERT(mutex_owned(&osp->os_sync_lock)); 1948 ASSERT(osp->os_delegation == FALSE); 1949 1950 /* open downgrade */ 1951 argop[2].argop = OP_OPEN_DOWNGRADE; 1952 argop[2].nfs_argop4_u.opopen_downgrade.open_stateid = osp->open_stateid; 1953 argop[2].nfs_argop4_u.opopen_downgrade.share_access = new_acc; 1954 argop[2].nfs_argop4_u.opopen_downgrade.share_deny = new_deny; 1955 argop[2].nfs_argop4_u.opopen_downgrade.seqid = seqid; 1956 1957 t = gethrtime(); 1958 1959 rfs4call(mi, &args, &res, cred_otw, &doqueue, 0, ep); 1960 1961 if (ep->error == 0 && nfs4_need_to_bump_seqid(&res)) 1962 nfs4_set_open_seqid(seqid, oop, args.ctag); 1963 1964 if ((ep->error == EACCES || 1965 (ep->error == 0 && res.status == NFS4ERR_ACCESS)) && 1966 cred_otw != cr) { 1967 crfree(cred_otw); 1968 cred_otw = cr; 1969 crhold(cred_otw); 1970 if (!ep->error) 1971 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1972 goto cred_retry; 1973 } 1974 1975 needrecov = nfs4_needs_recovery(ep, TRUE, mi->mi_vfsp); 1976 1977 if (needrecov && recov_credpp) { 1978 *recov_credpp = cred_otw; 1979 crhold(*recov_credpp); 1980 if (recov_seqidp) 1981 *recov_seqidp = seqid; 1982 } 1983 1984 if (!ep->error && !res.status) { 1985 /* get the open downgrade results */ 1986 resop = &res.array[2]; 1987 odg_res = &resop->nfs_resop4_u.opopen_downgrade; 1988 1989 osp->open_stateid = odg_res->open_stateid; 1990 1991 /* set the open streams new access/deny bits */ 1992 if (access_close & FREAD) 1993 osp->os_share_acc_read--; 1994 if (access_close & FWRITE) 1995 osp->os_share_acc_write--; 1996 osp->os_share_deny_none--; 1997 osp->os_dc_openacc = new_acc; 1998 1999 nfs4_attr_cache(vp, 2000 &res.array[1].nfs_resop4_u.opgetattr.ga_res, 2001 t, cred_otw, TRUE, NULL); 2002 } 2003 2004 if (!ep->error) 2005 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2006 2007 no_args_out: 2008 crfree(cred_otw); 2009 } 2010 2011 /* 2012 * If an OPEN request gets ETIMEDOUT or EINTR (that includes bailing out 2013 * because the filesystem was forcibly unmounted) then we don't know if we 2014 * potentially left state dangling on the server, therefore the recovery 2015 * framework makes this call to resend the OPEN request and then undo it. 
/*
 * If an OPEN request gets ETIMEDOUT or EINTR (which includes bailing out
 * because the filesystem was forcibly unmounted), we don't know whether we
 * left state dangling on the server, so the recovery framework makes this
 * call to resend the OPEN request and then undo it.
 */
void
nfs4_resend_open_otw(vnode_t **vpp, nfs4_lost_rqst_t *resend_rqstp,
    nfs4_error_t *ep)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	nfs_argop4 argop[4];
	GETFH4res *gf_res = NULL;
	OPEN4cargs *open_args;
	OPEN4res *op_res;
	char *destcfp;
	int destclen;
	nfs4_ga_res_t *garp;
	vnode_t *dvp = NULL, *vp = NULL;
	rnode4_t *rp = NULL, *drp = NULL;
	cred_t *cr = NULL;
	seqid4 seqid;
	nfs4_open_owner_t *oop = NULL;
	nfs4_open_stream_t *osp = NULL;
	component4 *srcfp;
	open_claim_type4 claim;
	mntinfo4_t *mi;
	int doqueue = 1;
	bool_t retry_open = FALSE;
	int created_osp = 0;
	hrtime_t t;
	char *failed_msg = "";
	int fh_different;

	nfs4_error_zinit(ep);

	cr = resend_rqstp->lr_cr;
	dvp = resend_rqstp->lr_dvp;

	vp = *vpp;
	if (vp) {
		ASSERT(nfs4_consistent_type(vp));
		rp = VTOR4(vp);
	}

	if (rp) {
		/* If the file failed recovery, just quit. */
		mutex_enter(&rp->r_statelock);
		if (rp->r_flags & R4RECOVERR) {
			mutex_exit(&rp->r_statelock);
			ep->error = EIO;
			return;
		}
		mutex_exit(&rp->r_statelock);
	}

	if (dvp) {
		drp = VTOR4(dvp);
		/* If the parent directory failed recovery, just quit. */
		mutex_enter(&drp->r_statelock);
		if (drp->r_flags & R4RECOVERR) {
			mutex_exit(&drp->r_statelock);
			ep->error = EIO;
			return;
		}
		mutex_exit(&drp->r_statelock);
	}

	claim = resend_rqstp->lr_oclaim;
	ASSERT(claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR);

	args.ctag = TAG_OPEN_LOST;
	args.array_len = 4;
	args.array = argop;

	argop[0].argop = OP_CPUTFH;
	if (claim == CLAIM_DELEGATE_CUR) {
		ASSERT(vp != NULL);

		mi = VTOMI4(vp);
		/*
		 * If this is a file mount then
		 * use the mntinfo parentfh.
		 */
		argop[0].nfs_argop4_u.opcputfh.sfh =
		    (vp->v_flag & VROOT) ? mi->mi_srvparentfh :
		    VTOSV(vp)->sv_dfh;
		args.ctag = TAG_REOPEN_LOST;
	} else {
		argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(dvp)->r_fh;
		mi = VTOMI4(dvp);
	}

	argop[1].argop = OP_COPEN;
	open_args = &argop[1].nfs_argop4_u.opcopen;
	open_args->claim = claim;

	/*
	 * If we sent over an OPEN with CREATE then the only
	 * thing we care about is to not leave dangling state
	 * on the server, not whether the file we potentially
	 * created remains on the server.  So even though the
	 * lost open request specified a CREATE, we only wish
	 * to do a non-CREATE OPEN.
	 */
	open_args->opentype = OPEN4_NOCREATE;

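	/*
	 * The file name saved in the lost request (lr_ofile) is a counted
	 * utf8string, so copy it into a NUL-terminated buffer before handing
	 * it to the OPEN args.  For CLAIM_DELEGATE_CUR the saved delegation
	 * stateid is passed along with the name; for CLAIM_NULL only the
	 * name is needed.
	 */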
	srcfp = &resend_rqstp->lr_ofile;
	destclen = srcfp->utf8string_len;
	destcfp = kmem_alloc(destclen + 1, KM_SLEEP);
	bcopy(srcfp->utf8string_val, destcfp, destclen);
	destcfp[destclen] = '\0';
	if (claim == CLAIM_DELEGATE_CUR) {
		open_args->open_claim4_u.delegate_cur_info.delegate_stateid =
		    resend_rqstp->lr_ostateid;
		open_args->open_claim4_u.delegate_cur_info.cfile = destcfp;
	} else {
		open_args->open_claim4_u.cfile = destcfp;
	}

	open_args->share_access = resend_rqstp->lr_oacc;
	open_args->share_deny = resend_rqstp->lr_odeny;
	oop = resend_rqstp->lr_oop;
	ASSERT(oop != NULL);

	open_args->owner.clientid = mi2clientid(mi);
	/* this length never changes */
	open_args->owner.owner_len = sizeof (oop->oo_name);
	open_args->owner.owner_val =
	    kmem_alloc(open_args->owner.owner_len, KM_SLEEP);

	ep->error = nfs4_start_open_seqid_sync(oop, mi);
	ASSERT(ep->error == 0);	/* recov thread always succeeds */
	/*
	 * We can get away with not saving the seqid upon detection
	 * of a lost request, and now just use the open owner's current
	 * seqid since we only allow one op OTW per seqid and lost
	 * requests are saved FIFO.
	 */
	seqid = nfs4_get_open_seqid(oop) + 1;
	open_args->seqid = seqid;

	bcopy(&oop->oo_name, open_args->owner.owner_val,
	    open_args->owner.owner_len);

	/* getfh */
	argop[2].argop = OP_GETFH;

	/* Construct the getattr part of the compound */
	argop[3].argop = OP_GETATTR;
	argop[3].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
	argop[3].nfs_argop4_u.opgetattr.mi = mi;

	res.array = NULL;

	t = gethrtime();

	rfs4call(mi, &args, &res, cr, &doqueue, 0, ep);

	if (ep->error == 0 && nfs4_need_to_bump_seqid(&res))
		nfs4_set_open_seqid(seqid, oop, args.ctag);

	NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
	    "nfs4_resend_open_otw: error %d stat %d", ep->error, res.status));

	if (ep->error || res.status)
		goto err_out;

	op_res = &res.array[1].nfs_resop4_u.opopen;
	gf_res = &res.array[2].nfs_resop4_u.opgetfh;
	garp = &res.array[3].nfs_resop4_u.opgetattr.ga_res;

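	/*
	 * If no vnode was passed in (the lost OPEN never produced one),
	 * manufacture it from the GETFH/GETATTR results so the open state
	 * set up below has an rnode to attach to.
	 */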
	if (!vp) {
		int rnode_err = 0;
		nfs4_sharedfh_t *sfh;

		/*
		 * If we can't decode all the attributes they are not usable,
		 * just make the vnode.
		 */

		sfh = sfh4_get(&gf_res->object, VTOMI4(dvp));
		*vpp = makenfs4node(sfh, garp, dvp->v_vfsp, t, cr, dvp,
		    fn_get(VTOSV(dvp)->sv_name,
		    open_args->open_claim4_u.cfile));
		sfh4_rele(&sfh);
		NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
		    "nfs4_resend_open_otw: made vp %p for file %s",
		    (void *)(*vpp), open_args->open_claim4_u.cfile));

		if (ep->error)
			PURGE_ATTRCACHE4(*vpp);

		/*
		 * For the newly created *vpp case, make sure the rnode
		 * isn't bad before using it.
		 */
		mutex_enter(&(VTOR4(*vpp))->r_statelock);
		if (VTOR4(*vpp)->r_flags & R4RECOVERR)
			rnode_err = EIO;
		mutex_exit(&(VTOR4(*vpp))->r_statelock);

		if (rnode_err) {
			NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
			    "nfs4_resend_open_otw: rp %p is bad",
			    (void *)VTOR4(*vpp)));
			ep->error = rnode_err;
			goto err_out;
		}

		vp = *vpp;
		rp = VTOR4(vp);
	}

	if (claim == CLAIM_DELEGATE_CUR) {
		/*
		 * Check if the path we reopened really is the same
		 * file.  We could end up in a situation where the file
		 * was removed and a new file created with the same name.
		 */
		(void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_READER, 0);
		fh_different =
		    (nfs4cmpfh(&rp->r_fh->sfh_fh, &gf_res->object) != 0);
		if (fh_different) {
			if (mi->mi_fh_expire_type == FH4_PERSISTENT ||
			    mi->mi_fh_expire_type & FH4_NOEXPIRE_WITH_OPEN) {
				/* Oops, we don't have the same file */
				if (mi->mi_fh_expire_type == FH4_PERSISTENT)
					failed_msg =
					    "Couldn't reopen: Persistent "
					    "file handle changed";
				else
					failed_msg =
					    "Couldn't reopen: Volatile "
					    "(no expire on open) file handle "
					    "changed";

				nfs4_end_open_seqid_sync(oop);
				kmem_free(destcfp, destclen + 1);
				nfs4args_copen_free(open_args);
				(void) xdr_free(xdr_COMPOUND4res_clnt,
				    (caddr_t)&res);
				nfs_rw_exit(&mi->mi_fh_lock);
				nfs4_fail_recov(vp, failed_msg, ep->error,
				    ep->stat);
				return;
			} else {
				/*
				 * We have volatile file handles that don't
				 * compare.  If the fids are the same then we
				 * assume that the file handle expired but the
				 * rnode still refers to the same file object.
				 *
				 * First check whether we have fids at all.
				 * If we don't, we have a dumb server, so we
				 * will just assume everything is ok for now.
				 */
				if (!ep->error &&
				    garp->n4g_va.va_mask & AT_NODEID &&
				    rp->r_attr.va_mask & AT_NODEID &&
				    rp->r_attr.va_nodeid !=
				    garp->n4g_va.va_nodeid) {
					/*
					 * We have fids, but they don't
					 * compare.  So kill the file.
					 */
					failed_msg =
					    "Couldn't reopen: file handle "
					    "changed due to mismatched fids";
					nfs4_end_open_seqid_sync(oop);
					kmem_free(destcfp, destclen + 1);
					nfs4args_copen_free(open_args);
					(void) xdr_free(xdr_COMPOUND4res_clnt,
					    (caddr_t)&res);
					nfs_rw_exit(&mi->mi_fh_lock);
					nfs4_fail_recov(vp, failed_msg,
					    ep->error, ep->stat);
					return;
				} else {
					/*
					 * We have volatile file handles that
					 * refer to the same file (at least
					 * they have the same fid) or we don't
					 * have fids, so we can't tell. :( We'll
					 * be a kind and accepting client, so
					 * we'll update the rnode's file
					 * handle with the otw handle.
					 *
					 * We need to drop mi->mi_fh_lock since
					 * sfh4_update() acquires it.  Since
					 * there is only one recovery thread
					 * there is no race.
					 */
					nfs_rw_exit(&mi->mi_fh_lock);
					sfh4_update(rp->r_fh, &gf_res->object);
				}
			}
		} else {
			nfs_rw_exit(&mi->mi_fh_lock);
		}
	}

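	/*
	 * If the server asks for confirmation (OPEN4_RESULT_CONFIRM),
	 * nfs4open_confirm() below performs the OPEN_CONFIRM exchange with
	 * the stateid just returned before the open stream is updated.
	 */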
	ASSERT(nfs4_consistent_type(vp));

	if (op_res->rflags & OPEN4_RESULT_CONFIRM)
		nfs4open_confirm(vp, &seqid, &op_res->stateid, cr, TRUE,
		    &retry_open, oop, TRUE, ep, NULL);
	if (ep->error || ep->stat) {
		nfs4_end_open_seqid_sync(oop);
		kmem_free(destcfp, destclen + 1);
		nfs4args_copen_free(open_args);
		if (!ep->error)
			(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
		return;
	}

	if (claim == CLAIM_DELEGATE_CUR) {
		/*
		 * Doing a reopen here so the osp should already exist.
		 * If not, something changed or went very wrong.
		 *
		 * returns with 'os_sync_lock' held
		 */
		osp = find_open_stream(oop, rp);
		if (!osp) {
			NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
			    "nfs4_resend_open_otw: couldn't find osp"));
			ep->error = EINVAL;
			goto err_out;
		}
		osp->os_open_ref_count++;
	} else {
		mutex_enter(&oop->oo_lock);
		oop->oo_just_created = NFS4_PERM_CREATED;
		mutex_exit(&oop->oo_lock);

		/* returns with 'os_sync_lock' held */
		osp = find_or_create_open_stream(oop, rp, &created_osp);
		if (!osp) {
			NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
			    "nfs4_resend_open_otw: couldn't create osp"));
			ep->error = EINVAL;
			goto err_out;
		}
	}

	osp->open_stateid = op_res->stateid;
	osp->os_delegation = FALSE;
	/*
	 * Need to reset this bitfield for the possible case where we were
	 * going to OTW CLOSE the file, got a non-recoverable error, and before
	 * we could retry the CLOSE, OPENed the file again.
	 */
	ASSERT(osp->os_open_owner->oo_seqid_inuse);
	osp->os_final_close = 0;
	osp->os_force_close = 0;

	if (claim != CLAIM_DELEGATE_CUR) {
		if (open_args->share_access & OPEN4_SHARE_ACCESS_READ)
			osp->os_share_acc_read++;
		if (open_args->share_access & OPEN4_SHARE_ACCESS_WRITE)
			osp->os_share_acc_write++;
		osp->os_share_deny_none++;
	}

	mutex_exit(&osp->os_sync_lock);
	if (created_osp)
		nfs4_inc_state_ref_count(mi);
	open_stream_rele(osp, rp);

	nfs4_end_open_seqid_sync(oop);

	/* accept delegation, if any */
	nfs4_delegation_accept(rp, claim, op_res, garp, cr);

	kmem_free(destcfp, destclen + 1);
	nfs4args_copen_free(open_args);

	if (claim == CLAIM_DELEGATE_CUR)
		nfs4_attr_cache(vp, garp, t, cr, TRUE, NULL);
	else
		PURGE_ATTRCACHE4(vp);

	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);

	ASSERT(nfs4_consistent_type(vp));

	return;

err_out:
	nfs4_end_open_seqid_sync(oop);
	kmem_free(destcfp, destclen + 1);
	nfs4args_copen_free(open_args);
	if (!ep->error)
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
}