1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 30 #include <nfs/nfs4_clnt.h> 31 #include <nfs/rnode4.h> 32 #include <sys/systm.h> 33 #include <sys/cmn_err.h> 34 #include <sys/atomic.h> 35 36 static void nfs4_free_open_owner(nfs4_open_owner_t *, mntinfo4_t *); 37 static nfs4_open_owner_t *find_freed_open_owner(cred_t *, 38 nfs4_oo_hash_bucket_t *, mntinfo4_t *); 39 static open_delegation_type4 get_dtype(rnode4_t *); 40 41 #ifdef DEBUG 42 int nfs4_client_foo_debug = 0x0; 43 int nfs4_client_open_dg = 0x0; 44 /* 45 * If this is non-zero, the lockowner and openowner seqid sync primitives 46 * will intermittently return errors. 
47 */ 48 static int seqid_sync_faults = 0; 49 #endif 50 51 stateid4 clnt_special0 = { 52 0, 53 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } 54 }; 55 56 stateid4 clnt_special1 = { 57 0xffffffff, 58 { 59 (char)0xff, (char)0xff, (char)0xff, (char)0xff, 60 (char)0xff, (char)0xff, (char)0xff, (char)0xff, 61 (char)0xff, (char)0xff, (char)0xff, (char)0xff 62 } 63 }; 64 65 /* finds hash bucket and locks it */ 66 static nfs4_oo_hash_bucket_t * 67 lock_bucket(cred_t *cr, mntinfo4_t *mi) 68 { 69 nfs4_oo_hash_bucket_t *bucketp; 70 uint32_t hash_key; 71 72 hash_key = (uint32_t)(crgetuid(cr) + crgetruid(cr)) 73 % NFS4_NUM_OO_BUCKETS; 74 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, "lock_bucket: " 75 "hash_key %d for cred %p", hash_key, (void*)cr)); 76 77 ASSERT(hash_key >= 0 && hash_key < NFS4_NUM_OO_BUCKETS); 78 ASSERT(mi != NULL); 79 ASSERT(mutex_owned(&mi->mi_lock)); 80 81 bucketp = &(mi->mi_oo_list[hash_key]); 82 mutex_enter(&bucketp->b_lock); 83 return (bucketp); 84 } 85 86 /* unlocks hash bucket pointed by bucket_ptr */ 87 static void 88 unlock_bucket(nfs4_oo_hash_bucket_t *bucketp) 89 { 90 mutex_exit(&bucketp->b_lock); 91 } 92 93 /* 94 * Removes the lock owner from the rnode's lock_owners list and frees the 95 * corresponding reference. 96 */ 97 void 98 nfs4_rnode_remove_lock_owner(rnode4_t *rp, nfs4_lock_owner_t *lop) 99 { 100 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 101 "nfs4_rnode_remove_lock_owner")); 102 103 mutex_enter(&rp->r_statev4_lock); 104 105 if (lop->lo_next_rnode == NULL) { 106 /* already removed from list */ 107 mutex_exit(&rp->r_statev4_lock); 108 return; 109 } 110 111 ASSERT(lop->lo_prev_rnode != NULL); 112 113 lop->lo_prev_rnode->lo_next_rnode = lop->lo_next_rnode; 114 lop->lo_next_rnode->lo_prev_rnode = lop->lo_prev_rnode; 115 116 lop->lo_next_rnode = lop->lo_prev_rnode = NULL; 117 118 mutex_exit(&rp->r_statev4_lock); 119 120 /* 121 * This would be an appropriate place for 122 * RELEASE_LOCKOWNER. 
For now, this is overkill 123 * because in the common case, close is going to 124 * release any lockowners anyway. 125 */ 126 lock_owner_rele(lop); 127 } 128 129 /* 130 * Remove all lock owners from the rnode's lock_owners list. Frees up 131 * their references from the list. 132 */ 133 134 void 135 nfs4_flush_lock_owners(rnode4_t *rp) 136 { 137 nfs4_lock_owner_t *lop; 138 139 mutex_enter(&rp->r_statev4_lock); 140 while (rp->r_lo_head.lo_next_rnode != &rp->r_lo_head) { 141 lop = rp->r_lo_head.lo_next_rnode; 142 lop->lo_prev_rnode->lo_next_rnode = lop->lo_next_rnode; 143 lop->lo_next_rnode->lo_prev_rnode = lop->lo_prev_rnode; 144 lop->lo_next_rnode = lop->lo_prev_rnode = NULL; 145 lock_owner_rele(lop); 146 } 147 mutex_exit(&rp->r_statev4_lock); 148 } 149 150 void 151 nfs4_clear_open_streams(rnode4_t *rp) 152 { 153 nfs4_open_stream_t *osp; 154 155 mutex_enter(&rp->r_os_lock); 156 while ((osp = list_head(&rp->r_open_streams)) != NULL) { 157 open_owner_rele(osp->os_open_owner); 158 list_remove(&rp->r_open_streams, osp); 159 mutex_destroy(&osp->os_sync_lock); 160 osp->os_open_owner = NULL; 161 kmem_free(osp, sizeof (*osp)); 162 } 163 mutex_exit(&rp->r_os_lock); 164 } 165 166 void 167 open_owner_hold(nfs4_open_owner_t *oop) 168 { 169 mutex_enter(&oop->oo_lock); 170 oop->oo_ref_count++; 171 mutex_exit(&oop->oo_lock); 172 } 173 174 /* 175 * Frees the open owner if the ref count hits zero. 176 */ 177 void 178 open_owner_rele(nfs4_open_owner_t *oop) 179 { 180 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 181 "open_owner_rele")); 182 183 mutex_enter(&oop->oo_lock); 184 oop->oo_ref_count--; 185 if (oop->oo_ref_count == 0) { 186 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 187 "open_owner_rele: freeing open owner")); 188 oop->oo_valid = 0; 189 mutex_exit(&oop->oo_lock); 190 /* 191 * Ok, we don't destroy the open owner, nor do we put it on 192 * the mntinfo4's free list just yet. 
We are lazy about it 193 * and let callers to find_open_owner() do that to keep locking 194 * simple. 195 */ 196 } else { 197 mutex_exit(&oop->oo_lock); 198 } 199 } 200 201 void 202 open_stream_hold(nfs4_open_stream_t *osp) 203 { 204 mutex_enter(&osp->os_sync_lock); 205 osp->os_ref_count++; 206 mutex_exit(&osp->os_sync_lock); 207 } 208 209 /* 210 * Frees the open stream and removes it from the rnode4's open streams list if 211 * the ref count drops to zero. 212 */ 213 void 214 open_stream_rele(nfs4_open_stream_t *osp, rnode4_t *rp) 215 { 216 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 217 "open_stream_rele")); 218 219 ASSERT(!mutex_owned(&rp->r_os_lock)); 220 221 mutex_enter(&osp->os_sync_lock); 222 ASSERT(osp->os_ref_count > 0); 223 osp->os_ref_count--; 224 if (osp->os_ref_count == 0) { 225 nfs4_open_owner_t *tmp_oop; 226 227 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 228 "open_stream_rele: freeing open stream")); 229 osp->os_valid = 0; 230 tmp_oop = osp->os_open_owner; 231 mutex_exit(&osp->os_sync_lock); 232 233 /* now see if we need to destroy the open owner */ 234 open_owner_rele(tmp_oop); 235 236 mutex_enter(&rp->r_os_lock); 237 list_remove(&rp->r_open_streams, osp); 238 mutex_exit(&rp->r_os_lock); 239 240 /* free up osp */ 241 mutex_destroy(&osp->os_sync_lock); 242 osp->os_open_owner = NULL; 243 kmem_free(osp, sizeof (*osp)); 244 } else { 245 mutex_exit(&osp->os_sync_lock); 246 } 247 } 248 249 void 250 lock_owner_hold(nfs4_lock_owner_t *lop) 251 { 252 mutex_enter(&lop->lo_lock); 253 lop->lo_ref_count++; 254 mutex_exit(&lop->lo_lock); 255 } 256 257 /* 258 * Frees the lock owner if the ref count hits zero and 259 * the structure no longer has no locks. 
260 */ 261 void 262 lock_owner_rele(nfs4_lock_owner_t *lop) 263 { 264 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 265 "lock_owner_rele")); 266 267 mutex_enter(&lop->lo_lock); 268 lop->lo_ref_count--; 269 if (lop->lo_ref_count == 0) { 270 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 271 "lock_owner_rele: freeing lock owner: " 272 "%x", lop->lo_pid)); 273 lop->lo_valid = 0; 274 /* 275 * If there are no references, the lock_owner should 276 * already be off the rnode's list. 277 */ 278 ASSERT(lop->lo_next_rnode == NULL); 279 ASSERT(lop->lo_prev_rnode == NULL); 280 ASSERT(!(lop->lo_flags & NFS4_LOCK_SEQID_INUSE)); 281 ASSERT(lop->lo_seqid_holder == NULL); 282 mutex_exit(&lop->lo_lock); 283 284 /* free up lop */ 285 cv_destroy(&lop->lo_cv_seqid_sync); 286 mutex_destroy(&lop->lo_lock); 287 kmem_free(lop, sizeof (*lop)); 288 } else { 289 mutex_exit(&lop->lo_lock); 290 } 291 } 292 293 /* 294 * This increments the open owner ref count if found. 295 * The argument 'just_created' determines whether we are looking for open 296 * owners with the 'oo_just_created' flag set or not. 
297 */ 298 nfs4_open_owner_t * 299 find_open_owner_nolock(cred_t *cr, int just_created, mntinfo4_t *mi) 300 { 301 nfs4_open_owner_t *oop = NULL, *next_oop; 302 nfs4_oo_hash_bucket_t *bucketp; 303 304 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 305 "find_open_owner: cred %p, just_created %d", 306 (void*)cr, just_created)); 307 308 ASSERT(mi != NULL); 309 ASSERT(mutex_owned(&mi->mi_lock)); 310 311 bucketp = lock_bucket(cr, mi); 312 313 /* got hash bucket, search through open owners */ 314 for (oop = list_head(&bucketp->b_oo_hash_list); oop != NULL; ) { 315 mutex_enter(&oop->oo_lock); 316 if (!crcmp(oop->oo_cred, cr) && 317 (oop->oo_just_created == just_created || 318 just_created == NFS4_JUST_CREATED)) { 319 /* match */ 320 if (oop->oo_valid == 0) { 321 /* reactivate the open owner */ 322 oop->oo_valid = 1; 323 ASSERT(oop->oo_ref_count == 0); 324 } 325 oop->oo_ref_count++; 326 mutex_exit(&oop->oo_lock); 327 unlock_bucket(bucketp); 328 return (oop); 329 } 330 next_oop = list_next(&bucketp->b_oo_hash_list, oop); 331 if (oop->oo_valid == 0) { 332 list_remove(&bucketp->b_oo_hash_list, oop); 333 334 /* 335 * Now we go ahead and put this open owner 336 * on the freed list. This is our lazy method. 337 */ 338 nfs4_free_open_owner(oop, mi); 339 } 340 341 mutex_exit(&oop->oo_lock); 342 oop = next_oop; 343 } 344 345 /* search through recently freed open owners */ 346 oop = find_freed_open_owner(cr, bucketp, mi); 347 348 unlock_bucket(bucketp); 349 350 return (oop); 351 } 352 353 nfs4_open_owner_t * 354 find_open_owner(cred_t *cr, int just_created, mntinfo4_t *mi) 355 { 356 nfs4_open_owner_t *oop; 357 358 mutex_enter(&mi->mi_lock); 359 oop = find_open_owner_nolock(cr, just_created, mi); 360 mutex_exit(&mi->mi_lock); 361 362 return (oop); 363 } 364 365 /* 366 * This increments osp's ref count if found. 367 * Returns with 'os_sync_lock' held. 
368 */ 369 nfs4_open_stream_t * 370 find_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp) 371 { 372 nfs4_open_stream_t *osp; 373 374 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 375 "find_open_stream")); 376 377 mutex_enter(&rp->r_os_lock); 378 /* Now, no one can add or delete to rp's open streams list */ 379 for (osp = list_head(&rp->r_open_streams); osp != NULL; 380 osp = list_next(&rp->r_open_streams, osp)) { 381 mutex_enter(&osp->os_sync_lock); 382 if (osp->os_open_owner == oop && osp->os_valid != 0) { 383 /* match */ 384 NFS4_DEBUG(nfs4_client_state_debug, 385 (CE_NOTE, "find_open_stream " 386 "got a match")); 387 388 osp->os_ref_count++; 389 mutex_exit(&rp->r_os_lock); 390 return (osp); 391 } 392 mutex_exit(&osp->os_sync_lock); 393 } 394 395 mutex_exit(&rp->r_os_lock); 396 return (NULL); 397 } 398 399 /* 400 * Find the lock owner for the given file and process ID. If "which" is 401 * LOWN_VALID_STATEID, require that the lock owner contain a valid stateid 402 * from the server. 403 * 404 * This increments the lock owner's ref count if found. Returns NULL if 405 * there was no match. 
406 */ 407 nfs4_lock_owner_t * 408 find_lock_owner(rnode4_t *rp, pid_t pid, lown_which_t which) 409 { 410 nfs4_lock_owner_t *lop, *next_lop; 411 412 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 413 "find_lock_owner: pid %x, which %d", pid, which)); 414 415 ASSERT(which == LOWN_ANY || which == LOWN_VALID_STATEID); 416 417 /* search by pid */ 418 mutex_enter(&rp->r_statev4_lock); 419 420 lop = rp->r_lo_head.lo_next_rnode; 421 while (lop != &rp->r_lo_head) { 422 mutex_enter(&lop->lo_lock); 423 if (lop->lo_pid == pid && lop->lo_valid != 0 && 424 !(lop->lo_flags & NFS4_BAD_SEQID_LOCK)) { 425 if (which == LOWN_ANY || 426 lop->lo_just_created != NFS4_JUST_CREATED) { 427 /* Found a matching lock owner */ 428 NFS4_DEBUG(nfs4_client_state_debug, 429 (CE_NOTE, "find_lock_owner: " 430 "got a match")); 431 432 lop->lo_ref_count++; 433 mutex_exit(&lop->lo_lock); 434 mutex_exit(&rp->r_statev4_lock); 435 return (lop); 436 } 437 } 438 next_lop = lop->lo_next_rnode; 439 mutex_exit(&lop->lo_lock); 440 lop = next_lop; 441 } 442 443 mutex_exit(&rp->r_statev4_lock); 444 return (NULL); 445 } 446 447 /* 448 * This returns the delegation stateid as 'sid'. Returns 1 if a successful 449 * delegation stateid was found, otherwise returns 0. 450 */ 451 452 static int 453 nfs4_get_deleg_stateid(rnode4_t *rp, nfs_opnum4 op, stateid4 *sid) 454 { 455 ASSERT(!mutex_owned(&rp->r_statev4_lock)); 456 457 mutex_enter(&rp->r_statev4_lock); 458 if (((rp->r_deleg_type == OPEN_DELEGATE_WRITE && op == OP_WRITE) || 459 (rp->r_deleg_type != OPEN_DELEGATE_NONE && op != OP_WRITE)) && 460 !rp->r_deleg_return_pending) { 461 462 *sid = rp->r_deleg_stateid; 463 mutex_exit(&rp->r_statev4_lock); 464 return (1); 465 } 466 mutex_exit(&rp->r_statev4_lock); 467 return (0); 468 } 469 470 /* 471 * This returns the lock stateid as 'sid'. Returns 1 if a successful lock 472 * stateid was found, otherwise returns 0. 
473 */ 474 static int 475 nfs4_get_lock_stateid(rnode4_t *rp, pid_t pid, stateid4 *sid) 476 { 477 nfs4_lock_owner_t *lop; 478 479 lop = find_lock_owner(rp, pid, LOWN_VALID_STATEID); 480 481 if (lop) { 482 /* 483 * Found a matching lock owner, so use a lock 484 * stateid rather than an open stateid. 485 */ 486 mutex_enter(&lop->lo_lock); 487 *sid = lop->lock_stateid; 488 mutex_exit(&lop->lo_lock); 489 lock_owner_rele(lop); 490 return (1); 491 } 492 493 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 494 "nfs4_get_lock_stateid: no lop")); 495 return (0); 496 } 497 498 /* 499 * This returns the open stateid as 'sid'. Returns 1 if a successful open 500 * stateid was found, otherwise returns 0. 501 * 502 * Once the stateid is returned to the caller, it is no longer protected; 503 * so the caller must be prepared to handle OLD/BAD_STATEID where 504 * appropiate. 505 */ 506 static int 507 nfs4_get_open_stateid(rnode4_t *rp, cred_t *cr, mntinfo4_t *mi, stateid4 *sid) 508 { 509 nfs4_open_owner_t *oop; 510 nfs4_open_stream_t *osp; 511 512 ASSERT(mi != NULL); 513 514 oop = find_open_owner(cr, NFS4_PERM_CREATED, mi); 515 if (!oop) { 516 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 517 "nfs4_get_open_stateid: no oop")); 518 return (0); 519 } 520 521 osp = find_open_stream(oop, rp); 522 open_owner_rele(oop); 523 if (!osp) { 524 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 525 "nfs4_get_open_stateid: no osp")); 526 return (0); 527 } 528 529 if (osp->os_failed_reopen) { 530 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 531 "nfs4_get_open_stateid: osp %p failed reopen", 532 (void *)osp)); 533 mutex_exit(&osp->os_sync_lock); 534 open_stream_rele(osp, rp); 535 return (0); 536 } 537 *sid = osp->open_stateid; 538 mutex_exit(&osp->os_sync_lock); 539 open_stream_rele(osp, rp); 540 return (1); 541 } 542 543 /* 544 * Returns the delegation stateid if this 'op' is OP_WRITE and the 545 * delegation we hold is a write delegation, OR this 'op' is not 546 * OP_WRITE and we have a delegation 
held (read or write), otherwise
 * returns the lock stateid if there is a lock owner, otherwise
 * returns the open stateid if there is a open stream, otherwise
 * returns special stateid <seqid = 0, other = 0>.
 *
 * A candidate stateid is passed over when stateid4_cmp() reports a nonzero
 * result against the stateid of the same kind recorded in 'sid_tp'.  The
 * kind of stateid actually returned is stored in 'sid_tp->cur_sid_type'.
 *
 * Used for WRITE operations.
 */
stateid4
nfs4_get_w_stateid(cred_t *cr, rnode4_t *rp, pid_t pid, mntinfo4_t *mi,
    nfs_opnum4 op, nfs4_stateid_types_t *sid_tp)
{
	stateid4 sid;

	/* first choice: delegation stateid */
	if (nfs4_get_deleg_stateid(rp, op, &sid) &&
	    !stateid4_cmp(&sid, &sid_tp->d_sid)) {
		sid_tp->cur_sid_type = DEL_SID;
		return (sid);
	}

	/* second choice: lock stateid */
	if (nfs4_get_lock_stateid(rp, pid, &sid) &&
	    !stateid4_cmp(&sid, &sid_tp->l_sid)) {
		sid_tp->cur_sid_type = LOCK_SID;
		return (sid);
	}

	/* third choice: open stateid */
	if (nfs4_get_open_stateid(rp, cr, mi, &sid) &&
	    !stateid4_cmp(&sid, &sid_tp->o_sid)) {
		sid_tp->cur_sid_type = OPEN_SID;
		return (sid);
	}

	/* last resort: the all-zero special stateid */
	bzero(&sid, sizeof (stateid4));
	sid_tp->cur_sid_type = SPEC_SID;
	return (sid);
}

/*
 * Returns the delegation stateid if this 'op' is OP_WRITE and the
 * delegation we hold is a write delegation, OR this 'op' is not
 * OP_WRITE and we have a delegation held (read or write), otherwise
 * returns the lock stateid if there is a lock owner, otherwise
 * returns the open stateid if there is a open stream, otherwise
 * returns special stateid <seqid = 0, other = 0>.
 *
 * This also updates which stateid we are using in 'sid_tp', skips
 * previously attempted stateids, and skips checking higher priority
 * stateids than the current level as dictated by 'sid_tp->cur_sid_type'
 * for async reads.
 *
 * Used for READ and SETATTR operations.
596 */ 597 stateid4 598 nfs4_get_stateid(cred_t *cr, rnode4_t *rp, pid_t pid, mntinfo4_t *mi, 599 nfs_opnum4 op, nfs4_stateid_types_t *sid_tp, bool_t async_read) 600 { 601 stateid4 sid; 602 603 /* 604 * For asynchronous READs, do not attempt to retry from the start of 605 * the stateid priority list, just continue from where you last left 606 * off. 607 */ 608 if (async_read) { 609 switch (sid_tp->cur_sid_type) { 610 case NO_SID: 611 break; 612 case DEL_SID: 613 goto lock_stateid; 614 case LOCK_SID: 615 goto open_stateid; 616 case OPEN_SID: 617 goto special_stateid; 618 case SPEC_SID: 619 default: 620 cmn_err(CE_PANIC, "nfs4_get_stateid: illegal current " 621 "stateid type %d", sid_tp->cur_sid_type); 622 } 623 } 624 625 if (nfs4_get_deleg_stateid(rp, op, &sid)) { 626 if (!stateid4_cmp(&sid, &sid_tp->d_sid)) { 627 sid_tp->cur_sid_type = DEL_SID; 628 return (sid); 629 } 630 } 631 lock_stateid: 632 if (nfs4_get_lock_stateid(rp, pid, &sid)) { 633 if (!stateid4_cmp(&sid, &sid_tp->l_sid)) { 634 sid_tp->cur_sid_type = LOCK_SID; 635 return (sid); 636 } 637 } 638 open_stateid: 639 if (nfs4_get_open_stateid(rp, cr, mi, &sid)) { 640 if (!stateid4_cmp(&sid, &sid_tp->o_sid)) { 641 sid_tp->cur_sid_type = OPEN_SID; 642 return (sid); 643 } 644 } 645 special_stateid: 646 bzero(&sid, sizeof (stateid4)); 647 sid_tp->cur_sid_type = SPEC_SID; 648 return (sid); 649 } 650 651 void 652 nfs4_set_lock_stateid(nfs4_lock_owner_t *lop, stateid4 stateid) 653 { 654 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 655 "nfs4_set_lock_stateid")); 656 657 ASSERT(lop); 658 ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE); 659 660 mutex_enter(&lop->lo_lock); 661 lop->lock_stateid = stateid; 662 mutex_exit(&lop->lo_lock); 663 } 664 665 /* 666 * Sequence number used when a new open owner is needed. 667 * This is used so as to not confuse the server. Since a open owner 668 * is based off of cred, a cred could be re-used quickly, and the server 669 * may not release all state for a cred. 
670 */ 671 static uint64_t open_owner_seq_num = 0; 672 673 uint64_t 674 nfs4_get_new_oo_name(void) 675 { 676 return (atomic_add_64_nv(&open_owner_seq_num, 1)); 677 } 678 679 /* 680 * Create a new open owner and add it to the open owner hash table. 681 */ 682 nfs4_open_owner_t * 683 create_open_owner(cred_t *cr, mntinfo4_t *mi) 684 { 685 nfs4_open_owner_t *oop; 686 nfs4_oo_hash_bucket_t *bucketp; 687 688 oop = kmem_alloc(sizeof (nfs4_open_owner_t), KM_SLEEP); 689 /* 690 * Make sure the cred doesn't go away when we put this open owner 691 * on the free list, as well as make crcmp() a valid check. 692 */ 693 crhold(cr); 694 oop->oo_cred = cr; 695 mutex_init(&oop->oo_lock, NULL, MUTEX_DEFAULT, NULL); 696 oop->oo_ref_count = 1; 697 oop->oo_valid = 1; 698 oop->oo_just_created = NFS4_JUST_CREATED; 699 oop->oo_seqid = 0; 700 oop->oo_seqid_inuse = 0; 701 oop->oo_last_good_seqid = 0; 702 oop->oo_last_good_op = TAG_NONE; 703 oop->oo_cred_otw = NULL; 704 cv_init(&oop->oo_cv_seqid_sync, NULL, CV_DEFAULT, NULL); 705 706 /* 707 * A Solaris open_owner is <oo_seq_num> 708 */ 709 oop->oo_name = nfs4_get_new_oo_name(); 710 711 /* now add the struct into the cred hash table */ 712 ASSERT(mutex_owned(&mi->mi_lock)); 713 bucketp = lock_bucket(cr, mi); 714 list_insert_head(&bucketp->b_oo_hash_list, oop); 715 unlock_bucket(bucketp); 716 717 return (oop); 718 } 719 720 /* 721 * Create a new open stream and it to the rnode's list. 722 * Increments the ref count on oop. 723 * Returns with 'os_sync_lock' held. 
724 */ 725 nfs4_open_stream_t * 726 create_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp) 727 { 728 nfs4_open_stream_t *osp; 729 730 #ifdef DEBUG 731 mutex_enter(&oop->oo_lock); 732 ASSERT(oop->oo_seqid_inuse); 733 mutex_exit(&oop->oo_lock); 734 #endif 735 736 osp = kmem_alloc(sizeof (nfs4_open_stream_t), KM_SLEEP); 737 osp->os_open_ref_count = 1; 738 osp->os_mapcnt = 0; 739 osp->os_ref_count = 2; 740 osp->os_valid = 1; 741 osp->os_open_owner = oop; 742 osp->os_orig_oo_name = oop->oo_name; 743 bzero(&osp->open_stateid, sizeof (stateid4)); 744 osp->os_share_acc_read = 0; 745 osp->os_share_acc_write = 0; 746 osp->os_mmap_read = 0; 747 osp->os_mmap_write = 0; 748 osp->os_share_deny_none = 0; 749 osp->os_share_deny_read = 0; 750 osp->os_share_deny_write = 0; 751 osp->os_delegation = 0; 752 osp->os_dc_openacc = 0; 753 osp->os_final_close = 0; 754 osp->os_pending_close = 0; 755 osp->os_failed_reopen = 0; 756 osp->os_force_close = 0; 757 mutex_init(&osp->os_sync_lock, NULL, MUTEX_DEFAULT, NULL); 758 759 /* open owner gets a reference */ 760 open_owner_hold(oop); 761 762 /* now add the open stream to rp */ 763 mutex_enter(&rp->r_os_lock); 764 mutex_enter(&osp->os_sync_lock); 765 list_insert_head(&rp->r_open_streams, osp); 766 mutex_exit(&rp->r_os_lock); 767 768 return (osp); 769 } 770 771 /* 772 * Returns an open stream with 'os_sync_lock' held. 773 * If the open stream is found (rather than created), its 774 * 'os_open_ref_count' is bumped. 775 * 776 * There is no race with two threads entering this function 777 * and creating two open streams for the same <oop, rp> pair. 778 * This is because the open seqid sync must be acquired, thus 779 * only allowing one thread in at a time. 
780 */ 781 nfs4_open_stream_t * 782 find_or_create_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp, 783 int *created_osp) 784 { 785 nfs4_open_stream_t *osp; 786 787 #ifdef DEBUG 788 mutex_enter(&oop->oo_lock); 789 ASSERT(oop->oo_seqid_inuse); 790 mutex_exit(&oop->oo_lock); 791 #endif 792 793 osp = find_open_stream(oop, rp); 794 if (!osp) { 795 osp = create_open_stream(oop, rp); 796 if (osp) 797 *created_osp = 1; 798 } else { 799 *created_osp = 0; 800 osp->os_open_ref_count++; 801 } 802 803 return (osp); 804 } 805 806 static uint64_t lock_owner_seq_num = 0; 807 808 /* 809 * Create a new lock owner and add it to the rnode's list. 810 * Assumes the rnode's r_statev4_lock is held. 811 * The created lock owner has a reference count of 2: one for the list and 812 * one for the caller to use. Returns the lock owner locked down. 813 */ 814 nfs4_lock_owner_t * 815 create_lock_owner(rnode4_t *rp, pid_t pid) 816 { 817 nfs4_lock_owner_t *lop; 818 819 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 820 "create_lock_owner: pid %x", pid)); 821 822 ASSERT(mutex_owned(&rp->r_statev4_lock)); 823 824 lop = kmem_alloc(sizeof (nfs4_lock_owner_t), KM_SLEEP); 825 lop->lo_ref_count = 2; 826 lop->lo_valid = 1; 827 bzero(&lop->lock_stateid, sizeof (stateid4)); 828 lop->lo_pid = pid; 829 lop->lock_seqid = 0; 830 lop->lo_pending_rqsts = 0; 831 lop->lo_just_created = NFS4_JUST_CREATED; 832 lop->lo_flags = 0; 833 lop->lo_seqid_holder = NULL; 834 835 /* 836 * A Solaris lock_owner is <seq_num><pid> 837 */ 838 lop->lock_owner_name.ln_seq_num = 839 atomic_add_64_nv(&lock_owner_seq_num, 1); 840 lop->lock_owner_name.ln_pid = pid; 841 842 cv_init(&lop->lo_cv_seqid_sync, NULL, CV_DEFAULT, NULL); 843 mutex_init(&lop->lo_lock, NULL, MUTEX_DEFAULT, NULL); 844 845 mutex_enter(&lop->lo_lock); 846 847 /* now add the lock owner to rp */ 848 lop->lo_prev_rnode = &rp->r_lo_head; 849 lop->lo_next_rnode = rp->r_lo_head.lo_next_rnode; 850 rp->r_lo_head.lo_next_rnode->lo_prev_rnode = lop; 851 
rp->r_lo_head.lo_next_rnode = lop; 852 853 return (lop); 854 855 } 856 857 /* 858 * This sets the lock seqid of a lock owner. 859 */ 860 void 861 nfs4_set_lock_seqid(seqid4 seqid, nfs4_lock_owner_t *lop) 862 { 863 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 864 "nfs4_set_lock_seqid")); 865 866 ASSERT(lop != NULL); 867 ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE); 868 869 lop->lock_seqid = seqid; 870 } 871 872 static void 873 nfs4_set_new_lock_owner_args(lock_owner4 *owner, pid_t pid) 874 { 875 nfs4_lo_name_t *cast_namep; 876 877 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 878 "nfs4_set_new_lock_owner_args")); 879 880 owner->owner_len = sizeof (*cast_namep); 881 owner->owner_val = kmem_alloc(owner->owner_len, KM_SLEEP); 882 /* 883 * A Solaris lock_owner is <seq_num><pid> 884 */ 885 cast_namep = (nfs4_lo_name_t *)owner->owner_val; 886 cast_namep->ln_seq_num = atomic_add_64_nv(&lock_owner_seq_num, 1); 887 cast_namep->ln_pid = pid; 888 } 889 890 /* 891 * Fill in the lock owner args. 892 */ 893 void 894 nfs4_setlockowner_args(lock_owner4 *owner, rnode4_t *rp, pid_t pid) 895 { 896 nfs4_lock_owner_t *lop; 897 898 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 899 "nfs4_setlockowner_args")); 900 901 /* This increments lop's ref count */ 902 lop = find_lock_owner(rp, pid, LOWN_VALID_STATEID); 903 904 if (!lop) 905 goto make_up_args; 906 907 mutex_enter(&lop->lo_lock); 908 owner->owner_len = sizeof (lop->lock_owner_name); 909 owner->owner_val = kmem_alloc(owner->owner_len, KM_SLEEP); 910 bcopy(&lop->lock_owner_name, owner->owner_val, 911 owner->owner_len); 912 mutex_exit(&lop->lo_lock); 913 lock_owner_rele(lop); 914 return; 915 916 make_up_args: 917 nfs4_set_new_lock_owner_args(owner, pid); 918 } 919 920 /* 921 * This ends our use of the open owner's open seqid by setting 922 * the appropiate flags and issuing a cv_signal to wake up another 923 * thread waiting to use the open seqid. 
924 */ 925 926 void 927 nfs4_end_open_seqid_sync(nfs4_open_owner_t *oop) 928 { 929 mutex_enter(&oop->oo_lock); 930 ASSERT(oop->oo_seqid_inuse); 931 oop->oo_seqid_inuse = 0; 932 cv_broadcast(&oop->oo_cv_seqid_sync); 933 mutex_exit(&oop->oo_lock); 934 } 935 936 /* 937 * This starts our use of the open owner's open seqid by setting 938 * the oo_seqid_inuse to true. We will wait (forever) with a 939 * cv_wait() until we are woken up. 940 * 941 * Return values: 942 * 0 no problems 943 * EAGAIN caller should retry (like a recovery retry) 944 */ 945 int 946 nfs4_start_open_seqid_sync(nfs4_open_owner_t *oop, mntinfo4_t *mi) 947 { 948 int error = 0; 949 #ifdef DEBUG 950 static int ops = 0; /* fault injection */ 951 #endif 952 953 #ifdef DEBUG 954 if (seqid_sync_faults && curthread != mi->mi_recovthread && 955 ++ops % 5 == 0) 956 return (EAGAIN); 957 #endif 958 959 mutex_enter(&mi->mi_lock); 960 if ((mi->mi_flags & MI4_RECOV_ACTIV) && 961 curthread != mi->mi_recovthread) 962 error = EAGAIN; 963 mutex_exit(&mi->mi_lock); 964 if (error != 0) 965 goto done; 966 967 mutex_enter(&oop->oo_lock); 968 969 while (oop->oo_seqid_inuse) { 970 NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE, 971 "nfs4_start_open_seqid_sync waiting on cv")); 972 973 cv_wait(&oop->oo_cv_seqid_sync, &oop->oo_lock); 974 } 975 976 oop->oo_seqid_inuse = 1; 977 978 mutex_exit(&oop->oo_lock); 979 980 mutex_enter(&mi->mi_lock); 981 if ((mi->mi_flags & MI4_RECOV_ACTIV) && 982 curthread != mi->mi_recovthread) 983 error = EAGAIN; 984 mutex_exit(&mi->mi_lock); 985 986 if (error == EAGAIN) 987 nfs4_end_open_seqid_sync(oop); 988 989 NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE, 990 "nfs4_start_open_seqid_sync: error=%d", error)); 991 992 done: 993 return (error); 994 } 995 996 #ifdef DEBUG 997 int bypass_otw[2]; 998 #endif 999 1000 /* 1001 * Checks to see if the OPEN OTW is necessary that is, if it's already 1002 * been opened with the same access and deny bits we are now asking for. 1003 * Note, this assumes that *vpp is a rnode. 
1004 */ 1005 int 1006 nfs4_is_otw_open_necessary(nfs4_open_owner_t *oop, int flag, vnode_t *vp, 1007 int just_been_created, int *errorp, int acc, nfs4_recov_state_t *rsp) 1008 { 1009 rnode4_t *rp; 1010 nfs4_open_stream_t *osp; 1011 open_delegation_type4 dt; 1012 1013 rp = VTOR4(vp); 1014 1015 /* 1016 * Grab the delegation type. This function is protected against 1017 * the delegation being returned by virtue of start_op (called 1018 * by nfs4open_otw) taking the r_deleg_recall_lock in read mode, 1019 * delegreturn requires this lock in write mode to proceed. 1020 */ 1021 ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_READER)); 1022 dt = get_dtype(rp); 1023 1024 /* returns with 'os_sync_lock' held */ 1025 osp = find_open_stream(oop, rp); 1026 1027 if (osp) { 1028 uint32_t do_otw = 0; 1029 1030 if (osp->os_failed_reopen) { 1031 NFS4_DEBUG(nfs4_open_stream_debug, (CE_NOTE, 1032 "nfs4_is_otw_open_necessary: os_failed_reopen " 1033 "set on osp %p, cr %p, rp %s", (void *)osp, 1034 (void *)osp->os_open_owner->oo_cred, 1035 rnode4info(rp))); 1036 do_otw = 1; 1037 } 1038 1039 /* 1040 * check access/deny bits 1041 */ 1042 if (!do_otw && (flag & FREAD)) 1043 if (osp->os_share_acc_read == 0 && 1044 dt == OPEN_DELEGATE_NONE) 1045 do_otw = 1; 1046 1047 if (!do_otw && (flag & FWRITE)) 1048 if (osp->os_share_acc_write == 0 && 1049 dt != OPEN_DELEGATE_WRITE) 1050 do_otw = 1; 1051 1052 if (!do_otw) { 1053 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 1054 "nfs4_is_otw_open_necessary: can skip this " 1055 "open OTW")); 1056 if (!just_been_created) { 1057 osp->os_open_ref_count++; 1058 if (flag & FREAD) 1059 osp->os_share_acc_read++; 1060 if (flag & FWRITE) 1061 osp->os_share_acc_write++; 1062 osp->os_share_deny_none++; 1063 } 1064 1065 /* 1066 * Need to reset this bitfield for the possible case 1067 * where we were going to OTW CLOSE the file, got a 1068 * non-recoverable error, and before we could retry 1069 * the CLOSE, OPENed the file again. 
1070 */ 1071 ASSERT(osp->os_open_owner->oo_seqid_inuse); 1072 osp->os_final_close = 0; 1073 osp->os_force_close = 0; 1074 1075 mutex_exit(&osp->os_sync_lock); 1076 open_stream_rele(osp, rp); 1077 1078 #ifdef DEBUG 1079 bypass_otw[0]++; 1080 #endif 1081 1082 *errorp = 0; 1083 return (0); 1084 } 1085 mutex_exit(&osp->os_sync_lock); 1086 open_stream_rele(osp, rp); 1087 1088 } else if (dt != OPEN_DELEGATE_NONE) { 1089 /* 1090 * Even if there isn't an open_stream yet, we may still be 1091 * able to bypass the otw open if the client owns a delegation. 1092 * 1093 * If you are asking for for WRITE, but I only have 1094 * a read delegation, then you still have to go otw. 1095 */ 1096 1097 if (flag & FWRITE && dt == OPEN_DELEGATE_READ) 1098 return (1); 1099 1100 /* 1101 * TODO - evaluate the nfsace4 1102 */ 1103 1104 /* 1105 * Check the access flags to make sure the caller 1106 * had permission. 1107 */ 1108 if (flag & FREAD && !(acc & VREAD)) 1109 return (1); 1110 1111 if (flag & FWRITE && !(acc & VWRITE)) 1112 return (1); 1113 1114 /* 1115 * create_open_stream will add a reference to oop, 1116 * this will prevent the open_owner_rele done in 1117 * nfs4open_otw from destroying the open_owner. 
1118 */ 1119 1120 /* returns with 'os_sync_lock' held */ 1121 osp = create_open_stream(oop, rp); 1122 if (osp == NULL) 1123 return (1); 1124 1125 osp->open_stateid = rp->r_deleg_stateid; 1126 osp->os_delegation = 1; 1127 1128 if (flag & FREAD) 1129 osp->os_share_acc_read++; 1130 if (flag & FWRITE) 1131 osp->os_share_acc_write++; 1132 1133 osp->os_share_deny_none++; 1134 mutex_exit(&osp->os_sync_lock); 1135 1136 open_stream_rele(osp, rp); 1137 1138 mutex_enter(&oop->oo_lock); 1139 oop->oo_just_created = NFS4_PERM_CREATED; 1140 mutex_exit(&oop->oo_lock); 1141 1142 ASSERT(rsp != NULL); 1143 if (rsp->rs_sp != NULL) { 1144 mutex_enter(&rsp->rs_sp->s_lock); 1145 nfs4_inc_state_ref_count_nolock(rsp->rs_sp, 1146 VTOMI4(vp)); 1147 mutex_exit(&rsp->rs_sp->s_lock); 1148 } 1149 #ifdef DEBUG 1150 bypass_otw[1]++; 1151 #endif 1152 1153 *errorp = 0; 1154 return (0); 1155 } 1156 1157 return (1); 1158 } 1159 1160 static open_delegation_type4 1161 get_dtype(rnode4_t *rp) 1162 { 1163 open_delegation_type4 dt; 1164 1165 mutex_enter(&rp->r_statev4_lock); 1166 ASSERT(!rp->r_deleg_return_inprog); 1167 if (rp->r_deleg_return_pending) 1168 dt = OPEN_DELEGATE_NONE; 1169 else 1170 dt = rp->r_deleg_type; 1171 mutex_exit(&rp->r_statev4_lock); 1172 1173 return (dt); 1174 } 1175 1176 /* 1177 * Fill in *locker with the lock state arguments for a LOCK call. If 1178 * lop->lo_just_created == NFS4_JUST_CREATED, oop and osp must be non-NULL. 1179 * Caller must already hold the necessary seqid sync lock(s). 
1180 */ 1181 1182 void 1183 nfs4_setup_lock_args(nfs4_lock_owner_t *lop, nfs4_open_owner_t *oop, 1184 nfs4_open_stream_t *osp, clientid4 clientid, locker4 *locker) 1185 { 1186 ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE); 1187 if (lop->lo_just_created == NFS4_JUST_CREATED) { 1188 /* this is a new lock request */ 1189 open_to_lock_owner4 *nown; 1190 1191 ASSERT(oop != NULL); 1192 ASSERT(osp != NULL); 1193 1194 locker->new_lock_owner = TRUE; 1195 nown = &locker->locker4_u.open_owner; 1196 nown->open_seqid = nfs4_get_open_seqid(oop) + 1; 1197 mutex_enter(&osp->os_sync_lock); 1198 nown->open_stateid = osp->open_stateid; 1199 mutex_exit(&osp->os_sync_lock); 1200 nown->lock_seqid = lop->lock_seqid; /* initial, so no +1 */ 1201 1202 nown->lock_owner.clientid = clientid; 1203 nown->lock_owner.owner_len = sizeof (lop->lock_owner_name); 1204 nown->lock_owner.owner_val = 1205 kmem_alloc(nown->lock_owner.owner_len, KM_SLEEP); 1206 bcopy(&lop->lock_owner_name, nown->lock_owner.owner_val, 1207 nown->lock_owner.owner_len); 1208 } else { 1209 exist_lock_owner4 *eown; 1210 /* have an existing lock owner */ 1211 1212 locker->new_lock_owner = FALSE; 1213 eown = &locker->locker4_u.lock_owner; 1214 mutex_enter(&lop->lo_lock); 1215 eown->lock_stateid = lop->lock_stateid; 1216 mutex_exit(&lop->lo_lock); 1217 eown->lock_seqid = lop->lock_seqid + 1; 1218 } 1219 } 1220 1221 /* 1222 * This starts our use of the lock owner's lock seqid by setting 1223 * the lo_flags to NFS4_LOCK_SEQID_INUSE. We will wait (forever) 1224 * with a cv_wait() until we are woken up. 
1225 * 1226 * Return values: 1227 * 0 no problems 1228 * EAGAIN caller should retry (like a recovery retry) 1229 */ 1230 int 1231 nfs4_start_lock_seqid_sync(nfs4_lock_owner_t *lop, mntinfo4_t *mi) 1232 { 1233 int error = 0; 1234 #ifdef DEBUG 1235 static int ops = 0; /* fault injection */ 1236 #endif 1237 1238 #ifdef DEBUG 1239 if (seqid_sync_faults && curthread != mi->mi_recovthread && 1240 ++ops % 7 == 0) 1241 return (EAGAIN); 1242 #endif 1243 1244 mutex_enter(&mi->mi_lock); 1245 if ((mi->mi_flags & MI4_RECOV_ACTIV) && 1246 curthread != mi->mi_recovthread) 1247 error = EAGAIN; 1248 mutex_exit(&mi->mi_lock); 1249 if (error != 0) 1250 goto done; 1251 1252 mutex_enter(&lop->lo_lock); 1253 1254 ASSERT(lop->lo_seqid_holder != curthread); 1255 while (lop->lo_flags & NFS4_LOCK_SEQID_INUSE) { 1256 NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE, 1257 "nfs4_start_lock_seqid_sync: waiting on cv")); 1258 1259 cv_wait(&lop->lo_cv_seqid_sync, &lop->lo_lock); 1260 } 1261 NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE, "nfs4_start_lock_seqid_sync: " 1262 "NFS4_LOCK_SEQID_INUSE")); 1263 1264 lop->lo_flags |= NFS4_LOCK_SEQID_INUSE; 1265 lop->lo_seqid_holder = curthread; 1266 mutex_exit(&lop->lo_lock); 1267 1268 mutex_enter(&mi->mi_lock); 1269 if ((mi->mi_flags & MI4_RECOV_ACTIV) && 1270 curthread != mi->mi_recovthread) 1271 error = EAGAIN; 1272 mutex_exit(&mi->mi_lock); 1273 1274 if (error == EAGAIN) 1275 nfs4_end_lock_seqid_sync(lop); 1276 1277 NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE, 1278 "nfs4_start_lock_seqid_sync: error=%d", error)); 1279 1280 done: 1281 return (error); 1282 } 1283 1284 /* 1285 * This ends our use of the lock owner's lock seqid by setting 1286 * the appropiate flags and issuing a cv_signal to wake up another 1287 * thread waiting to use the lock seqid. 
1288 */ 1289 void 1290 nfs4_end_lock_seqid_sync(nfs4_lock_owner_t *lop) 1291 { 1292 mutex_enter(&lop->lo_lock); 1293 ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE); 1294 ASSERT(lop->lo_seqid_holder == curthread); 1295 lop->lo_flags &= ~NFS4_LOCK_SEQID_INUSE; 1296 lop->lo_seqid_holder = NULL; 1297 cv_broadcast(&lop->lo_cv_seqid_sync); 1298 mutex_exit(&lop->lo_lock); 1299 } 1300 1301 /* 1302 * Returns a reference to a lock owner via lopp, which has its lock seqid 1303 * synchronization started. 1304 * If the lock owner is in the 'just_created' state, then we return its open 1305 * owner and open stream and start the open seqid synchronization. 1306 * 1307 * Return value: 1308 * NFS4_OK no problems 1309 * NFS4ERR_DELAY there is lost state to recover; caller should retry 1310 * NFS4ERR_IO no open stream 1311 */ 1312 nfsstat4 1313 nfs4_find_or_create_lock_owner(pid_t pid, rnode4_t *rp, cred_t *cr, 1314 nfs4_open_owner_t **oopp, nfs4_open_stream_t **ospp, 1315 nfs4_lock_owner_t **lopp) 1316 { 1317 nfs4_lock_owner_t *lop, *next_lop; 1318 mntinfo4_t *mi; 1319 int error = 0; 1320 nfsstat4 stat; 1321 1322 mi = VTOMI4(RTOV4(rp)); 1323 1324 mutex_enter(&rp->r_statev4_lock); 1325 1326 lop = rp->r_lo_head.lo_next_rnode; 1327 while (lop != &rp->r_lo_head) { 1328 mutex_enter(&lop->lo_lock); 1329 if (lop->lo_pid == pid && lop->lo_valid != 0) { 1330 /* Found a matching lock owner */ 1331 NFS4_DEBUG(nfs4_client_state_debug, 1332 (CE_NOTE, "nfs4_find_or_create_lock_owner: " 1333 "got a match")); 1334 lop->lo_ref_count++; 1335 break; 1336 } 1337 next_lop = lop->lo_next_rnode; 1338 mutex_exit(&lop->lo_lock); 1339 lop = next_lop; 1340 } 1341 1342 if (lop == &rp->r_lo_head) { 1343 /* create temporary lock owner */ 1344 lop = create_lock_owner(rp, pid); 1345 } 1346 mutex_exit(&rp->r_statev4_lock); 1347 1348 /* Have a locked down lock owner struct now */ 1349 if (lop->lo_just_created != NFS4_JUST_CREATED) { 1350 /* This is an existing lock owner */ 1351 *oopp = NULL; 1352 *ospp = NULL; 1353 
} else { 1354 /* Lock owner doesn't exist yet */ 1355 1356 /* First grab open owner seqid synchronization */ 1357 mutex_exit(&lop->lo_lock); 1358 *oopp = find_open_owner(cr, NFS4_PERM_CREATED, mi); 1359 if (*oopp == NULL) 1360 goto kill_new_lop; 1361 error = nfs4_start_open_seqid_sync(*oopp, mi); 1362 if (error == EAGAIN) { 1363 stat = NFS4ERR_DELAY; 1364 goto failed; 1365 } 1366 *ospp = find_open_stream(*oopp, rp); 1367 if (*ospp == NULL) { 1368 nfs4_end_open_seqid_sync(*oopp); 1369 goto kill_new_lop; 1370 } 1371 if ((*ospp)->os_failed_reopen) { 1372 mutex_exit(&(*ospp)->os_sync_lock); 1373 NFS4_DEBUG((nfs4_open_stream_debug || 1374 nfs4_client_lock_debug), (CE_NOTE, 1375 "nfs4_find_or_create_lock_owner: os_failed_reopen;" 1376 "osp %p, cr %p, rp %s", (void *)(*ospp), 1377 (void *)cr, rnode4info(rp))); 1378 nfs4_end_open_seqid_sync(*oopp); 1379 stat = NFS4ERR_IO; 1380 goto failed; 1381 } 1382 mutex_exit(&(*ospp)->os_sync_lock); 1383 1384 /* 1385 * Now see if the lock owner has become permanent while we 1386 * had released our lock. 1387 */ 1388 mutex_enter(&lop->lo_lock); 1389 if (lop->lo_just_created != NFS4_JUST_CREATED) { 1390 nfs4_end_open_seqid_sync(*oopp); 1391 open_stream_rele(*ospp, rp); 1392 open_owner_rele(*oopp); 1393 *oopp = NULL; 1394 *ospp = NULL; 1395 } 1396 } 1397 mutex_exit(&lop->lo_lock); 1398 1399 error = nfs4_start_lock_seqid_sync(lop, mi); 1400 if (error == EAGAIN) { 1401 if (*oopp != NULL) 1402 nfs4_end_open_seqid_sync(*oopp); 1403 stat = NFS4ERR_DELAY; 1404 goto failed; 1405 } 1406 ASSERT(error == 0); 1407 1408 *lopp = lop; 1409 return (NFS4_OK); 1410 1411 kill_new_lop: 1412 /* 1413 * A previous CLOSE was attempted but got EINTR, but the application 1414 * continued to use the unspecified state file descriptor. But now the 1415 * open stream is gone (which could also destroy the open owner), hence 1416 * we can no longer continue. The calling function should return EIO 1417 * to the application. 
1418 */ 1419 NFS4_DEBUG(nfs4_lost_rqst_debug || nfs4_client_lock_debug, 1420 (CE_NOTE, "nfs4_find_or_create_lock_owner: destroy newly created " 1421 "lop %p, oop %p, osp %p", (void *)lop, (void *)(*oopp), 1422 (void *)(*ospp))); 1423 1424 nfs4_rnode_remove_lock_owner(rp, lop); 1425 stat = NFS4ERR_IO; 1426 1427 failed: 1428 lock_owner_rele(lop); 1429 if (*oopp) { 1430 open_owner_rele(*oopp); 1431 *oopp = NULL; 1432 } 1433 if (*ospp) { 1434 open_stream_rele(*ospp, rp); 1435 *ospp = NULL; 1436 } 1437 return (stat); 1438 } 1439 1440 /* 1441 * This function grabs a recently freed open owner off of the freed open 1442 * owner list if there is a match on the cred 'cr'. It returns NULL if no 1443 * such match is found. It will set the 'oo_ref_count' and 'oo_valid' back 1444 * to both 1 (sane values) in the case a match is found. 1445 */ 1446 static nfs4_open_owner_t * 1447 find_freed_open_owner(cred_t *cr, nfs4_oo_hash_bucket_t *bucketp, 1448 mntinfo4_t *mi) 1449 { 1450 nfs4_open_owner_t *foop; 1451 1452 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, 1453 "find_freed_open_owner: cred %p", (void*)cr)); 1454 1455 ASSERT(mutex_owned(&mi->mi_lock)); 1456 ASSERT(mutex_owned(&bucketp->b_lock)); 1457 1458 /* got hash bucket, search through freed open owners */ 1459 for (foop = list_head(&mi->mi_foo_list); foop != NULL; 1460 foop = list_next(&mi->mi_foo_list, foop)) { 1461 if (!crcmp(foop->oo_cred, cr)) { 1462 NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE, 1463 "find_freed_open_owner: got a match open owner " 1464 "%p", (void *)foop)); 1465 foop->oo_ref_count = 1; 1466 foop->oo_valid = 1; 1467 list_remove(&mi->mi_foo_list, foop); 1468 mi->mi_foo_num--; 1469 1470 /* now add the struct into the cred hash table */ 1471 list_insert_head(&bucketp->b_oo_hash_list, foop); 1472 return (foop); 1473 } 1474 } 1475 1476 return (NULL); 1477 } 1478 1479 /* 1480 * Insert the newly freed 'oop' into the mi's freed oop list, 1481 * always at the head of the list. 
If we've already reached 1482 * our maximum allowed number of freed open owners (mi_foo_max), 1483 * then remove the LRU open owner on the list (namely the tail). 1484 */ 1485 static void 1486 nfs4_free_open_owner(nfs4_open_owner_t *oop, mntinfo4_t *mi) 1487 { 1488 nfs4_open_owner_t *lru_foop; 1489 1490 if (mi->mi_foo_num < mi->mi_foo_max) { 1491 NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE, 1492 "nfs4_free_open_owner: num free %d, max free %d, " 1493 "insert open owner %p for mntinfo4 %p", 1494 mi->mi_foo_num, mi->mi_foo_max, (void *)oop, 1495 (void *)mi)); 1496 list_insert_head(&mi->mi_foo_list, oop); 1497 mi->mi_foo_num++; 1498 return; 1499 } 1500 1501 /* need to replace a freed open owner */ 1502 1503 lru_foop = list_tail(&mi->mi_foo_list); 1504 1505 NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE, 1506 "nfs4_free_open_owner: destroy %p, insert %p", 1507 (void *)lru_foop, (void *)oop)); 1508 1509 list_remove(&mi->mi_foo_list, lru_foop); 1510 nfs4_destroy_open_owner(lru_foop); 1511 1512 /* head always has latest freed oop */ 1513 list_insert_head(&mi->mi_foo_list, oop); 1514 } 1515 1516 void 1517 nfs4_destroy_open_owner(nfs4_open_owner_t *oop) 1518 { 1519 ASSERT(oop != NULL); 1520 1521 crfree(oop->oo_cred); 1522 if (oop->oo_cred_otw) 1523 crfree(oop->oo_cred_otw); 1524 mutex_destroy(&oop->oo_lock); 1525 cv_destroy(&oop->oo_cv_seqid_sync); 1526 kmem_free(oop, sizeof (*oop)); 1527 } 1528 1529 seqid4 1530 nfs4_get_open_seqid(nfs4_open_owner_t *oop) 1531 { 1532 ASSERT(oop->oo_seqid_inuse); 1533 return (oop->oo_seqid); 1534 } 1535 1536 /* 1537 * This set's the open seqid for a <open owner/ mntinfo4> pair. 1538 */ 1539 void 1540 nfs4_set_open_seqid(seqid4 seqid, nfs4_open_owner_t *oop, 1541 nfs4_tag_type_t tag_type) 1542 { 1543 ASSERT(oop->oo_seqid_inuse); 1544 oop->oo_seqid = seqid; 1545 oop->oo_last_good_seqid = seqid; 1546 oop->oo_last_good_op = tag_type; 1547 } 1548 1549 /* 1550 * This bumps the current open seqid for the open owner 'oop'. 
1551 */ 1552 void 1553 nfs4_get_and_set_next_open_seqid(nfs4_open_owner_t *oop, 1554 nfs4_tag_type_t tag_type) 1555 { 1556 ASSERT(oop->oo_seqid_inuse); 1557 oop->oo_seqid++; 1558 oop->oo_last_good_seqid = oop->oo_seqid; 1559 oop->oo_last_good_op = tag_type; 1560 } 1561 1562 /* 1563 * If no open owner was provided, this function takes the cred to find an 1564 * open owner within the given mntinfo4_t. Either way we return the 1565 * open owner's OTW credential if it exists; otherwise returns the 1566 * supplied 'cr'. 1567 * 1568 * A hold is put on the returned credential, and it is up to the caller 1569 * to free the cred. 1570 */ 1571 cred_t * 1572 nfs4_get_otw_cred(cred_t *cr, mntinfo4_t *mi, nfs4_open_owner_t *provided_oop) 1573 { 1574 cred_t *ret_cr; 1575 nfs4_open_owner_t *oop = provided_oop; 1576 1577 if (oop == NULL) 1578 oop = find_open_owner(cr, NFS4_PERM_CREATED, mi); 1579 if (oop != NULL) { 1580 mutex_enter(&oop->oo_lock); 1581 if (oop->oo_cred_otw) 1582 ret_cr = oop->oo_cred_otw; 1583 else 1584 ret_cr = cr; 1585 crhold(ret_cr); 1586 mutex_exit(&oop->oo_lock); 1587 if (provided_oop == NULL) 1588 open_owner_rele(oop); 1589 } else { 1590 ret_cr = cr; 1591 crhold(ret_cr); 1592 } 1593 return (ret_cr); 1594 } 1595 1596 /* 1597 * Retrieves the next open stream in the rnode's list if an open stream 1598 * is provided; otherwise gets the first open stream in the list. 1599 * The open owner for that open stream is then retrieved, and if its 1600 * oo_cred_otw exists then it is returned; otherwise the provided 'cr' 1601 * is returned. *osp is set to the 'found' open stream. 1602 * 1603 * Note: we don't set *osp to the open stream retrieved via the 1604 * optimized check since that won't necessarily be at the beginning 1605 * of the rnode list, and if that osp doesn't work we'd like to 1606 * check _all_ open streams (starting from the beginning of the 1607 * rnode list). 
1608 */ 1609 cred_t * 1610 nfs4_get_otw_cred_by_osp(rnode4_t *rp, cred_t *cr, 1611 nfs4_open_stream_t **osp, bool_t *first_time, bool_t *last_time) 1612 { 1613 nfs4_open_stream_t *next_osp = NULL; 1614 cred_t *ret_cr; 1615 1616 ASSERT(cr != NULL); 1617 /* 1618 * As an optimization, try to find the open owner 1619 * for the cred provided since that's most likely 1620 * to work. 1621 */ 1622 if (*first_time) { 1623 nfs4_open_owner_t *oop; 1624 1625 oop = find_open_owner(cr, NFS4_PERM_CREATED, VTOMI4(RTOV4(rp))); 1626 if (oop) { 1627 next_osp = find_open_stream(oop, rp); 1628 if (next_osp) 1629 mutex_exit(&next_osp->os_sync_lock); 1630 open_owner_rele(oop); 1631 } 1632 } 1633 if (next_osp == NULL) { 1634 int delay_rele = 0; 1635 *first_time = FALSE; 1636 1637 /* return the next open stream for this rnode */ 1638 mutex_enter(&rp->r_os_lock); 1639 /* Now, no one can add or delete to rp's open streams list */ 1640 1641 if (*osp) { 1642 next_osp = list_next(&rp->r_open_streams, *osp); 1643 /* 1644 * Delay the rele of *osp until after we drop 1645 * r_os_lock to not deadlock with oo_lock 1646 * via an open_stream_rele()->open_owner_rele(). 
1647 */ 1648 delay_rele = 1; 1649 } else { 1650 next_osp = list_head(&rp->r_open_streams); 1651 } 1652 if (next_osp) { 1653 nfs4_open_stream_t *tmp_osp; 1654 1655 /* find the next valid open stream */ 1656 mutex_enter(&next_osp->os_sync_lock); 1657 while (next_osp && !next_osp->os_valid) { 1658 tmp_osp = 1659 list_next(&rp->r_open_streams, next_osp); 1660 mutex_exit(&next_osp->os_sync_lock); 1661 next_osp = tmp_osp; 1662 if (next_osp) 1663 mutex_enter(&next_osp->os_sync_lock); 1664 } 1665 if (next_osp) { 1666 next_osp->os_ref_count++; 1667 mutex_exit(&next_osp->os_sync_lock); 1668 } 1669 } 1670 mutex_exit(&rp->r_os_lock); 1671 if (delay_rele) 1672 open_stream_rele(*osp, rp); 1673 } 1674 1675 if (next_osp) { 1676 nfs4_open_owner_t *oop; 1677 1678 oop = next_osp->os_open_owner; 1679 mutex_enter(&oop->oo_lock); 1680 if (oop->oo_cred_otw) 1681 ret_cr = oop->oo_cred_otw; 1682 else 1683 ret_cr = cr; 1684 crhold(ret_cr); 1685 mutex_exit(&oop->oo_lock); 1686 if (*first_time) { 1687 open_stream_rele(next_osp, rp); 1688 *osp = NULL; 1689 } else 1690 *osp = next_osp; 1691 } else { 1692 /* just return the cred provided to us */ 1693 *last_time = TRUE; 1694 *osp = NULL; 1695 ret_cr = cr; 1696 crhold(ret_cr); 1697 } 1698 1699 *first_time = FALSE; 1700 return (ret_cr); 1701 } 1702 1703 void 1704 nfs4_init_stateid_types(nfs4_stateid_types_t *sid_tp) 1705 { 1706 bzero(&sid_tp->d_sid, sizeof (stateid4)); 1707 bzero(&sid_tp->l_sid, sizeof (stateid4)); 1708 bzero(&sid_tp->o_sid, sizeof (stateid4)); 1709 sid_tp->cur_sid_type = NO_SID; 1710 } 1711 1712 void 1713 nfs4_save_stateid(stateid4 *s1, nfs4_stateid_types_t *sid_tp) 1714 { 1715 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1716 "nfs4_save_stateid: saved %s stateid", 1717 sid_tp->cur_sid_type == DEL_SID ? "delegation" : 1718 sid_tp->cur_sid_type == LOCK_SID ? "lock" : 1719 sid_tp->cur_sid_type == OPEN_SID ? 
"open" : "special")); 1720 1721 switch (sid_tp->cur_sid_type) { 1722 case DEL_SID: 1723 sid_tp->d_sid = *s1; 1724 break; 1725 case LOCK_SID: 1726 sid_tp->l_sid = *s1; 1727 break; 1728 case OPEN_SID: 1729 sid_tp->o_sid = *s1; 1730 break; 1731 case SPEC_SID: 1732 default: 1733 cmn_err(CE_PANIC, "nfs4_save_stateid: illegal " 1734 "stateid type %d", sid_tp->cur_sid_type); 1735 } 1736 } 1737 1738 /* 1739 * We got NFS4ERR_BAD_SEQID. Setup some arguments to pass to recovery. 1740 * Caller is responsible for freeing. 1741 */ 1742 nfs4_bseqid_entry_t * 1743 nfs4_create_bseqid_entry(nfs4_open_owner_t *oop, nfs4_lock_owner_t *lop, 1744 vnode_t *vp, pid_t pid, nfs4_tag_type_t tag, seqid4 seqid) 1745 { 1746 nfs4_bseqid_entry_t *bsep; 1747 1748 bsep = kmem_alloc(sizeof (*bsep), KM_SLEEP); 1749 bsep->bs_oop = oop; 1750 bsep->bs_lop = lop; 1751 bsep->bs_vp = vp; 1752 bsep->bs_pid = pid; 1753 bsep->bs_tag = tag; 1754 bsep->bs_seqid = seqid; 1755 1756 return (bsep); 1757 } 1758 1759 void 1760 nfs4open_dg_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp, 1761 nfs4_open_owner_t *oop, nfs4_open_stream_t *osp, cred_t *cr, 1762 vnode_t *vp, int access_close, int deny_close) 1763 { 1764 lost_rqstp->lr_putfirst = FALSE; 1765 1766 ASSERT(vp != NULL); 1767 if (error == ETIMEDOUT || error == EINTR || 1768 NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) { 1769 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, 1770 "nfs4open_dg_save_lost_rqst: error %d", error)); 1771 1772 lost_rqstp->lr_op = OP_OPEN_DOWNGRADE; 1773 /* 1774 * The vp is held and rele'd via the recovery code. 1775 * See nfs4_save_lost_rqst. 
1776 */ 1777 lost_rqstp->lr_vp = vp; 1778 lost_rqstp->lr_dvp = NULL; 1779 lost_rqstp->lr_oop = oop; 1780 lost_rqstp->lr_osp = osp; 1781 lost_rqstp->lr_lop = NULL; 1782 lost_rqstp->lr_cr = cr; 1783 lost_rqstp->lr_flk = NULL; 1784 lost_rqstp->lr_dg_acc = access_close; 1785 lost_rqstp->lr_dg_deny = deny_close; 1786 lost_rqstp->lr_putfirst = FALSE; 1787 } else { 1788 lost_rqstp->lr_op = 0; 1789 } 1790 } 1791 1792 /* 1793 * Change the access and deny bits of an OPEN. 1794 * If recovery is needed, *recov_credpp is set to the cred used OTW, 1795 * a hold is placed on it, and *recov_seqidp is set to the seqid used OTW. 1796 */ 1797 void 1798 nfs4_open_downgrade(int access_close, int deny_close, nfs4_open_owner_t *oop, 1799 nfs4_open_stream_t *osp, vnode_t *vp, cred_t *cr, nfs4_lost_rqst_t *lrp, 1800 nfs4_error_t *ep, cred_t **recov_credpp, seqid4 *recov_seqidp) 1801 { 1802 mntinfo4_t *mi; 1803 int downgrade_acc, downgrade_deny; 1804 int new_acc, new_deny; 1805 COMPOUND4args_clnt args; 1806 COMPOUND4res_clnt res; 1807 OPEN_DOWNGRADE4res *odg_res; 1808 nfs_argop4 argop[3]; 1809 nfs_resop4 *resop; 1810 rnode4_t *rp; 1811 bool_t needrecov = FALSE; 1812 int doqueue = 1; 1813 seqid4 seqid = 0; 1814 cred_t *cred_otw; 1815 hrtime_t t; 1816 1817 ASSERT(mutex_owned(&osp->os_sync_lock)); 1818 #if DEBUG 1819 mutex_enter(&oop->oo_lock); 1820 ASSERT(oop->oo_seqid_inuse); 1821 mutex_exit(&oop->oo_lock); 1822 #endif 1823 1824 1825 if (access_close == 0 && deny_close == 0) { 1826 nfs4_error_zinit(ep); 1827 return; 1828 } 1829 1830 cred_otw = nfs4_get_otw_cred(cr, VTOMI4(vp), oop); 1831 1832 cred_retry: 1833 nfs4_error_zinit(ep); 1834 downgrade_acc = 0; 1835 downgrade_deny = 0; 1836 mi = VTOMI4(vp); 1837 rp = VTOR4(vp); 1838 1839 /* 1840 * Check to see if the open stream got closed before we go OTW, 1841 * now that we have acquired the 'os_sync_lock'. 
1842 */ 1843 if (!osp->os_valid) { 1844 NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:" 1845 " open stream has already been closed, return success")); 1846 /* error has already been set */ 1847 goto no_args_out; 1848 } 1849 1850 /* If the file failed recovery, just quit. */ 1851 mutex_enter(&rp->r_statelock); 1852 if (rp->r_flags & R4RECOVERR) { 1853 mutex_exit(&rp->r_statelock); 1854 ep->error = EIO; 1855 goto no_args_out; 1856 } 1857 mutex_exit(&rp->r_statelock); 1858 1859 seqid = nfs4_get_open_seqid(oop) + 1; 1860 1861 NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:" 1862 "access_close %d, acc_read %"PRIu64" acc_write %"PRIu64"", 1863 access_close, osp->os_share_acc_read, osp->os_share_acc_write)); 1864 1865 /* If we're closing the last READ, need to downgrade */ 1866 if ((access_close & FREAD) && (osp->os_share_acc_read == 1)) 1867 downgrade_acc |= OPEN4_SHARE_ACCESS_READ; 1868 1869 /* if we're closing the last WRITE, need to downgrade */ 1870 if ((access_close & FWRITE) && (osp->os_share_acc_write == 1)) 1871 downgrade_acc |= OPEN4_SHARE_ACCESS_WRITE; 1872 1873 downgrade_deny = OPEN4_SHARE_DENY_NONE; 1874 1875 new_acc = 0; 1876 new_deny = 0; 1877 1878 /* set our new access and deny share bits */ 1879 if ((osp->os_share_acc_read > 0) && 1880 !(downgrade_acc & OPEN4_SHARE_ACCESS_READ)) 1881 new_acc |= OPEN4_SHARE_ACCESS_READ; 1882 if ((osp->os_share_acc_write > 0) && 1883 !(downgrade_acc & OPEN4_SHARE_ACCESS_WRITE)) 1884 new_acc |= OPEN4_SHARE_ACCESS_WRITE; 1885 1886 new_deny = OPEN4_SHARE_DENY_NONE; 1887 1888 NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:" 1889 "downgrade acc 0x%x deny 0x%x", downgrade_acc, downgrade_deny)); 1890 NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:" 1891 "new acc 0x%x deny 0x%x", new_acc, new_deny)); 1892 1893 /* 1894 * Check to see if we aren't actually doing any downgrade or 1895 * if this is the last 'close' but the file is still mmapped. 
1896 * Skip this if this a lost request resend so we don't decrement 1897 * the osp's share counts more than once. 1898 */ 1899 if (!lrp && 1900 ((downgrade_acc == 0 && downgrade_deny == 0) || 1901 (new_acc == 0 && new_deny == 0))) { 1902 /* 1903 * No downgrade to do, but still need to 1904 * update osp's os_share_* counts. 1905 */ 1906 NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, 1907 "nfs4_open_downgrade: just lower the osp's count by %s", 1908 (access_close & FREAD) && (access_close & FWRITE) ? 1909 "read and write" : (access_close & FREAD) ? "read" : 1910 (access_close & FWRITE) ? "write" : "bogus")); 1911 if (access_close & FREAD) 1912 osp->os_share_acc_read--; 1913 if (access_close & FWRITE) 1914 osp->os_share_acc_write--; 1915 osp->os_share_deny_none--; 1916 nfs4_error_zinit(ep); 1917 1918 goto no_args_out; 1919 } 1920 1921 if (osp->os_orig_oo_name != oop->oo_name) { 1922 ep->error = EIO; 1923 goto no_args_out; 1924 } 1925 1926 /* setup the COMPOUND args */ 1927 if (lrp) 1928 args.ctag = TAG_OPEN_DG_LOST; 1929 else 1930 args.ctag = TAG_OPEN_DG; 1931 1932 args.array_len = 3; 1933 args.array = argop; 1934 1935 /* putfh */ 1936 argop[0].argop = OP_CPUTFH; 1937 argop[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh; 1938 1939 argop[1].argop = OP_GETATTR; 1940 argop[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK; 1941 argop[1].nfs_argop4_u.opgetattr.mi = mi; 1942 1943 ASSERT(mutex_owned(&osp->os_sync_lock)); 1944 ASSERT(osp->os_delegation == FALSE); 1945 1946 /* open downgrade */ 1947 argop[2].argop = OP_OPEN_DOWNGRADE; 1948 argop[2].nfs_argop4_u.opopen_downgrade.open_stateid = osp->open_stateid; 1949 argop[2].nfs_argop4_u.opopen_downgrade.share_access = new_acc; 1950 argop[2].nfs_argop4_u.opopen_downgrade.share_deny = new_deny; 1951 argop[2].nfs_argop4_u.opopen_downgrade.seqid = seqid; 1952 1953 t = gethrtime(); 1954 1955 rfs4call(mi, &args, &res, cred_otw, &doqueue, 0, ep); 1956 1957 if (ep->error == 0 && nfs4_need_to_bump_seqid(&res)) 1958 
nfs4_set_open_seqid(seqid, oop, args.ctag); 1959 1960 if ((ep->error == EACCES || 1961 (ep->error == 0 && res.status == NFS4ERR_ACCESS)) && 1962 cred_otw != cr) { 1963 crfree(cred_otw); 1964 cred_otw = cr; 1965 crhold(cred_otw); 1966 if (!ep->error) 1967 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1968 goto cred_retry; 1969 } 1970 1971 needrecov = nfs4_needs_recovery(ep, TRUE, mi->mi_vfsp); 1972 1973 if (needrecov && recov_credpp) { 1974 *recov_credpp = cred_otw; 1975 crhold(*recov_credpp); 1976 if (recov_seqidp) 1977 *recov_seqidp = seqid; 1978 } 1979 1980 if (!ep->error && !res.status) { 1981 /* get the open downgrade results */ 1982 resop = &res.array[2]; 1983 odg_res = &resop->nfs_resop4_u.opopen_downgrade; 1984 1985 osp->open_stateid = odg_res->open_stateid; 1986 1987 /* set the open streams new access/deny bits */ 1988 if (access_close & FREAD) 1989 osp->os_share_acc_read--; 1990 if (access_close & FWRITE) 1991 osp->os_share_acc_write--; 1992 osp->os_share_deny_none--; 1993 osp->os_dc_openacc = new_acc; 1994 1995 nfs4_attr_cache(vp, 1996 &res.array[1].nfs_resop4_u.opgetattr.ga_res, 1997 t, cred_otw, TRUE, NULL); 1998 } 1999 2000 if (!ep->error) 2001 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2002 2003 no_args_out: 2004 crfree(cred_otw); 2005 } 2006 2007 /* 2008 * If an OPEN request gets ETIMEDOUT or EINTR (that includes bailing out 2009 * because the filesystem was forcibly unmounted) then we don't know if we 2010 * potentially left state dangling on the server, therefore the recovery 2011 * framework makes this call to resend the OPEN request and then undo it. 
2012 */ 2013 void 2014 nfs4_resend_open_otw(vnode_t **vpp, nfs4_lost_rqst_t *resend_rqstp, 2015 nfs4_error_t *ep) 2016 { 2017 COMPOUND4args_clnt args; 2018 COMPOUND4res_clnt res; 2019 nfs_argop4 argop[4]; 2020 GETFH4res *gf_res = NULL; 2021 OPEN4cargs *open_args; 2022 OPEN4res *op_res; 2023 char *destcfp; 2024 int destclen; 2025 nfs4_ga_res_t *garp; 2026 vnode_t *dvp = NULL, *vp = NULL; 2027 rnode4_t *rp = NULL, *drp = NULL; 2028 cred_t *cr = NULL; 2029 seqid4 seqid; 2030 nfs4_open_owner_t *oop = NULL; 2031 nfs4_open_stream_t *osp = NULL; 2032 component4 *srcfp; 2033 open_claim_type4 claim; 2034 mntinfo4_t *mi; 2035 int doqueue = 1; 2036 bool_t retry_open = FALSE; 2037 int created_osp = 0; 2038 hrtime_t t; 2039 char *failed_msg = ""; 2040 int fh_different; 2041 int reopen = 0; 2042 2043 nfs4_error_zinit(ep); 2044 2045 cr = resend_rqstp->lr_cr; 2046 dvp = resend_rqstp->lr_dvp; 2047 2048 vp = *vpp; 2049 if (vp) { 2050 ASSERT(nfs4_consistent_type(vp)); 2051 rp = VTOR4(vp); 2052 } 2053 2054 if (rp) { 2055 /* If the file failed recovery, just quit. */ 2056 mutex_enter(&rp->r_statelock); 2057 if (rp->r_flags & R4RECOVERR) { 2058 mutex_exit(&rp->r_statelock); 2059 ep->error = EIO; 2060 return; 2061 } 2062 mutex_exit(&rp->r_statelock); 2063 } 2064 2065 if (dvp) { 2066 drp = VTOR4(dvp); 2067 /* If the parent directory failed recovery, just quit. 
*/ 2068 mutex_enter(&drp->r_statelock); 2069 if (drp->r_flags & R4RECOVERR) { 2070 mutex_exit(&drp->r_statelock); 2071 ep->error = EIO; 2072 return; 2073 } 2074 mutex_exit(&drp->r_statelock); 2075 } else 2076 reopen = 1; /* NULL dvp means this is a reopen */ 2077 2078 claim = resend_rqstp->lr_oclaim; 2079 ASSERT(claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR); 2080 2081 args.ctag = TAG_OPEN_LOST; 2082 args.array_len = 4; 2083 args.array = argop; 2084 2085 argop[0].argop = OP_CPUTFH; 2086 if (reopen) { 2087 ASSERT(vp != NULL); 2088 2089 mi = VTOMI4(vp); 2090 /* 2091 * if this is a file mount then 2092 * use the mntinfo parentfh 2093 */ 2094 argop[0].nfs_argop4_u.opcputfh.sfh = 2095 (vp->v_flag & VROOT) ? mi->mi_srvparentfh : 2096 VTOSV(vp)->sv_dfh; 2097 args.ctag = TAG_REOPEN_LOST; 2098 } else { 2099 argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(dvp)->r_fh; 2100 mi = VTOMI4(dvp); 2101 } 2102 2103 argop[1].argop = OP_COPEN; 2104 open_args = &argop[1].nfs_argop4_u.opcopen; 2105 open_args->claim = claim; 2106 2107 /* 2108 * If we sent over a OPEN with CREATE then the only 2109 * thing we care about is to not leave dangling state 2110 * on the server, not whether the file we potentially 2111 * created remains on the server. So even though the 2112 * lost open request specified a CREATE, we only wish 2113 * to do a non-CREATE OPEN. 
2114 */ 2115 open_args->opentype = OPEN4_NOCREATE; 2116 2117 srcfp = &resend_rqstp->lr_ofile; 2118 destclen = srcfp->utf8string_len; 2119 destcfp = kmem_alloc(destclen + 1, KM_SLEEP); 2120 bcopy(srcfp->utf8string_val, destcfp, destclen); 2121 destcfp[destclen] = '\0'; 2122 if (claim == CLAIM_DELEGATE_CUR) { 2123 open_args->open_claim4_u.delegate_cur_info.delegate_stateid = 2124 resend_rqstp->lr_ostateid; 2125 open_args->open_claim4_u.delegate_cur_info.cfile = destcfp; 2126 } else { 2127 open_args->open_claim4_u.cfile = destcfp; 2128 } 2129 2130 open_args->share_access = resend_rqstp->lr_oacc; 2131 open_args->share_deny = resend_rqstp->lr_odeny; 2132 oop = resend_rqstp->lr_oop; 2133 ASSERT(oop != NULL); 2134 2135 open_args->owner.clientid = mi2clientid(mi); 2136 /* this length never changes */ 2137 open_args->owner.owner_len = sizeof (oop->oo_name); 2138 open_args->owner.owner_val = 2139 kmem_alloc(open_args->owner.owner_len, KM_SLEEP); 2140 2141 ep->error = nfs4_start_open_seqid_sync(oop, mi); 2142 ASSERT(ep->error == 0); /* recov thread always succeeds */ 2143 /* 2144 * We can get away with not saving the seqid upon detection 2145 * of a lost request, and now just use the open owner's current 2146 * seqid since we only allow one op OTW per seqid and lost 2147 * requests are saved FIFO. 
2148 */ 2149 seqid = nfs4_get_open_seqid(oop) + 1; 2150 open_args->seqid = seqid; 2151 2152 bcopy(&oop->oo_name, open_args->owner.owner_val, 2153 open_args->owner.owner_len); 2154 2155 /* getfh */ 2156 argop[2].argop = OP_GETFH; 2157 2158 /* Construct the getattr part of the compound */ 2159 argop[3].argop = OP_GETATTR; 2160 argop[3].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK; 2161 argop[3].nfs_argop4_u.opgetattr.mi = mi; 2162 2163 res.array = NULL; 2164 2165 t = gethrtime(); 2166 2167 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep); 2168 2169 if (ep->error == 0 && nfs4_need_to_bump_seqid(&res)) 2170 nfs4_set_open_seqid(seqid, oop, args.ctag); 2171 2172 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, 2173 "nfs4_resend_open_otw: error %d stat %d", ep->error, res.status)); 2174 2175 if (ep->error || res.status) 2176 goto err_out; 2177 2178 op_res = &res.array[1].nfs_resop4_u.opopen; 2179 gf_res = &res.array[2].nfs_resop4_u.opgetfh; 2180 garp = &res.array[3].nfs_resop4_u.opgetattr.ga_res; 2181 2182 if (!vp) { 2183 int rnode_err = 0; 2184 nfs4_sharedfh_t *sfh; 2185 2186 /* 2187 * If we can't decode all the attributes they are not usable, 2188 * just make the vnode. 2189 */ 2190 2191 sfh = sfh4_get(&gf_res->object, VTOMI4(dvp)); 2192 *vpp = makenfs4node(sfh, garp, dvp->v_vfsp, t, cr, dvp, 2193 fn_get(VTOSV(dvp)->sv_name, 2194 open_args->open_claim4_u.cfile, sfh)); 2195 sfh4_rele(&sfh); 2196 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, 2197 "nfs4_resend_open_otw: made vp %p for file %s", 2198 (void *)(*vpp), open_args->open_claim4_u.cfile)); 2199 2200 if (ep->error) 2201 PURGE_ATTRCACHE4(*vpp); 2202 2203 /* 2204 * For the newly created *vpp case, make sure the rnode 2205 * isn't bad before using it. 
2206 */ 2207 mutex_enter(&(VTOR4(*vpp))->r_statelock); 2208 if (VTOR4(*vpp)->r_flags & R4RECOVERR) 2209 rnode_err = EIO; 2210 mutex_exit(&(VTOR4(*vpp))->r_statelock); 2211 2212 if (rnode_err) { 2213 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, 2214 "nfs4_resend_open_otw: rp %p is bad", 2215 (void *)VTOR4(*vpp))); 2216 ep->error = rnode_err; 2217 goto err_out; 2218 } 2219 2220 vp = *vpp; 2221 rp = VTOR4(vp); 2222 } 2223 2224 if (reopen) { 2225 /* 2226 * Check if the path we reopened really is the same 2227 * file. We could end up in a situation were the file 2228 * was removed and a new file created with the same name. 2229 */ 2230 (void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_READER, 0); 2231 fh_different = 2232 (nfs4cmpfh(&rp->r_fh->sfh_fh, &gf_res->object) != 0); 2233 if (fh_different) { 2234 if (mi->mi_fh_expire_type == FH4_PERSISTENT || 2235 mi->mi_fh_expire_type & FH4_NOEXPIRE_WITH_OPEN) { 2236 /* Oops, we don't have the same file */ 2237 if (mi->mi_fh_expire_type == FH4_PERSISTENT) 2238 failed_msg = 2239 "Couldn't reopen: Persistant " 2240 "file handle changed"; 2241 else 2242 failed_msg = 2243 "Couldn't reopen: Volatile " 2244 "(no expire on open) file handle " 2245 "changed"; 2246 2247 nfs4_end_open_seqid_sync(oop); 2248 kmem_free(destcfp, destclen + 1); 2249 nfs4args_copen_free(open_args); 2250 (void) xdr_free(xdr_COMPOUND4res_clnt, 2251 (caddr_t)&res); 2252 nfs_rw_exit(&mi->mi_fh_lock); 2253 nfs4_fail_recov(vp, failed_msg, ep->error, 2254 ep->stat); 2255 return; 2256 } else { 2257 /* 2258 * We have volatile file handles that don't 2259 * compare. If the fids are the same then we 2260 * assume that the file handle expired but the 2261 * renode still refers to the same file object. 2262 * 2263 * First check that we have fids or not. 2264 * If we don't we have a dumb server so we will 2265 * just assume every thing is ok for now. 
2266 */ 2267 if (!ep->error && 2268 garp->n4g_va.va_mask & AT_NODEID && 2269 rp->r_attr.va_mask & AT_NODEID && 2270 rp->r_attr.va_nodeid != 2271 garp->n4g_va.va_nodeid) { 2272 /* 2273 * We have fids, but they don't 2274 * compare. So kill the file. 2275 */ 2276 failed_msg = 2277 "Couldn't reopen: file handle " 2278 "changed due to mismatched fids"; 2279 nfs4_end_open_seqid_sync(oop); 2280 kmem_free(destcfp, destclen + 1); 2281 nfs4args_copen_free(open_args); 2282 (void) xdr_free(xdr_COMPOUND4res_clnt, 2283 (caddr_t)&res); 2284 nfs_rw_exit(&mi->mi_fh_lock); 2285 nfs4_fail_recov(vp, failed_msg, 2286 ep->error, ep->stat); 2287 return; 2288 } else { 2289 /* 2290 * We have volatile file handles that 2291 * refers to the same file (at least 2292 * they have the same fid) or we don't 2293 * have fids so we can't tell. :(. We'll 2294 * be a kind and accepting client so 2295 * we'll update the rnode's file 2296 * handle with the otw handle. 2297 * 2298 * We need to drop mi->mi_fh_lock since 2299 * sh4_update acquires it. Since there 2300 * is only one recovery thread there is 2301 * no race. 2302 */ 2303 nfs_rw_exit(&mi->mi_fh_lock); 2304 sfh4_update(rp->r_fh, &gf_res->object); 2305 } 2306 } 2307 } else { 2308 nfs_rw_exit(&mi->mi_fh_lock); 2309 } 2310 } 2311 2312 ASSERT(nfs4_consistent_type(vp)); 2313 2314 if (op_res->rflags & OPEN4_RESULT_CONFIRM) 2315 nfs4open_confirm(vp, &seqid, &op_res->stateid, cr, TRUE, 2316 &retry_open, oop, TRUE, ep, NULL); 2317 if (ep->error || ep->stat) { 2318 nfs4_end_open_seqid_sync(oop); 2319 kmem_free(destcfp, destclen + 1); 2320 nfs4args_copen_free(open_args); 2321 if (!ep->error) 2322 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2323 return; 2324 } 2325 2326 if (reopen) { 2327 /* 2328 * Doing a reopen here so the osp should already exist. 2329 * If not, something changed or went very wrong. 
2330 * 2331 * returns with 'os_sync_lock' held 2332 */ 2333 osp = find_open_stream(oop, rp); 2334 if (!osp) { 2335 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, 2336 "nfs4_resend_open_otw: couldn't find osp")); 2337 ep->error = EINVAL; 2338 goto err_out; 2339 } 2340 osp->os_open_ref_count++; 2341 } else { 2342 mutex_enter(&oop->oo_lock); 2343 oop->oo_just_created = NFS4_PERM_CREATED; 2344 mutex_exit(&oop->oo_lock); 2345 2346 /* returns with 'os_sync_lock' held */ 2347 osp = find_or_create_open_stream(oop, rp, &created_osp); 2348 if (!osp) { 2349 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, 2350 "nfs4_resend_open_otw: couldn't create osp")); 2351 ep->error = EINVAL; 2352 goto err_out; 2353 } 2354 } 2355 2356 osp->open_stateid = op_res->stateid; 2357 osp->os_delegation = FALSE; 2358 /* 2359 * Need to reset this bitfield for the possible case where we were 2360 * going to OTW CLOSE the file, got a non-recoverable error, and before 2361 * we could retry the CLOSE, OPENed the file again. 2362 */ 2363 ASSERT(osp->os_open_owner->oo_seqid_inuse); 2364 osp->os_final_close = 0; 2365 osp->os_force_close = 0; 2366 2367 if (!reopen) { 2368 if (open_args->share_access & OPEN4_SHARE_ACCESS_READ) 2369 osp->os_share_acc_read++; 2370 if (open_args->share_access & OPEN4_SHARE_ACCESS_WRITE) 2371 osp->os_share_acc_write++; 2372 osp->os_share_deny_none++; 2373 } 2374 2375 mutex_exit(&osp->os_sync_lock); 2376 if (created_osp) 2377 nfs4_inc_state_ref_count(mi); 2378 open_stream_rele(osp, rp); 2379 2380 nfs4_end_open_seqid_sync(oop); 2381 2382 /* accept delegation, if any */ 2383 nfs4_delegation_accept(rp, claim, op_res, garp, cr); 2384 2385 kmem_free(destcfp, destclen + 1); 2386 nfs4args_copen_free(open_args); 2387 2388 if (claim == CLAIM_DELEGATE_CUR) 2389 nfs4_attr_cache(vp, garp, t, cr, TRUE, NULL); 2390 else 2391 PURGE_ATTRCACHE4(vp); 2392 2393 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2394 2395 ASSERT(nfs4_consistent_type(vp)); 2396 2397 return; 2398 2399 err_out: 2400 
nfs4_end_open_seqid_sync(oop); 2401 kmem_free(destcfp, destclen + 1); 2402 nfs4args_copen_free(open_args); 2403 if (!ep->error) 2404 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2405 } 2406