/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


#include <nfs/nfs4_clnt.h>
#include <nfs/rnode4.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/atomic.h>

static void	nfs4_free_open_owner(nfs4_open_owner_t *, mntinfo4_t *);
static nfs4_open_owner_t *find_freed_open_owner(cred_t *,
    nfs4_oo_hash_bucket_t *, mntinfo4_t *);
static open_delegation_type4 get_dtype(rnode4_t *);

#ifdef DEBUG
int nfs4_client_foo_debug = 0x0;
int nfs4_client_open_dg = 0x0;
/*
 * If this is non-zero, the lockowner and openowner seqid sync primitives
 * will intermittently return errors.
 */
static int seqid_sync_faults = 0;
#endif

stateid4 clnt_special0 = {
	0,
	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
};

stateid4 clnt_special1 = {
	0xffffffff,
	{
		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
		(char)0xff, (char)0xff, (char)0xff, (char)0xff
	}
};

/* finds hash bucket and locks it */
static nfs4_oo_hash_bucket_t *
lock_bucket(cred_t *cr, mntinfo4_t *mi)
{
	nfs4_oo_hash_bucket_t *bucketp;
	uint32_t hash_key;

	hash_key = (uint32_t)(crgetuid(cr) + crgetruid(cr))
	    % NFS4_NUM_OO_BUCKETS;
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, "lock_bucket: "
	    "hash_key %d for cred %p", hash_key, (void*)cr));

	ASSERT(hash_key >= 0 && hash_key < NFS4_NUM_OO_BUCKETS);
	ASSERT(mi != NULL);
	ASSERT(mutex_owned(&mi->mi_lock));

	bucketp = &(mi->mi_oo_list[hash_key]);
	mutex_enter(&bucketp->b_lock);
	return (bucketp);
}

/* unlocks the hash bucket pointed to by bucketp */
static void
unlock_bucket(nfs4_oo_hash_bucket_t *bucketp)
{
	mutex_exit(&bucketp->b_lock);
}
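
/*
 * Illustrative sketch (editorial addition, not in the original source):
 * callers are expected to hold mi->mi_lock across a bucket search, e.g.
 *
 *	mutex_enter(&mi->mi_lock);
 *	bucketp = lock_bucket(cr, mi);
 *	(search bucketp->b_oo_hash_list)
 *	unlock_bucket(bucketp);
 *	mutex_exit(&mi->mi_lock);
 *
 * find_open_owner_nolock() below follows exactly this pattern.
 */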

/*
 * Removes the lock owner from the rnode's lock_owners list and frees
 * the corresponding reference.
 */
void
nfs4_rnode_remove_lock_owner(rnode4_t *rp, nfs4_lock_owner_t *lop)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_rnode_remove_lock_owner"));

	mutex_enter(&rp->r_statev4_lock);

	if (lop->lo_next_rnode == NULL) {
		/* already removed from list */
		mutex_exit(&rp->r_statev4_lock);
		return;
	}

	ASSERT(lop->lo_prev_rnode != NULL);

	lop->lo_prev_rnode->lo_next_rnode = lop->lo_next_rnode;
	lop->lo_next_rnode->lo_prev_rnode = lop->lo_prev_rnode;

	lop->lo_next_rnode = lop->lo_prev_rnode = NULL;

	mutex_exit(&rp->r_statev4_lock);

	/*
	 * This would be an appropriate place for
	 * RELEASE_LOCKOWNER.  For now, this is overkill
	 * because in the common case, close is going to
	 * release any lockowners anyway.
	 */
	lock_owner_rele(lop);
}

/*
 * Removes all lock owners from the rnode's lock_owners list and frees
 * their references held by the list.
 */
void
nfs4_flush_lock_owners(rnode4_t *rp)
{
	nfs4_lock_owner_t *lop;

	mutex_enter(&rp->r_statev4_lock);
	while (rp->r_lo_head.lo_next_rnode != &rp->r_lo_head) {
		lop = rp->r_lo_head.lo_next_rnode;
		lop->lo_prev_rnode->lo_next_rnode = lop->lo_next_rnode;
		lop->lo_next_rnode->lo_prev_rnode = lop->lo_prev_rnode;
		lop->lo_next_rnode = lop->lo_prev_rnode = NULL;
		lock_owner_rele(lop);
	}
	mutex_exit(&rp->r_statev4_lock);
}

void
nfs4_clear_open_streams(rnode4_t *rp)
{
	nfs4_open_stream_t *osp;

	mutex_enter(&rp->r_os_lock);
	while ((osp = list_head(&rp->r_open_streams)) != NULL) {
		open_owner_rele(osp->os_open_owner);
		list_remove(&rp->r_open_streams, osp);
		mutex_destroy(&osp->os_sync_lock);
		osp->os_open_owner = NULL;
		kmem_free(osp, sizeof (*osp));
	}
	mutex_exit(&rp->r_os_lock);
}

void
open_owner_hold(nfs4_open_owner_t *oop)
{
	mutex_enter(&oop->oo_lock);
	oop->oo_ref_count++;
	mutex_exit(&oop->oo_lock);
}

/*
 * Frees the open owner if the ref count hits zero.
 */
void
open_owner_rele(nfs4_open_owner_t *oop)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "open_owner_rele"));

	mutex_enter(&oop->oo_lock);
	oop->oo_ref_count--;
	if (oop->oo_ref_count == 0) {
		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "open_owner_rele: freeing open owner"));
		oop->oo_valid = 0;
		mutex_exit(&oop->oo_lock);
		/*
		 * Ok, we don't destroy the open owner, nor do we put it
		 * on the mntinfo4's free list just yet.  We are lazy
		 * about it and let callers of find_open_owner() do that
		 * to keep locking simple.
		 */
	} else {
		mutex_exit(&oop->oo_lock);
	}
}

void
open_stream_hold(nfs4_open_stream_t *osp)
{
	mutex_enter(&osp->os_sync_lock);
	osp->os_ref_count++;
	mutex_exit(&osp->os_sync_lock);
}
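
/*
 * Illustrative sketch (editorial addition): a typical consumer of the
 * hold/rele pairs in this file is nfs4_get_open_stateid(), which does
 *
 *	osp = find_open_stream(oop, rp);    (ref bumped, os_sync_lock held)
 *	(copy osp->open_stateid)
 *	mutex_exit(&osp->os_sync_lock);
 *	open_stream_rele(osp, rp);          (may free the open stream)
 */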

/*
 * Frees the open stream and removes it from the rnode4's open streams
 * list if the ref count drops to zero.
 */
void
open_stream_rele(nfs4_open_stream_t *osp, rnode4_t *rp)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "open_stream_rele"));

	ASSERT(!mutex_owned(&rp->r_os_lock));

	mutex_enter(&osp->os_sync_lock);
	ASSERT(osp->os_ref_count > 0);
	osp->os_ref_count--;
	if (osp->os_ref_count == 0) {
		nfs4_open_owner_t *tmp_oop;

		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "open_stream_rele: freeing open stream"));
		osp->os_valid = 0;
		tmp_oop = osp->os_open_owner;
		mutex_exit(&osp->os_sync_lock);

		/* now see if we need to destroy the open owner */
		open_owner_rele(tmp_oop);

		mutex_enter(&rp->r_os_lock);
		list_remove(&rp->r_open_streams, osp);
		mutex_exit(&rp->r_os_lock);

		/* free up osp */
		mutex_destroy(&osp->os_sync_lock);
		osp->os_open_owner = NULL;
		kmem_free(osp, sizeof (*osp));
	} else {
		mutex_exit(&osp->os_sync_lock);
	}
}

void
lock_owner_hold(nfs4_lock_owner_t *lop)
{
	mutex_enter(&lop->lo_lock);
	lop->lo_ref_count++;
	mutex_exit(&lop->lo_lock);
}

/*
 * Frees the lock owner if the ref count hits zero and
 * the structure no longer holds any locks.
 */
void
lock_owner_rele(nfs4_lock_owner_t *lop)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "lock_owner_rele"));

	mutex_enter(&lop->lo_lock);
	lop->lo_ref_count--;
	if (lop->lo_ref_count == 0) {
		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "lock_owner_rele: freeing lock owner: "
		    "%x", lop->lo_pid));
		lop->lo_valid = 0;
		/*
		 * If there are no references, the lock_owner should
		 * already be off the rnode's list.
		 */
		ASSERT(lop->lo_next_rnode == NULL);
		ASSERT(lop->lo_prev_rnode == NULL);
		ASSERT(!(lop->lo_flags & NFS4_LOCK_SEQID_INUSE));
		ASSERT(lop->lo_seqid_holder == NULL);
		mutex_exit(&lop->lo_lock);

		/* free up lop */
		cv_destroy(&lop->lo_cv_seqid_sync);
		mutex_destroy(&lop->lo_lock);
		kmem_free(lop, sizeof (*lop));
	} else {
		mutex_exit(&lop->lo_lock);
	}
}

/*
 * This increments the open owner ref count if found.
 * The argument 'just_created' determines whether we are looking for open
 * owners with the 'oo_just_created' flag set or not.
 */
nfs4_open_owner_t *
find_open_owner_nolock(cred_t *cr, int just_created, mntinfo4_t *mi)
{
	nfs4_open_owner_t	*oop = NULL, *next_oop;
	nfs4_oo_hash_bucket_t	*bucketp;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "find_open_owner: cred %p, just_created %d",
	    (void*)cr, just_created));

	ASSERT(mi != NULL);
	ASSERT(mutex_owned(&mi->mi_lock));

	bucketp = lock_bucket(cr, mi);

	/* got hash bucket, search through open owners */
	for (oop = list_head(&bucketp->b_oo_hash_list); oop != NULL; ) {
		mutex_enter(&oop->oo_lock);
		if (!crcmp(oop->oo_cred, cr) &&
		    (oop->oo_just_created == just_created ||
		    just_created == NFS4_JUST_CREATED)) {
			/* match */
			if (oop->oo_valid == 0) {
				/* reactivate the open owner */
				oop->oo_valid = 1;
				ASSERT(oop->oo_ref_count == 0);
			}
			oop->oo_ref_count++;
			mutex_exit(&oop->oo_lock);
			unlock_bucket(bucketp);
			return (oop);
		}
		next_oop = list_next(&bucketp->b_oo_hash_list, oop);
		if (oop->oo_valid == 0) {
			list_remove(&bucketp->b_oo_hash_list, oop);

			/*
			 * Now we go ahead and put this open owner
			 * on the freed list.  This is our lazy method.
			 */
			nfs4_free_open_owner(oop, mi);
		}

		mutex_exit(&oop->oo_lock);
		oop = next_oop;
	}

	/* search through recently freed open owners */
	oop = find_freed_open_owner(cr, bucketp, mi);

	unlock_bucket(bucketp);

	return (oop);
}

nfs4_open_owner_t *
find_open_owner(cred_t *cr, int just_created, mntinfo4_t *mi)
{
	nfs4_open_owner_t *oop;

	mutex_enter(&mi->mi_lock);
	oop = find_open_owner_nolock(cr, just_created, mi);
	mutex_exit(&mi->mi_lock);

	return (oop);
}

/*
 * This increments osp's ref count if found.
 * Returns with 'os_sync_lock' held.
 */
nfs4_open_stream_t *
find_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp)
{
	nfs4_open_stream_t	*osp;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "find_open_stream"));

	mutex_enter(&rp->r_os_lock);
	/* Now, no one can add or delete to rp's open streams list */
	for (osp = list_head(&rp->r_open_streams); osp != NULL;
	    osp = list_next(&rp->r_open_streams, osp)) {
		mutex_enter(&osp->os_sync_lock);
		if (osp->os_open_owner == oop && osp->os_valid != 0) {
			/* match */
			NFS4_DEBUG(nfs4_client_state_debug,
			    (CE_NOTE, "find_open_stream "
			    "got a match"));

			osp->os_ref_count++;
			mutex_exit(&rp->r_os_lock);
			return (osp);
		}
		mutex_exit(&osp->os_sync_lock);
	}

	mutex_exit(&rp->r_os_lock);
	return (NULL);
}

/*
 * Find the lock owner for the given file and process ID.  If "which" is
 * LOWN_VALID_STATEID, require that the lock owner contain a valid stateid
 * from the server.
 *
 * This increments the lock owner's ref count if found.  Returns NULL if
 * there was no match.
 */
nfs4_lock_owner_t *
find_lock_owner(rnode4_t *rp, pid_t pid, lown_which_t which)
{
	nfs4_lock_owner_t	*lop, *next_lop;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "find_lock_owner: pid %x, which %d", pid, which));

	ASSERT(which == LOWN_ANY || which == LOWN_VALID_STATEID);

	/* search by pid */
	mutex_enter(&rp->r_statev4_lock);

	lop = rp->r_lo_head.lo_next_rnode;
	while (lop != &rp->r_lo_head) {
		mutex_enter(&lop->lo_lock);
		if (lop->lo_pid == pid && lop->lo_valid != 0 &&
		    !(lop->lo_flags & NFS4_BAD_SEQID_LOCK)) {
			if (which == LOWN_ANY ||
			    lop->lo_just_created != NFS4_JUST_CREATED) {
				/* Found a matching lock owner */
				NFS4_DEBUG(nfs4_client_state_debug,
				    (CE_NOTE, "find_lock_owner: "
				    "got a match"));

				lop->lo_ref_count++;
				mutex_exit(&lop->lo_lock);
				mutex_exit(&rp->r_statev4_lock);
				return (lop);
			}
		}
		next_lop = lop->lo_next_rnode;
		mutex_exit(&lop->lo_lock);
		lop = next_lop;
	}

	mutex_exit(&rp->r_statev4_lock);
	return (NULL);
}
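
/*
 * Editorial note on the 'which' argument above: LOWN_VALID_STATEID maps
 * to lock owners that are past the NFS4_JUST_CREATED state, i.e. ones
 * for which the server has already handed back a lock stateid; LOWN_ANY
 * matches just-created lock owners as well.
 */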

/*
 * This returns the delegation stateid as 'sid'.  Returns 1 if a usable
 * delegation stateid was found, otherwise returns 0.
 */
static int
nfs4_get_deleg_stateid(rnode4_t *rp, nfs_opnum4 op, stateid4 *sid)
{
	ASSERT(!mutex_owned(&rp->r_statev4_lock));

	mutex_enter(&rp->r_statev4_lock);
	if (((rp->r_deleg_type == OPEN_DELEGATE_WRITE && op == OP_WRITE) ||
	    (rp->r_deleg_type != OPEN_DELEGATE_NONE && op != OP_WRITE)) &&
	    !rp->r_deleg_return_pending) {

		*sid = rp->r_deleg_stateid;
		mutex_exit(&rp->r_statev4_lock);
		return (1);
	}
	mutex_exit(&rp->r_statev4_lock);
	return (0);
}

/*
 * This returns the lock stateid as 'sid'.  Returns 1 if a usable lock
 * stateid was found, otherwise returns 0.
 */
static int
nfs4_get_lock_stateid(rnode4_t *rp, pid_t pid, stateid4 *sid)
{
	nfs4_lock_owner_t	*lop;

	lop = find_lock_owner(rp, pid, LOWN_VALID_STATEID);

	if (lop) {
		/*
		 * Found a matching lock owner, so use a lock
		 * stateid rather than an open stateid.
		 */
		mutex_enter(&lop->lo_lock);
		*sid = lop->lock_stateid;
		mutex_exit(&lop->lo_lock);
		lock_owner_rele(lop);
		return (1);
	}

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_get_lock_stateid: no lop"));
	return (0);
}

/*
 * This returns the open stateid as 'sid'.  Returns 1 if a usable open
 * stateid was found, otherwise returns 0.
 *
 * Once the stateid is returned to the caller, it is no longer protected;
 * so the caller must be prepared to handle OLD/BAD_STATEID where
 * appropriate.
 */
static int
nfs4_get_open_stateid(rnode4_t *rp, cred_t *cr, mntinfo4_t *mi, stateid4 *sid)
{
	nfs4_open_owner_t	*oop;
	nfs4_open_stream_t	*osp;

	ASSERT(mi != NULL);

	oop = find_open_owner(cr, NFS4_PERM_CREATED, mi);
	if (!oop) {
		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "nfs4_get_open_stateid: no oop"));
		return (0);
	}

	osp = find_open_stream(oop, rp);
	open_owner_rele(oop);
	if (!osp) {
		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "nfs4_get_open_stateid: no osp"));
		return (0);
	}

	if (osp->os_failed_reopen) {
		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "nfs4_get_open_stateid: osp %p failed reopen",
		    (void *)osp));
		mutex_exit(&osp->os_sync_lock);
		open_stream_rele(osp, rp);
		return (0);
	}
	*sid = osp->open_stateid;
	mutex_exit(&osp->os_sync_lock);
	open_stream_rele(osp, rp);
	return (1);
}
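
/*
 * Editorial summary of the stateid selection order used by the helpers
 * below (the authoritative description is each function's own comment):
 *
 *	1. delegation stateid	(nfs4_get_deleg_stateid)
 *	2. lock stateid		(nfs4_get_lock_stateid)
 *	3. open stateid		(nfs4_get_open_stateid)
 *	4. special stateid	<seqid = 0, other = 0>
 *
 * A level is skipped when its stateid matches the one already recorded
 * in 'sid_tp', i.e. a stateid that was previously tried and rejected.
 */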

/*
 * Returns the delegation stateid if this 'op' is OP_WRITE and the
 * delegation we hold is a write delegation, OR if this 'op' is not
 * OP_WRITE and we hold any delegation (read or write); otherwise
 * returns the lock stateid if there is a lock owner; otherwise
 * returns the open stateid if there is an open stream; otherwise
 * returns the special stateid <seqid = 0, other = 0>.
 *
 * Used for WRITE operations.
 */
stateid4
nfs4_get_w_stateid(cred_t *cr, rnode4_t *rp, pid_t pid, mntinfo4_t *mi,
    nfs_opnum4 op, nfs4_stateid_types_t *sid_tp)
{
	stateid4 sid;

	if (nfs4_get_deleg_stateid(rp, op, &sid)) {
		if (!stateid4_cmp(&sid, &sid_tp->d_sid)) {
			sid_tp->cur_sid_type = DEL_SID;
			return (sid);
		}
	}
	if (nfs4_get_lock_stateid(rp, pid, &sid)) {
		if (!stateid4_cmp(&sid, &sid_tp->l_sid)) {
			sid_tp->cur_sid_type = LOCK_SID;
			return (sid);
		}
	}
	if (nfs4_get_open_stateid(rp, cr, mi, &sid)) {
		if (!stateid4_cmp(&sid, &sid_tp->o_sid)) {
			sid_tp->cur_sid_type = OPEN_SID;
			return (sid);
		}
	}
	bzero(&sid, sizeof (stateid4));
	sid_tp->cur_sid_type = SPEC_SID;
	return (sid);
}

/*
 * Returns the delegation stateid if this 'op' is OP_WRITE and the
 * delegation we hold is a write delegation, OR if this 'op' is not
 * OP_WRITE and we hold any delegation (read or write); otherwise
 * returns the lock stateid if there is a lock owner; otherwise
 * returns the open stateid if there is an open stream; otherwise
 * returns the special stateid <seqid = 0, other = 0>.
 *
 * This also updates which stateid we are using in 'sid_tp', skips
 * previously attempted stateids, and for async reads skips checking
 * stateids of higher priority than the current level as dictated by
 * 'sid_tp->cur_sid_type'.
 *
 * Used for READ and SETATTR operations.
 */
stateid4
nfs4_get_stateid(cred_t *cr, rnode4_t *rp, pid_t pid, mntinfo4_t *mi,
    nfs_opnum4 op, nfs4_stateid_types_t *sid_tp, bool_t async_read)
{
	stateid4 sid;

	/*
	 * For asynchronous READs, do not attempt to retry from the start of
	 * the stateid priority list, just continue from where you last left
	 * off.
	 */
	if (async_read) {
		switch (sid_tp->cur_sid_type) {
		case NO_SID:
			break;
		case DEL_SID:
			goto lock_stateid;
		case LOCK_SID:
			goto open_stateid;
		case OPEN_SID:
			goto special_stateid;
		case SPEC_SID:
		default:
			cmn_err(CE_PANIC, "nfs4_get_stateid: illegal current "
			    "stateid type %d", sid_tp->cur_sid_type);
		}
	}

	if (nfs4_get_deleg_stateid(rp, op, &sid)) {
		if (!stateid4_cmp(&sid, &sid_tp->d_sid)) {
			sid_tp->cur_sid_type = DEL_SID;
			return (sid);
		}
	}
lock_stateid:
	if (nfs4_get_lock_stateid(rp, pid, &sid)) {
		if (!stateid4_cmp(&sid, &sid_tp->l_sid)) {
			sid_tp->cur_sid_type = LOCK_SID;
			return (sid);
		}
	}
open_stateid:
	if (nfs4_get_open_stateid(rp, cr, mi, &sid)) {
		if (!stateid4_cmp(&sid, &sid_tp->o_sid)) {
			sid_tp->cur_sid_type = OPEN_SID;
			return (sid);
		}
	}
special_stateid:
	bzero(&sid, sizeof (stateid4));
	sid_tp->cur_sid_type = SPEC_SID;
	return (sid);
}

void
nfs4_set_lock_stateid(nfs4_lock_owner_t *lop, stateid4 stateid)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_set_lock_stateid"));

	ASSERT(lop);
	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);

	mutex_enter(&lop->lo_lock);
	lop->lock_stateid = stateid;
	mutex_exit(&lop->lo_lock);
}

/*
 * Sequence number used when a new open owner is needed.
 * This is used so as to not confuse the server.  Since an open owner
 * is based off of a cred, a cred could be re-used quickly, and the
 * server may not yet have released all state for that cred.
 */
static uint64_t open_owner_seq_num = 0;

uint64_t
nfs4_get_new_oo_name(void)
{
	return (atomic_add_64_nv(&open_owner_seq_num, 1));
}

/*
 * Create a new open owner and add it to the open owner hash table.
 */
nfs4_open_owner_t *
create_open_owner(cred_t *cr, mntinfo4_t *mi)
{
	nfs4_open_owner_t	*oop;
	nfs4_oo_hash_bucket_t	*bucketp;

	oop = kmem_alloc(sizeof (nfs4_open_owner_t), KM_SLEEP);
	/*
	 * Make sure the cred doesn't go away when we put this open owner
	 * on the free list, as well as make crcmp() a valid check.
	 */
	crhold(cr);
	oop->oo_cred = cr;
	mutex_init(&oop->oo_lock, NULL, MUTEX_DEFAULT, NULL);
	oop->oo_ref_count = 1;
	oop->oo_valid = 1;
	oop->oo_just_created = NFS4_JUST_CREATED;
	oop->oo_seqid = 0;
	oop->oo_seqid_inuse = 0;
	oop->oo_last_good_seqid = 0;
	oop->oo_last_good_op = TAG_NONE;
	oop->oo_cred_otw = NULL;
	cv_init(&oop->oo_cv_seqid_sync, NULL, CV_DEFAULT, NULL);

	/*
	 * A Solaris open_owner is <oo_seq_num>
	 */
	oop->oo_name = nfs4_get_new_oo_name();

	/* now add the struct into the cred hash table */
	ASSERT(mutex_owned(&mi->mi_lock));
	bucketp = lock_bucket(cr, mi);
	list_insert_head(&bucketp->b_oo_hash_list, oop);
	unlock_bucket(bucketp);

	return (oop);
}

/*
 * Create a new open stream and add it to the rnode's list.
 * Increments the ref count on oop.
 * Returns with 'os_sync_lock' held.
 */
nfs4_open_stream_t *
create_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp)
{
	nfs4_open_stream_t	*osp;

#ifdef DEBUG
	mutex_enter(&oop->oo_lock);
	ASSERT(oop->oo_seqid_inuse);
	mutex_exit(&oop->oo_lock);
#endif

	osp = kmem_alloc(sizeof (nfs4_open_stream_t), KM_SLEEP);
	osp->os_open_ref_count = 1;
	osp->os_mapcnt = 0;
	osp->os_ref_count = 2;
	osp->os_valid = 1;
	osp->os_open_owner = oop;
	osp->os_orig_oo_name = oop->oo_name;
	bzero(&osp->open_stateid, sizeof (stateid4));
	osp->os_share_acc_read = 0;
	osp->os_share_acc_write = 0;
	osp->os_mmap_read = 0;
	osp->os_mmap_write = 0;
	osp->os_share_deny_none = 0;
	osp->os_share_deny_read = 0;
	osp->os_share_deny_write = 0;
	osp->os_delegation = 0;
	osp->os_dc_openacc = 0;
	osp->os_final_close = 0;
	osp->os_pending_close = 0;
	osp->os_failed_reopen = 0;
	osp->os_force_close = 0;
	mutex_init(&osp->os_sync_lock, NULL, MUTEX_DEFAULT, NULL);

	/* open owner gets a reference */
	open_owner_hold(oop);

	/* now add the open stream to rp */
	mutex_enter(&rp->r_os_lock);
	mutex_enter(&osp->os_sync_lock);
	list_insert_head(&rp->r_open_streams, osp);
	mutex_exit(&rp->r_os_lock);

	return (osp);
}

/*
 * Returns an open stream with 'os_sync_lock' held.
 * If the open stream is found (rather than created), its
 * 'os_open_ref_count' is bumped.
 *
 * There is no race with two threads entering this function
 * and creating two open streams for the same <oop, rp> pair.
 * This is because the open seqid sync must be acquired, thus
 * only allowing one thread in at a time.
 */
nfs4_open_stream_t *
find_or_create_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp,
    int *created_osp)
{
	nfs4_open_stream_t	*osp;

#ifdef DEBUG
	mutex_enter(&oop->oo_lock);
	ASSERT(oop->oo_seqid_inuse);
	mutex_exit(&oop->oo_lock);
#endif

	osp = find_open_stream(oop, rp);
	if (!osp) {
		osp = create_open_stream(oop, rp);
		if (osp)
			*created_osp = 1;
	} else {
		*created_osp = 0;
		osp->os_open_ref_count++;
	}

	return (osp);
}

static uint64_t lock_owner_seq_num = 0;

/*
 * Create a new lock owner and add it to the rnode's list.
 * Assumes the rnode's r_statev4_lock is held.
 * The created lock owner has a reference count of 2: one for the list and
 * one for the caller to use.  Returns the lock owner locked down.
 */
nfs4_lock_owner_t *
create_lock_owner(rnode4_t *rp, pid_t pid)
{
	nfs4_lock_owner_t	*lop;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "create_lock_owner: pid %x", pid));

	ASSERT(mutex_owned(&rp->r_statev4_lock));

	lop = kmem_alloc(sizeof (nfs4_lock_owner_t), KM_SLEEP);
	lop->lo_ref_count = 2;
	lop->lo_valid = 1;
	bzero(&lop->lock_stateid, sizeof (stateid4));
	lop->lo_pid = pid;
	lop->lock_seqid = 0;
	lop->lo_pending_rqsts = 0;
	lop->lo_just_created = NFS4_JUST_CREATED;
	lop->lo_flags = 0;
	lop->lo_seqid_holder = NULL;

	/*
	 * A Solaris lock_owner is <seq_num><pid>
	 */
	lop->lock_owner_name.ln_seq_num =
	    atomic_add_64_nv(&lock_owner_seq_num, 1);
	lop->lock_owner_name.ln_pid = pid;

	cv_init(&lop->lo_cv_seqid_sync, NULL, CV_DEFAULT, NULL);
	mutex_init(&lop->lo_lock, NULL, MUTEX_DEFAULT, NULL);

	mutex_enter(&lop->lo_lock);

	/* now add the lock owner to rp */
	lop->lo_prev_rnode = &rp->r_lo_head;
	lop->lo_next_rnode = rp->r_lo_head.lo_next_rnode;
	rp->r_lo_head.lo_next_rnode->lo_prev_rnode = lop;
	rp->r_lo_head.lo_next_rnode = lop;

	return (lop);
}

/*
 * This sets the lock seqid of a lock owner.
 */
void
nfs4_set_lock_seqid(seqid4 seqid, nfs4_lock_owner_t *lop)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_set_lock_seqid"));

	ASSERT(lop != NULL);
	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);

	lop->lock_seqid = seqid;
}

static void
nfs4_set_new_lock_owner_args(lock_owner4 *owner, pid_t pid)
{
	nfs4_lo_name_t *cast_namep;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_set_new_lock_owner_args"));

	owner->owner_len = sizeof (*cast_namep);
	owner->owner_val = kmem_alloc(owner->owner_len, KM_SLEEP);
	/*
	 * A Solaris lock_owner is <seq_num><pid>
	 */
	cast_namep = (nfs4_lo_name_t *)owner->owner_val;
	cast_namep->ln_seq_num = atomic_add_64_nv(&lock_owner_seq_num, 1);
	cast_namep->ln_pid = pid;
}
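
/*
 * Editorial note: nfs4_set_new_lock_owner_args() above and
 * nfs4_setlockowner_args() below both emit the owner as the raw bytes
 * of an nfs4_lo_name_t, conceptually
 *
 *	<ln_seq_num><ln_pid>
 *
 * with owner_len always sizeof (nfs4_lo_name_t).  The precise field
 * types are defined in the nfs4 client headers; the layout shown here
 * is only a sketch of how the fields are used in this file.
 */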

/*
 * Fill in the lock owner args.
 */
void
nfs4_setlockowner_args(lock_owner4 *owner, rnode4_t *rp, pid_t pid)
{
	nfs4_lock_owner_t	*lop;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_setlockowner_args"));

	/* This increments lop's ref count */
	lop = find_lock_owner(rp, pid, LOWN_VALID_STATEID);

	if (!lop)
		goto make_up_args;

	mutex_enter(&lop->lo_lock);
	owner->owner_len = sizeof (lop->lock_owner_name);
	owner->owner_val = kmem_alloc(owner->owner_len, KM_SLEEP);
	bcopy(&lop->lock_owner_name, owner->owner_val,
	    owner->owner_len);
	mutex_exit(&lop->lo_lock);
	lock_owner_rele(lop);
	return;

make_up_args:
	nfs4_set_new_lock_owner_args(owner, pid);
}

/*
 * This ends our use of the open owner's open seqid by setting
 * the appropriate flags and issuing a cv_signal to wake up another
 * thread waiting to use the open seqid.
 */
void
nfs4_end_open_seqid_sync(nfs4_open_owner_t *oop)
{
	mutex_enter(&oop->oo_lock);
	ASSERT(oop->oo_seqid_inuse);
	oop->oo_seqid_inuse = 0;
	cv_broadcast(&oop->oo_cv_seqid_sync);
	mutex_exit(&oop->oo_lock);
}

/*
 * This starts our use of the open owner's open seqid by setting
 * oo_seqid_inuse to true.  We will wait (forever) with a
 * cv_wait() until we are woken up.
 *
 * Return values:
 * 0		no problems
 * EAGAIN	caller should retry (like a recovery retry)
 */
int
nfs4_start_open_seqid_sync(nfs4_open_owner_t *oop, mntinfo4_t *mi)
{
	int error = 0;
#ifdef DEBUG
	static int ops = 0;	/* fault injection */
#endif

#ifdef DEBUG
	if (seqid_sync_faults && curthread != mi->mi_recovthread &&
	    ++ops % 5 == 0)
		return (EAGAIN);
#endif

	mutex_enter(&mi->mi_lock);
	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
	    curthread != mi->mi_recovthread)
		error = EAGAIN;
	mutex_exit(&mi->mi_lock);
	if (error != 0)
		goto done;

	mutex_enter(&oop->oo_lock);

	while (oop->oo_seqid_inuse) {
		NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
		    "nfs4_start_open_seqid_sync waiting on cv"));

		cv_wait(&oop->oo_cv_seqid_sync, &oop->oo_lock);
	}

	oop->oo_seqid_inuse = 1;

	mutex_exit(&oop->oo_lock);

	mutex_enter(&mi->mi_lock);
	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
	    curthread != mi->mi_recovthread)
		error = EAGAIN;
	mutex_exit(&mi->mi_lock);

	if (error == EAGAIN)
		nfs4_end_open_seqid_sync(oop);

	NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
	    "nfs4_start_open_seqid_sync: error=%d", error));

done:
	return (error);
}

#ifdef DEBUG
int bypass_otw[2];
#endif

/*
 * Checks to see if the OPEN OTW is necessary, that is, if it's already
 * been opened with the same access and deny bits we are now asking for.
 * Note, this assumes that *vpp is an rnode.
 */
int
nfs4_is_otw_open_necessary(nfs4_open_owner_t *oop, int flag, vnode_t *vp,
    int just_been_created, int *errorp, int acc, nfs4_recov_state_t *rsp)
{
	rnode4_t *rp;
	nfs4_open_stream_t *osp;
	open_delegation_type4 dt;

	rp = VTOR4(vp);

	/*
	 * Grab the delegation type.  This function is protected against
	 * the delegation being returned by virtue of start_op (called
	 * by nfs4open_otw) taking the r_deleg_recall_lock in read mode;
	 * delegreturn requires this lock in write mode to proceed.
	 */
	ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_READER));
	dt = get_dtype(rp);

	/* returns with 'os_sync_lock' held */
	osp = find_open_stream(oop, rp);

	if (osp) {
		uint32_t do_otw = 0;

		if (osp->os_failed_reopen) {
			NFS4_DEBUG(nfs4_open_stream_debug, (CE_NOTE,
			    "nfs4_is_otw_open_necessary: os_failed_reopen "
			    "set on osp %p, cr %p, rp %s", (void *)osp,
			    (void *)osp->os_open_owner->oo_cred,
			    rnode4info(rp)));
			do_otw = 1;
		}

		/*
		 * check access/deny bits
		 */
		if (!do_otw && (flag & FREAD))
			if (osp->os_share_acc_read == 0 &&
			    dt == OPEN_DELEGATE_NONE)
				do_otw = 1;

		if (!do_otw && (flag & FWRITE))
			if (osp->os_share_acc_write == 0 &&
			    dt != OPEN_DELEGATE_WRITE)
				do_otw = 1;

		if (!do_otw) {
			NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
			    "nfs4_is_otw_open_necessary: can skip this "
			    "open OTW"));
			if (!just_been_created) {
				osp->os_open_ref_count++;
				if (flag & FREAD)
					osp->os_share_acc_read++;
				if (flag & FWRITE)
					osp->os_share_acc_write++;
				osp->os_share_deny_none++;
			}

			/*
			 * Need to reset this bitfield for the possible case
			 * where we were going to OTW CLOSE the file, got a
			 * non-recoverable error, and before we could retry
			 * the CLOSE, OPENed the file again.
			 */
			ASSERT(osp->os_open_owner->oo_seqid_inuse);
			osp->os_final_close = 0;
			osp->os_force_close = 0;

			mutex_exit(&osp->os_sync_lock);
			open_stream_rele(osp, rp);

#ifdef DEBUG
			bypass_otw[0]++;
#endif

			*errorp = 0;
			return (0);
		}
		mutex_exit(&osp->os_sync_lock);
		open_stream_rele(osp, rp);

	} else if (dt != OPEN_DELEGATE_NONE) {
		/*
		 * Even if there isn't an open_stream yet, we may still be
		 * able to bypass the otw open if the client owns a delegation.
		 *
		 * If you are asking for WRITE, but I only have
		 * a read delegation, then you still have to go otw.
		 */

		if (flag & FWRITE && dt == OPEN_DELEGATE_READ)
			return (1);

		/*
		 * TODO - evaluate the nfsace4
		 */

		/*
		 * Check the access flags to make sure the caller
		 * had permission.
		 */
		if (flag & FREAD && !(acc & VREAD))
			return (1);

		if (flag & FWRITE && !(acc & VWRITE))
			return (1);

		/*
		 * create_open_stream will add a reference to oop;
		 * this will prevent the open_owner_rele done in
		 * nfs4open_otw from destroying the open_owner.
		 */

		/* returns with 'os_sync_lock' held */
		osp = create_open_stream(oop, rp);
		if (osp == NULL)
			return (1);

		osp->open_stateid = rp->r_deleg_stateid;
		osp->os_delegation = 1;

		if (flag & FREAD)
			osp->os_share_acc_read++;
		if (flag & FWRITE)
			osp->os_share_acc_write++;

		osp->os_share_deny_none++;
		mutex_exit(&osp->os_sync_lock);

		open_stream_rele(osp, rp);

		mutex_enter(&oop->oo_lock);
		oop->oo_just_created = NFS4_PERM_CREATED;
		mutex_exit(&oop->oo_lock);

		ASSERT(rsp != NULL);
		if (rsp->rs_sp != NULL) {
			mutex_enter(&rsp->rs_sp->s_lock);
			nfs4_inc_state_ref_count_nolock(rsp->rs_sp,
			    VTOMI4(vp));
			mutex_exit(&rsp->rs_sp->s_lock);
		}
#ifdef DEBUG
		bypass_otw[1]++;
#endif

		*errorp = 0;
		return (0);
	}

	return (1);
}

static open_delegation_type4
get_dtype(rnode4_t *rp)
{
	open_delegation_type4 dt;

	mutex_enter(&rp->r_statev4_lock);
	ASSERT(!rp->r_deleg_return_inprog);
	if (rp->r_deleg_return_pending)
		dt = OPEN_DELEGATE_NONE;
	else
		dt = rp->r_deleg_type;
	mutex_exit(&rp->r_statev4_lock);

	return (dt);
}

/*
 * Fill in *locker with the lock state arguments for a LOCK call.  If
 * lop->lo_just_created == NFS4_JUST_CREATED, oop and osp must be non-NULL.
 * Caller must already hold the necessary seqid sync lock(s).
 */
void
nfs4_setup_lock_args(nfs4_lock_owner_t *lop, nfs4_open_owner_t *oop,
    nfs4_open_stream_t *osp, clientid4 clientid, locker4 *locker)
{
	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
	if (lop->lo_just_created == NFS4_JUST_CREATED) {
		/* this is a new lock request */
		open_to_lock_owner4 *nown;

		ASSERT(oop != NULL);
		ASSERT(osp != NULL);

		locker->new_lock_owner = TRUE;
		nown = &locker->locker4_u.open_owner;
		nown->open_seqid = nfs4_get_open_seqid(oop) + 1;
		mutex_enter(&osp->os_sync_lock);
		nown->open_stateid = osp->open_stateid;
		mutex_exit(&osp->os_sync_lock);
		nown->lock_seqid = lop->lock_seqid;	/* initial, so no +1 */

		nown->lock_owner.clientid = clientid;
		nown->lock_owner.owner_len = sizeof (lop->lock_owner_name);
		nown->lock_owner.owner_val =
		    kmem_alloc(nown->lock_owner.owner_len, KM_SLEEP);
		bcopy(&lop->lock_owner_name, nown->lock_owner.owner_val,
		    nown->lock_owner.owner_len);
	} else {
		exist_lock_owner4 *eown;
		/* have an existing lock owner */

		locker->new_lock_owner = FALSE;
		eown = &locker->locker4_u.lock_owner;
		mutex_enter(&lop->lo_lock);
		eown->lock_stateid = lop->lock_stateid;
		mutex_exit(&lop->lo_lock);
		eown->lock_seqid = lop->lock_seqid + 1;
	}
}

/*
 * This starts our use of the lock owner's lock seqid by setting
 * NFS4_LOCK_SEQID_INUSE in lo_flags.  We will wait (forever) with a
 * cv_wait() until we are woken up.
 *
 * Return values:
 * 0		no problems
 * EAGAIN	caller should retry (like a recovery retry)
 */
int
nfs4_start_lock_seqid_sync(nfs4_lock_owner_t *lop, mntinfo4_t *mi)
{
	int error = 0;
#ifdef DEBUG
	static int ops = 0;	/* fault injection */
#endif

#ifdef DEBUG
	if (seqid_sync_faults && curthread != mi->mi_recovthread &&
	    ++ops % 7 == 0)
		return (EAGAIN);
#endif

	mutex_enter(&mi->mi_lock);
	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
	    curthread != mi->mi_recovthread)
		error = EAGAIN;
	mutex_exit(&mi->mi_lock);
	if (error != 0)
		goto done;

	mutex_enter(&lop->lo_lock);

	ASSERT(lop->lo_seqid_holder != curthread);
	while (lop->lo_flags & NFS4_LOCK_SEQID_INUSE) {
		NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
		    "nfs4_start_lock_seqid_sync: waiting on cv"));

		cv_wait(&lop->lo_cv_seqid_sync, &lop->lo_lock);
	}
	NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE, "nfs4_start_lock_seqid_sync: "
	    "NFS4_LOCK_SEQID_INUSE"));

	lop->lo_flags |= NFS4_LOCK_SEQID_INUSE;
	lop->lo_seqid_holder = curthread;
	mutex_exit(&lop->lo_lock);

	mutex_enter(&mi->mi_lock);
	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
	    curthread != mi->mi_recovthread)
		error = EAGAIN;
	mutex_exit(&mi->mi_lock);

	if (error == EAGAIN)
		nfs4_end_lock_seqid_sync(lop);

	NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
	    "nfs4_start_lock_seqid_sync: error=%d", error));

done:
	return (error);
}

/*
 * This ends our use of the lock owner's lock seqid by setting
 * the appropriate flags and issuing a cv_signal to wake up another
 * thread waiting to use the lock seqid.
 */
void
nfs4_end_lock_seqid_sync(nfs4_lock_owner_t *lop)
{
	mutex_enter(&lop->lo_lock);
	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
	ASSERT(lop->lo_seqid_holder == curthread);
	lop->lo_flags &= ~NFS4_LOCK_SEQID_INUSE;
	lop->lo_seqid_holder = NULL;
	cv_broadcast(&lop->lo_cv_seqid_sync);
	mutex_exit(&lop->lo_lock);
}
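
/*
 * Illustrative usage sketch (editorial addition): the lock seqid sync
 * primitives above bracket any operation that consumes a lock seqid,
 * along the lines of
 *
 *	if (nfs4_start_lock_seqid_sync(lop, mi) == EAGAIN)
 *		(back off, let recovery run, retry)
 *	(issue LOCK/LOCKU using lop->lock_seqid)
 *	nfs4_end_lock_seqid_sync(lop);
 *
 * nfs4_find_or_create_lock_owner() below acquires the same sync on
 * behalf of its caller.
 */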

/*
 * Returns a reference to a lock owner via lopp, which has its lock seqid
 * synchronization started.
 * If the lock owner is in the 'just_created' state, then we return its open
 * owner and open stream and start the open seqid synchronization.
 *
 * Return values:
 * NFS4_OK		no problems
 * NFS4ERR_DELAY	there is lost state to recover; caller should retry
 * NFS4ERR_IO		no open stream
 */
nfsstat4
nfs4_find_or_create_lock_owner(pid_t pid, rnode4_t *rp, cred_t *cr,
    nfs4_open_owner_t **oopp, nfs4_open_stream_t **ospp,
    nfs4_lock_owner_t **lopp)
{
	nfs4_lock_owner_t	*lop, *next_lop;
	mntinfo4_t		*mi;
	int			error = 0;
	nfsstat4		stat;

	mi = VTOMI4(RTOV4(rp));

	mutex_enter(&rp->r_statev4_lock);

	lop = rp->r_lo_head.lo_next_rnode;
	while (lop != &rp->r_lo_head) {
		mutex_enter(&lop->lo_lock);
		if (lop->lo_pid == pid && lop->lo_valid != 0) {
			/* Found a matching lock owner */
			NFS4_DEBUG(nfs4_client_state_debug,
			    (CE_NOTE, "nfs4_find_or_create_lock_owner: "
			    "got a match"));
			lop->lo_ref_count++;
			break;
		}
		next_lop = lop->lo_next_rnode;
		mutex_exit(&lop->lo_lock);
		lop = next_lop;
	}

	if (lop == &rp->r_lo_head) {
		/* create temporary lock owner */
		lop = create_lock_owner(rp, pid);
	}
	mutex_exit(&rp->r_statev4_lock);

	/* Have a locked down lock owner struct now */
	if (lop->lo_just_created != NFS4_JUST_CREATED) {
		/* This is an existing lock owner */
		*oopp = NULL;
		*ospp = NULL;
	} else {
		/* Lock owner doesn't exist yet */

		/* First grab open owner seqid synchronization */
		mutex_exit(&lop->lo_lock);
		*oopp = find_open_owner(cr, NFS4_PERM_CREATED, mi);
		if (*oopp == NULL)
			goto kill_new_lop;
		error = nfs4_start_open_seqid_sync(*oopp, mi);
		if (error == EAGAIN) {
			stat = NFS4ERR_DELAY;
			goto failed;
		}
		*ospp = find_open_stream(*oopp, rp);
		if (*ospp == NULL) {
			nfs4_end_open_seqid_sync(*oopp);
			goto kill_new_lop;
		}
		if ((*ospp)->os_failed_reopen) {
			mutex_exit(&(*ospp)->os_sync_lock);
			NFS4_DEBUG((nfs4_open_stream_debug ||
			    nfs4_client_lock_debug), (CE_NOTE,
			    "nfs4_find_or_create_lock_owner: os_failed_reopen; "
			    "osp %p, cr %p, rp %s", (void *)(*ospp),
			    (void *)cr, rnode4info(rp)));
			nfs4_end_open_seqid_sync(*oopp);
			stat = NFS4ERR_IO;
			goto failed;
		}
		mutex_exit(&(*ospp)->os_sync_lock);

		/*
		 * Now see if the lock owner has become permanent while we
		 * had released our lock.
		 */
		mutex_enter(&lop->lo_lock);
		if (lop->lo_just_created != NFS4_JUST_CREATED) {
			nfs4_end_open_seqid_sync(*oopp);
			open_stream_rele(*ospp, rp);
			open_owner_rele(*oopp);
			*oopp = NULL;
			*ospp = NULL;
		}
	}
	mutex_exit(&lop->lo_lock);

	error = nfs4_start_lock_seqid_sync(lop, mi);
	if (error == EAGAIN) {
		if (*oopp != NULL)
			nfs4_end_open_seqid_sync(*oopp);
		stat = NFS4ERR_DELAY;
		goto failed;
	}
	ASSERT(error == 0);

	*lopp = lop;
	return (NFS4_OK);

kill_new_lop:
	/*
	 * A previous CLOSE was attempted but got EINTR, but the application
	 * continued to use the unspecified state file descriptor.  But now
	 * the open stream is gone (which could also destroy the open owner),
	 * hence we can no longer continue.  The calling function should
	 * return EIO to the application.
	 */
	NFS4_DEBUG(nfs4_lost_rqst_debug || nfs4_client_lock_debug,
	    (CE_NOTE, "nfs4_find_or_create_lock_owner: destroy newly created "
	    "lop %p, oop %p, osp %p", (void *)lop, (void *)(*oopp),
	    (void *)(*ospp)));

	nfs4_rnode_remove_lock_owner(rp, lop);
	stat = NFS4ERR_IO;

failed:
	lock_owner_rele(lop);
	if (*oopp) {
		open_owner_rele(*oopp);
		*oopp = NULL;
	}
	if (*ospp) {
		open_stream_rele(*ospp, rp);
		*ospp = NULL;
	}
	return (stat);
}

/*
 * This function grabs a recently freed open owner off of the freed open
 * owner list if there is a match on the cred 'cr'.  It returns NULL if no
 * such match is found.  If a match is found, it resets both 'oo_ref_count'
 * and 'oo_valid' to 1 (sane values).
 */
static nfs4_open_owner_t *
find_freed_open_owner(cred_t *cr, nfs4_oo_hash_bucket_t *bucketp,
    mntinfo4_t *mi)
{
	nfs4_open_owner_t	*foop;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "find_freed_open_owner: cred %p", (void*)cr));

	ASSERT(mutex_owned(&mi->mi_lock));
	ASSERT(mutex_owned(&bucketp->b_lock));

	/* got hash bucket, search through freed open owners */
	for (foop = list_head(&mi->mi_foo_list); foop != NULL;
	    foop = list_next(&mi->mi_foo_list, foop)) {
		if (!crcmp(foop->oo_cred, cr)) {
			NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
			    "find_freed_open_owner: got a match open owner "
			    "%p", (void *)foop));
			foop->oo_ref_count = 1;
			foop->oo_valid = 1;
			list_remove(&mi->mi_foo_list, foop);
			mi->mi_foo_num--;

			/* now add the struct into the cred hash table */
			list_insert_head(&bucketp->b_oo_hash_list, foop);
			return (foop);
		}
	}

	return (NULL);
}
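
/*
 * Editorial note: find_freed_open_owner() above and nfs4_free_open_owner()
 * below together implement a small per-mntinfo4 cache of freed open
 * owners: freed entries are kept on mi_foo_list (most recently freed at
 * the head), bounded by mi_foo_max, and are revived with a ref count of 1
 * when the same cred shows up again.
 */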

/*
 * Insert the newly freed 'oop' into the mi's freed oop list,
 * always at the head of the list.  If we've already reached
 * our maximum allowed number of freed open owners (mi_foo_max),
 * then remove the LRU open owner on the list (namely the tail).
 */
static void
nfs4_free_open_owner(nfs4_open_owner_t *oop, mntinfo4_t *mi)
{
	nfs4_open_owner_t	*lru_foop;

	if (mi->mi_foo_num < mi->mi_foo_max) {
		NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
		    "nfs4_free_open_owner: num free %d, max free %d, "
		    "insert open owner %p for mntinfo4 %p",
		    mi->mi_foo_num, mi->mi_foo_max, (void *)oop,
		    (void *)mi));
		list_insert_head(&mi->mi_foo_list, oop);
		mi->mi_foo_num++;
		return;
	}

	/* need to replace a freed open owner */

	lru_foop = list_tail(&mi->mi_foo_list);

	NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
	    "nfs4_free_open_owner: destroy %p, insert %p",
	    (void *)lru_foop, (void *)oop));

	list_remove(&mi->mi_foo_list, lru_foop);
	nfs4_destroy_open_owner(lru_foop);

	/* head always has latest freed oop */
	list_insert_head(&mi->mi_foo_list, oop);
}

void
nfs4_destroy_open_owner(nfs4_open_owner_t *oop)
{
	ASSERT(oop != NULL);

	crfree(oop->oo_cred);
	if (oop->oo_cred_otw)
		crfree(oop->oo_cred_otw);
	mutex_destroy(&oop->oo_lock);
	cv_destroy(&oop->oo_cv_seqid_sync);
	kmem_free(oop, sizeof (*oop));
}

seqid4
nfs4_get_open_seqid(nfs4_open_owner_t *oop)
{
	ASSERT(oop->oo_seqid_inuse);
	return (oop->oo_seqid);
}

/*
 * This sets the open seqid for an <open owner/mntinfo4> pair.
 */
void
nfs4_set_open_seqid(seqid4 seqid, nfs4_open_owner_t *oop,
    nfs4_tag_type_t tag_type)
{
	ASSERT(oop->oo_seqid_inuse);
	oop->oo_seqid = seqid;
	oop->oo_last_good_seqid = seqid;
	oop->oo_last_good_op = tag_type;
}

/*
 * This bumps the current open seqid for the open owner 'oop'.
 */
void
nfs4_get_and_set_next_open_seqid(nfs4_open_owner_t *oop,
    nfs4_tag_type_t tag_type)
{
	ASSERT(oop->oo_seqid_inuse);
	oop->oo_seqid++;
	oop->oo_last_good_seqid = oop->oo_seqid;
	oop->oo_last_good_op = tag_type;
}

/*
 * If no open owner was provided, this function takes the cred to find an
 * open owner within the given mntinfo4_t.  Either way we return the
 * open owner's OTW credential if it exists; otherwise returns the
 * supplied 'cr'.
 *
 * A hold is put on the returned credential, and it is up to the caller
 * to free the cred.
 */
cred_t *
nfs4_get_otw_cred(cred_t *cr, mntinfo4_t *mi, nfs4_open_owner_t *provided_oop)
{
	cred_t *ret_cr;
	nfs4_open_owner_t *oop = provided_oop;

	if (oop == NULL)
		oop = find_open_owner(cr, NFS4_PERM_CREATED, mi);
	if (oop != NULL) {
		mutex_enter(&oop->oo_lock);
		if (oop->oo_cred_otw)
			ret_cr = oop->oo_cred_otw;
		else
			ret_cr = cr;
		crhold(ret_cr);
		mutex_exit(&oop->oo_lock);
		if (provided_oop == NULL)
			open_owner_rele(oop);
	} else {
		ret_cr = cr;
		crhold(ret_cr);
	}
	return (ret_cr);
}

/*
 * Retrieves the next open stream in the rnode's list if an open stream
 * is provided; otherwise gets the first open stream in the list.
 * The open owner for that open stream is then retrieved, and if its
 * oo_cred_otw exists then it is returned; otherwise the provided 'cr'
 * is returned.  *osp is set to the 'found' open stream.
 *
 * Note: we don't set *osp to the open stream retrieved via the
 * optimized check since that won't necessarily be at the beginning
 * of the rnode list, and if that osp doesn't work we'd like to
 * check _all_ open streams (starting from the beginning of the
 * rnode list).
 */
cred_t *
nfs4_get_otw_cred_by_osp(rnode4_t *rp, cred_t *cr,
    nfs4_open_stream_t **osp, bool_t *first_time, bool_t *last_time)
{
	nfs4_open_stream_t *next_osp = NULL;
	cred_t *ret_cr;

	ASSERT(cr != NULL);
	/*
	 * As an optimization, try to find the open owner
	 * for the cred provided since that's most likely
	 * to work.
	 */
	if (*first_time) {
		nfs4_open_owner_t *oop;

		oop = find_open_owner(cr, NFS4_PERM_CREATED, VTOMI4(RTOV4(rp)));
		if (oop) {
			next_osp = find_open_stream(oop, rp);
			if (next_osp)
				mutex_exit(&next_osp->os_sync_lock);
			open_owner_rele(oop);
		} else {
			next_osp = NULL;
		}
	} else {
		int delay_rele = 0;

		/* return the next open stream for this rnode */
		mutex_enter(&rp->r_os_lock);
		/* Now, no one can add or delete to rp's open streams list */

		if (*osp) {
			next_osp = list_next(&rp->r_open_streams, *osp);
			/*
			 * Delay the rele of *osp until after we drop
			 * r_os_lock to not deadlock with oo_lock
			 * via an open_stream_rele()->open_owner_rele().
			 */
			delay_rele = 1;
		} else {
			next_osp = list_head(&rp->r_open_streams);
		}
		if (next_osp) {
			nfs4_open_stream_t *tmp_osp;

			/* find the next valid open stream */
			mutex_enter(&next_osp->os_sync_lock);
			while (next_osp && !next_osp->os_valid) {
				tmp_osp =
				    list_next(&rp->r_open_streams, next_osp);
				mutex_exit(&next_osp->os_sync_lock);
				next_osp = tmp_osp;
				if (next_osp)
					mutex_enter(&next_osp->os_sync_lock);
			}
			if (next_osp) {
				next_osp->os_ref_count++;
				mutex_exit(&next_osp->os_sync_lock);
			}
		}
		mutex_exit(&rp->r_os_lock);
		if (delay_rele)
			open_stream_rele(*osp, rp);
	}

	if (next_osp) {
		nfs4_open_owner_t *oop;

		oop = next_osp->os_open_owner;
		mutex_enter(&oop->oo_lock);
		if (oop->oo_cred_otw)
			ret_cr = oop->oo_cred_otw;
		else
			ret_cr = cr;
		crhold(ret_cr);
		mutex_exit(&oop->oo_lock);
		if (*first_time) {
			open_stream_rele(next_osp, rp);
			*osp = NULL;
		} else
			*osp = next_osp;
	} else {
		/* just return the cred provided to us */
		if (*first_time != TRUE)
			*last_time = TRUE;
		*osp = NULL;
		ret_cr = cr;
		crhold(ret_cr);
	}

	if (*first_time)
		*first_time = FALSE;
	return (ret_cr);
}

void
nfs4_init_stateid_types(nfs4_stateid_types_t *sid_tp)
{
	bzero(&sid_tp->d_sid, sizeof (stateid4));
	bzero(&sid_tp->l_sid, sizeof (stateid4));
	bzero(&sid_tp->o_sid, sizeof (stateid4));
	sid_tp->cur_sid_type = NO_SID;
}
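
/*
 * Illustrative sketch (editorial addition) of how the stateid-type
 * bookkeeping is intended to work with the nfs4_get_*stateid helpers
 * earlier in this file; the names below are hypothetical caller locals:
 *
 *	nfs4_stateid_types_t sid_types;
 *
 *	nfs4_init_stateid_types(&sid_types);
 * recov_retry:
 *	sid = nfs4_get_stateid(cr, rp, pid, mi, OP_READ, &sid_types,
 *	    async_read);
 *	(send the operation over the wire)
 *	if (res.status == NFS4ERR_OLD_STATEID ||
 *	    res.status == NFS4ERR_BAD_STATEID) {
 *		nfs4_save_stateid(&sid, &sid_types);
 *		goto recov_retry;	(next call skips the saved stateid)
 *	}
 */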
"open" : "special")); 1722 1723 switch (sid_tp->cur_sid_type) { 1724 case DEL_SID: 1725 sid_tp->d_sid = *s1; 1726 break; 1727 case LOCK_SID: 1728 sid_tp->l_sid = *s1; 1729 break; 1730 case OPEN_SID: 1731 sid_tp->o_sid = *s1; 1732 break; 1733 case SPEC_SID: 1734 default: 1735 cmn_err(CE_PANIC, "nfs4_save_stateid: illegal " 1736 "stateid type %d", sid_tp->cur_sid_type); 1737 } 1738 } 1739 1740 /* 1741 * We got NFS4ERR_BAD_SEQID. Setup some arguments to pass to recovery. 1742 * Caller is responsible for freeing. 1743 */ 1744 nfs4_bseqid_entry_t * 1745 nfs4_create_bseqid_entry(nfs4_open_owner_t *oop, nfs4_lock_owner_t *lop, 1746 vnode_t *vp, pid_t pid, nfs4_tag_type_t tag, seqid4 seqid) 1747 { 1748 nfs4_bseqid_entry_t *bsep; 1749 1750 bsep = kmem_alloc(sizeof (*bsep), KM_SLEEP); 1751 bsep->bs_oop = oop; 1752 bsep->bs_lop = lop; 1753 bsep->bs_vp = vp; 1754 bsep->bs_pid = pid; 1755 bsep->bs_tag = tag; 1756 bsep->bs_seqid = seqid; 1757 1758 return (bsep); 1759 } 1760 1761 void 1762 nfs4open_dg_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp, 1763 nfs4_open_owner_t *oop, nfs4_open_stream_t *osp, cred_t *cr, 1764 vnode_t *vp, int access_close, int deny_close) 1765 { 1766 lost_rqstp->lr_putfirst = FALSE; 1767 1768 ASSERT(vp != NULL); 1769 if (error == ETIMEDOUT || error == EINTR || 1770 NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) { 1771 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, 1772 "nfs4open_dg_save_lost_rqst: error %d", error)); 1773 1774 lost_rqstp->lr_op = OP_OPEN_DOWNGRADE; 1775 /* 1776 * The vp is held and rele'd via the recovery code. 1777 * See nfs4_save_lost_rqst. 1778 */ 1779 lost_rqstp->lr_vp = vp; 1780 lost_rqstp->lr_dvp = NULL; 1781 lost_rqstp->lr_oop = oop; 1782 lost_rqstp->lr_osp = osp; 1783 lost_rqstp->lr_lop = NULL; 1784 lost_rqstp->lr_cr = cr; 1785 lost_rqstp->lr_flk = NULL; 1786 lost_rqstp->lr_dg_acc = access_close; 1787 lost_rqstp->lr_dg_deny = deny_close; 1788 lost_rqstp->lr_putfirst = FALSE; 1789 } else { 1790 lost_rqstp->lr_op = 0; 1791 } 1792 } 1793 1794 /* 1795 * Change the access and deny bits of an OPEN. 1796 * If recovery is needed, *recov_credpp is set to the cred used OTW, 1797 * a hold is placed on it, and *recov_seqidp is set to the seqid used OTW. 1798 */ 1799 void 1800 nfs4_open_downgrade(int access_close, int deny_close, nfs4_open_owner_t *oop, 1801 nfs4_open_stream_t *osp, vnode_t *vp, cred_t *cr, nfs4_lost_rqst_t *lrp, 1802 nfs4_error_t *ep, cred_t **recov_credpp, seqid4 *recov_seqidp) 1803 { 1804 mntinfo4_t *mi; 1805 int downgrade_acc, downgrade_deny; 1806 int new_acc, new_deny; 1807 COMPOUND4args_clnt args; 1808 COMPOUND4res_clnt res; 1809 OPEN_DOWNGRADE4res *odg_res; 1810 nfs_argop4 argop[3]; 1811 nfs_resop4 *resop; 1812 rnode4_t *rp; 1813 bool_t needrecov = FALSE; 1814 int doqueue = 1; 1815 seqid4 seqid = 0; 1816 cred_t *cred_otw; 1817 hrtime_t t; 1818 1819 ASSERT(mutex_owned(&osp->os_sync_lock)); 1820 #if DEBUG 1821 mutex_enter(&oop->oo_lock); 1822 ASSERT(oop->oo_seqid_inuse); 1823 mutex_exit(&oop->oo_lock); 1824 #endif 1825 1826 1827 if (access_close == 0 && deny_close == 0) { 1828 nfs4_error_zinit(ep); 1829 return; 1830 } 1831 1832 cred_otw = nfs4_get_otw_cred(cr, VTOMI4(vp), oop); 1833 1834 cred_retry: 1835 nfs4_error_zinit(ep); 1836 downgrade_acc = 0; 1837 downgrade_deny = 0; 1838 mi = VTOMI4(vp); 1839 rp = VTOR4(vp); 1840 1841 /* 1842 * Check to see if the open stream got closed before we go OTW, 1843 * now that we have acquired the 'os_sync_lock'. 
	/*
	 * Check to see if the open stream got closed before we go OTW,
	 * now that we have acquired the 'os_sync_lock'.
	 */
	if (!osp->os_valid) {
		NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
		    " open stream has already been closed, return success"));
		/* error has already been set */
		goto no_args_out;
	}

	/* If the file failed recovery, just quit. */
	mutex_enter(&rp->r_statelock);
	if (rp->r_flags & R4RECOVERR) {
		mutex_exit(&rp->r_statelock);
		ep->error = EIO;
		goto no_args_out;
	}
	mutex_exit(&rp->r_statelock);

	seqid = nfs4_get_open_seqid(oop) + 1;

	NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade: "
	    "access_close %d, acc_read %"PRIu64" acc_write %"PRIu64"",
	    access_close, osp->os_share_acc_read, osp->os_share_acc_write));

	/* If we're closing the last READ, need to downgrade */
	if ((access_close & FREAD) && (osp->os_share_acc_read == 1))
		downgrade_acc |= OPEN4_SHARE_ACCESS_READ;

	/* If we're closing the last WRITE, need to downgrade */
	if ((access_close & FWRITE) && (osp->os_share_acc_write == 1))
		downgrade_acc |= OPEN4_SHARE_ACCESS_WRITE;

	downgrade_deny = OPEN4_SHARE_DENY_NONE;

	new_acc = 0;
	new_deny = 0;

	/* set our new access and deny share bits */
	if ((osp->os_share_acc_read > 0) &&
	    !(downgrade_acc & OPEN4_SHARE_ACCESS_READ))
		new_acc |= OPEN4_SHARE_ACCESS_READ;
	if ((osp->os_share_acc_write > 0) &&
	    !(downgrade_acc & OPEN4_SHARE_ACCESS_WRITE))
		new_acc |= OPEN4_SHARE_ACCESS_WRITE;

	new_deny = OPEN4_SHARE_DENY_NONE;

	NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade: "
	    "downgrade acc 0x%x deny 0x%x", downgrade_acc, downgrade_deny));
	NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade: "
	    "new acc 0x%x deny 0x%x", new_acc, new_deny));

	/*
	 * Check to see if we aren't actually doing any downgrade or
	 * if this is the last 'close' but the file is still mmapped.
	 * Skip this if this is a lost request resend so we don't decrement
	 * the osp's share counts more than once.
	 */
	if (!lrp &&
	    ((downgrade_acc == 0 && downgrade_deny == 0) ||
	    (new_acc == 0 && new_deny == 0))) {
		/*
		 * No downgrade to do, but still need to
		 * update osp's os_share_* counts.
		 */
"write" : "bogus")); 1913 if (access_close & FREAD) 1914 osp->os_share_acc_read--; 1915 if (access_close & FWRITE) 1916 osp->os_share_acc_write--; 1917 osp->os_share_deny_none--; 1918 nfs4_error_zinit(ep); 1919 1920 goto no_args_out; 1921 } 1922 1923 if (osp->os_orig_oo_name != oop->oo_name) { 1924 ep->error = EIO; 1925 goto no_args_out; 1926 } 1927 1928 /* setup the COMPOUND args */ 1929 if (lrp) 1930 args.ctag = TAG_OPEN_DG_LOST; 1931 else 1932 args.ctag = TAG_OPEN_DG; 1933 1934 args.array_len = 3; 1935 args.array = argop; 1936 1937 /* putfh */ 1938 argop[0].argop = OP_CPUTFH; 1939 argop[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh; 1940 1941 argop[1].argop = OP_GETATTR; 1942 argop[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK; 1943 argop[1].nfs_argop4_u.opgetattr.mi = mi; 1944 1945 ASSERT(mutex_owned(&osp->os_sync_lock)); 1946 ASSERT(osp->os_delegation == FALSE); 1947 1948 /* open downgrade */ 1949 argop[2].argop = OP_OPEN_DOWNGRADE; 1950 argop[2].nfs_argop4_u.opopen_downgrade.open_stateid = osp->open_stateid; 1951 argop[2].nfs_argop4_u.opopen_downgrade.share_access = new_acc; 1952 argop[2].nfs_argop4_u.opopen_downgrade.share_deny = new_deny; 1953 argop[2].nfs_argop4_u.opopen_downgrade.seqid = seqid; 1954 1955 t = gethrtime(); 1956 1957 rfs4call(mi, &args, &res, cred_otw, &doqueue, 0, ep); 1958 1959 if (ep->error == 0 && nfs4_need_to_bump_seqid(&res)) 1960 nfs4_set_open_seqid(seqid, oop, args.ctag); 1961 1962 if ((ep->error == EACCES || 1963 (ep->error == 0 && res.status == NFS4ERR_ACCESS)) && 1964 cred_otw != cr) { 1965 crfree(cred_otw); 1966 cred_otw = cr; 1967 crhold(cred_otw); 1968 if (!ep->error) 1969 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1970 goto cred_retry; 1971 } 1972 1973 needrecov = nfs4_needs_recovery(ep, TRUE, mi->mi_vfsp); 1974 1975 if (needrecov && recov_credpp) { 1976 *recov_credpp = cred_otw; 1977 crhold(*recov_credpp); 1978 if (recov_seqidp) 1979 *recov_seqidp = seqid; 1980 } 1981 1982 if (!ep->error && !res.status) { 1983 /* get the open downgrade results */ 1984 resop = &res.array[2]; 1985 odg_res = &resop->nfs_resop4_u.opopen_downgrade; 1986 1987 osp->open_stateid = odg_res->open_stateid; 1988 1989 /* set the open streams new access/deny bits */ 1990 if (access_close & FREAD) 1991 osp->os_share_acc_read--; 1992 if (access_close & FWRITE) 1993 osp->os_share_acc_write--; 1994 osp->os_share_deny_none--; 1995 osp->os_dc_openacc = new_acc; 1996 1997 nfs4_attr_cache(vp, 1998 &res.array[1].nfs_resop4_u.opgetattr.ga_res, 1999 t, cred_otw, TRUE, NULL); 2000 } 2001 2002 if (!ep->error) 2003 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2004 2005 no_args_out: 2006 crfree(cred_otw); 2007 } 2008 2009 /* 2010 * If an OPEN request gets ETIMEDOUT or EINTR (that includes bailing out 2011 * because the filesystem was forcibly unmounted) then we don't know if we 2012 * potentially left state dangling on the server, therefore the recovery 2013 * framework makes this call to resend the OPEN request and then undo it. 
	/* setup the COMPOUND args */
	if (lrp)
		args.ctag = TAG_OPEN_DG_LOST;
	else
		args.ctag = TAG_OPEN_DG;

	args.array_len = 3;
	args.array = argop;

	/* putfh */
	argop[0].argop = OP_CPUTFH;
	argop[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;

	argop[1].argop = OP_GETATTR;
	argop[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
	argop[1].nfs_argop4_u.opgetattr.mi = mi;

	ASSERT(mutex_owned(&osp->os_sync_lock));
	ASSERT(osp->os_delegation == FALSE);

	/* open downgrade */
	argop[2].argop = OP_OPEN_DOWNGRADE;
	argop[2].nfs_argop4_u.opopen_downgrade.open_stateid = osp->open_stateid;
	argop[2].nfs_argop4_u.opopen_downgrade.share_access = new_acc;
	argop[2].nfs_argop4_u.opopen_downgrade.share_deny = new_deny;
	argop[2].nfs_argop4_u.opopen_downgrade.seqid = seqid;

	t = gethrtime();

	rfs4call(mi, &args, &res, cred_otw, &doqueue, 0, ep);

	if (ep->error == 0 && nfs4_need_to_bump_seqid(&res))
		nfs4_set_open_seqid(seqid, oop, args.ctag);

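	/*
	 * If the credential we used over the wire was rejected with an
	 * access error and it is not the caller's credential, switch to
	 * the caller's credential and retry the compound once from the
	 * cred_retry label above.
	 */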
	if ((ep->error == EACCES ||
	    (ep->error == 0 && res.status == NFS4ERR_ACCESS)) &&
	    cred_otw != cr) {
		crfree(cred_otw);
		cred_otw = cr;
		crhold(cred_otw);
		if (!ep->error)
			(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
		goto cred_retry;
	}

	needrecov = nfs4_needs_recovery(ep, TRUE, mi->mi_vfsp);

	if (needrecov && recov_credpp) {
		*recov_credpp = cred_otw;
		crhold(*recov_credpp);
		if (recov_seqidp)
			*recov_seqidp = seqid;
	}

	if (!ep->error && !res.status) {
		/* get the open downgrade results */
		resop = &res.array[2];
		odg_res = &resop->nfs_resop4_u.opopen_downgrade;

		osp->open_stateid = odg_res->open_stateid;

		/* set the open streams new access/deny bits */
		if (access_close & FREAD)
			osp->os_share_acc_read--;
		if (access_close & FWRITE)
			osp->os_share_acc_write--;
		osp->os_share_deny_none--;
		osp->os_dc_openacc = new_acc;

		nfs4_attr_cache(vp,
		    &res.array[1].nfs_resop4_u.opgetattr.ga_res,
		    t, cred_otw, TRUE, NULL);
	}

	if (!ep->error)
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);

no_args_out:
	crfree(cred_otw);
}

/*
 * If an OPEN request gets ETIMEDOUT or EINTR (that includes bailing out
 * because the filesystem was forcibly unmounted), then we don't know whether
 * we potentially left state dangling on the server; therefore the recovery
 * framework makes this call to resend the OPEN request and then undo it.
 */
void
nfs4_resend_open_otw(vnode_t **vpp, nfs4_lost_rqst_t *resend_rqstp,
    nfs4_error_t *ep)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	nfs_argop4 argop[4];
	GETFH4res *gf_res = NULL;
	OPEN4cargs *open_args;
	OPEN4res *op_res;
	char *destcfp;
	int destclen;
	nfs4_ga_res_t *garp;
	vnode_t *dvp = NULL, *vp = NULL;
	rnode4_t *rp = NULL, *drp = NULL;
	cred_t *cr = NULL;
	seqid4 seqid;
	nfs4_open_owner_t *oop = NULL;
	nfs4_open_stream_t *osp = NULL;
	component4 *srcfp;
	open_claim_type4 claim;
	mntinfo4_t *mi;
	int doqueue = 1;
	bool_t retry_open = FALSE;
	int created_osp = 0;
	hrtime_t t;
	char *failed_msg = "";
	int fh_different;
	int reopen = 0;

	nfs4_error_zinit(ep);

	cr = resend_rqstp->lr_cr;
	dvp = resend_rqstp->lr_dvp;

	vp = *vpp;
	if (vp) {
		ASSERT(nfs4_consistent_type(vp));
		rp = VTOR4(vp);
	}

	if (rp) {
		/* If the file failed recovery, just quit. */
		mutex_enter(&rp->r_statelock);
		if (rp->r_flags & R4RECOVERR) {
			mutex_exit(&rp->r_statelock);
			ep->error = EIO;
			return;
		}
		mutex_exit(&rp->r_statelock);
	}

	if (dvp) {
		drp = VTOR4(dvp);
		/* If the parent directory failed recovery, just quit. */
		mutex_enter(&drp->r_statelock);
		if (drp->r_flags & R4RECOVERR) {
			mutex_exit(&drp->r_statelock);
			ep->error = EIO;
			return;
		}
		mutex_exit(&drp->r_statelock);
	} else
		reopen = 1;	/* NULL dvp means this is a reopen */

	claim = resend_rqstp->lr_oclaim;
	ASSERT(claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR);

	args.ctag = TAG_OPEN_LOST;
	args.array_len = 4;
	args.array = argop;

	argop[0].argop = OP_CPUTFH;
	if (reopen) {
		ASSERT(vp != NULL);

		mi = VTOMI4(vp);
		/*
		 * If this is a file mount, then
		 * use the mntinfo parentfh.
		 */
		argop[0].nfs_argop4_u.opcputfh.sfh =
		    (vp->v_flag & VROOT) ? mi->mi_srvparentfh :
		    VTOSV(vp)->sv_dfh;
		args.ctag = TAG_REOPEN_LOST;
	} else {
		argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(dvp)->r_fh;
		mi = VTOMI4(dvp);
	}

	argop[1].argop = OP_COPEN;
	open_args = &argop[1].nfs_argop4_u.opcopen;
	open_args->claim = claim;

	/*
	 * If we sent over an OPEN with CREATE then the only
	 * thing we care about is to not leave dangling state
	 * on the server, not whether the file we potentially
	 * created remains on the server.  So even though the
	 * lost open request specified a CREATE, we only wish
	 * to do a non-CREATE OPEN.
	 */
	open_args->opentype = OPEN4_NOCREATE;

	srcfp = &resend_rqstp->lr_ofile;
	destclen = srcfp->utf8string_len;
	destcfp = kmem_alloc(destclen + 1, KM_SLEEP);
	bcopy(srcfp->utf8string_val, destcfp, destclen);
	destcfp[destclen] = '\0';
	if (claim == CLAIM_DELEGATE_CUR) {
		open_args->open_claim4_u.delegate_cur_info.delegate_stateid =
		    resend_rqstp->lr_ostateid;
		open_args->open_claim4_u.delegate_cur_info.cfile = destcfp;
	} else {
		open_args->open_claim4_u.cfile = destcfp;
	}

	open_args->share_access = resend_rqstp->lr_oacc;
	open_args->share_deny = resend_rqstp->lr_odeny;
	oop = resend_rqstp->lr_oop;
	ASSERT(oop != NULL);

	open_args->owner.clientid = mi2clientid(mi);
	/* this length never changes */
	open_args->owner.owner_len = sizeof (oop->oo_name);
	open_args->owner.owner_val =
	    kmem_alloc(open_args->owner.owner_len, KM_SLEEP);

	ep->error = nfs4_start_open_seqid_sync(oop, mi);
	ASSERT(ep->error == 0);	/* recov thread always succeeds */
	/*
	 * We can get away with not saving the seqid upon detection
	 * of a lost request, and now just use the open owner's current
	 * seqid since we only allow one op OTW per seqid and lost
	 * requests are saved FIFO.
	 */
	seqid = nfs4_get_open_seqid(oop) + 1;
	open_args->seqid = seqid;

	bcopy(&oop->oo_name, open_args->owner.owner_val,
	    open_args->owner.owner_len);

	/* getfh */
	argop[2].argop = OP_GETFH;

	/* Construct the getattr part of the compound */
	argop[3].argop = OP_GETATTR;
	argop[3].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
	argop[3].nfs_argop4_u.opgetattr.mi = mi;

	res.array = NULL;

	t = gethrtime();

	rfs4call(mi, &args, &res, cr, &doqueue, 0, ep);

	if (ep->error == 0 && nfs4_need_to_bump_seqid(&res))
		nfs4_set_open_seqid(seqid, oop, args.ctag);

	NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
	    "nfs4_resend_open_otw: error %d stat %d", ep->error, res.status));

	if (ep->error || res.status)
		goto err_out;

	op_res = &res.array[1].nfs_resop4_u.opopen;
	gf_res = &res.array[2].nfs_resop4_u.opgetfh;
	garp = &res.array[3].nfs_resop4_u.opgetattr.ga_res;

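	/*
	 * If the lost OPEN was for a file we never got a vnode for,
	 * construct one now from the GETFH and GETATTR results so the
	 * open stream can be attached to its rnode below.
	 */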
	if (!vp) {
		int rnode_err = 0;
		nfs4_sharedfh_t *sfh;

		/*
		 * If we can't decode all the attributes, they are not usable;
		 * just make the vnode.
		 */

		sfh = sfh4_get(&gf_res->object, VTOMI4(dvp));
		*vpp = makenfs4node(sfh, garp, dvp->v_vfsp, t, cr, dvp,
		    fn_get(VTOSV(dvp)->sv_name,
		    open_args->open_claim4_u.cfile, sfh));
		sfh4_rele(&sfh);
		NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
		    "nfs4_resend_open_otw: made vp %p for file %s",
		    (void *)(*vpp), open_args->open_claim4_u.cfile));

		if (ep->error)
			PURGE_ATTRCACHE4(*vpp);

		/*
		 * For the newly created *vpp case, make sure the rnode
		 * isn't bad before using it.
		 */
		mutex_enter(&(VTOR4(*vpp))->r_statelock);
		if (VTOR4(*vpp)->r_flags & R4RECOVERR)
			rnode_err = EIO;
		mutex_exit(&(VTOR4(*vpp))->r_statelock);

		if (rnode_err) {
			NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
			    "nfs4_resend_open_otw: rp %p is bad",
			    (void *)VTOR4(*vpp)));
			ep->error = rnode_err;
			goto err_out;
		}

		vp = *vpp;
		rp = VTOR4(vp);
	}

	if (reopen) {
		/*
		 * Check if the path we reopened really is the same
		 * file.  We could end up in a situation where the file
		 * was removed and a new file created with the same name.
		 */
		(void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_READER, 0);
		fh_different =
		    (nfs4cmpfh(&rp->r_fh->sfh_fh, &gf_res->object) != 0);
		if (fh_different) {
			if (mi->mi_fh_expire_type == FH4_PERSISTENT ||
			    mi->mi_fh_expire_type & FH4_NOEXPIRE_WITH_OPEN) {
				/* Oops, we don't have the same file */
				if (mi->mi_fh_expire_type == FH4_PERSISTENT)
					failed_msg =
					    "Couldn't reopen: Persistent "
					    "file handle changed";
				else
					failed_msg =
					    "Couldn't reopen: Volatile "
					    "(no expire on open) file handle "
					    "changed";

				nfs4_end_open_seqid_sync(oop);
				kmem_free(destcfp, destclen + 1);
				nfs4args_copen_free(open_args);
				(void) xdr_free(xdr_COMPOUND4res_clnt,
				    (caddr_t)&res);
				nfs_rw_exit(&mi->mi_fh_lock);
				nfs4_fail_recov(vp, failed_msg, ep->error,
				    ep->stat);
				return;
			} else {
				/*
				 * We have volatile file handles that don't
				 * compare.  If the fids are the same then we
				 * assume that the file handle expired but the
				 * rnode still refers to the same file object.
				 *
				 * First check whether we have fids at all.
				 * If we don't, we have a dumb server, so we
				 * will just assume everything is ok for now.
				 */
				if (!ep->error &&
				    garp->n4g_va.va_mask & AT_NODEID &&
				    rp->r_attr.va_mask & AT_NODEID &&
				    rp->r_attr.va_nodeid !=
				    garp->n4g_va.va_nodeid) {
					/*
					 * We have fids, but they don't
					 * compare.  So kill the file.
					 */
					failed_msg =
					    "Couldn't reopen: file handle "
					    "changed due to mismatched fids";
					nfs4_end_open_seqid_sync(oop);
					kmem_free(destcfp, destclen + 1);
					nfs4args_copen_free(open_args);
					(void) xdr_free(xdr_COMPOUND4res_clnt,
					    (caddr_t)&res);
					nfs_rw_exit(&mi->mi_fh_lock);
					nfs4_fail_recov(vp, failed_msg,
					    ep->error, ep->stat);
					return;
				} else {
					/*
					 * We have volatile file handles that
					 * refer to the same file (at least
					 * they have the same fid) or we don't
					 * have fids so we can't tell. :(. We'll
					 * be a kind and accepting client so
					 * we'll update the rnode's file
					 * handle with the otw handle.
					 *
					 * We need to drop mi->mi_fh_lock since
					 * sfh4_update acquires it.  Since there
					 * is only one recovery thread there is
					 * no race.
					 */
					nfs_rw_exit(&mi->mi_fh_lock);
					sfh4_update(rp->r_fh, &gf_res->object);
				}
			}
		} else {
			nfs_rw_exit(&mi->mi_fh_lock);
		}
	}

	ASSERT(nfs4_consistent_type(vp));

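	/*
	 * If the server set OPEN4_RESULT_CONFIRM, the open must be
	 * confirmed with OPEN_CONFIRM before the new stateid is used.
	 */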
	if (op_res->rflags & OPEN4_RESULT_CONFIRM)
		nfs4open_confirm(vp, &seqid, &op_res->stateid, cr, TRUE,
		    &retry_open, oop, TRUE, ep, NULL);
	if (ep->error || ep->stat) {
		nfs4_end_open_seqid_sync(oop);
		kmem_free(destcfp, destclen + 1);
		nfs4args_copen_free(open_args);
		if (!ep->error)
			(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
		return;
	}

	if (reopen) {
		/*
		 * Doing a reopen here so the osp should already exist.
		 * If not, something changed or went very wrong.
		 *
		 * returns with 'os_sync_lock' held
		 */
		osp = find_open_stream(oop, rp);
		if (!osp) {
			NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
			    "nfs4_resend_open_otw: couldn't find osp"));
			ep->error = EINVAL;
			goto err_out;
		}
		osp->os_open_ref_count++;
	} else {
		mutex_enter(&oop->oo_lock);
		oop->oo_just_created = NFS4_PERM_CREATED;
		mutex_exit(&oop->oo_lock);

		/* returns with 'os_sync_lock' held */
		osp = find_or_create_open_stream(oop, rp, &created_osp);
		if (!osp) {
			NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
			    "nfs4_resend_open_otw: couldn't create osp"));
			ep->error = EINVAL;
			goto err_out;
		}
	}

	osp->open_stateid = op_res->stateid;
	osp->os_delegation = FALSE;
	/*
	 * Need to reset this bitfield for the possible case where we were
	 * going to OTW CLOSE the file, got a non-recoverable error, and before
	 * we could retry the CLOSE, OPENed the file again.
	 */
	ASSERT(osp->os_open_owner->oo_seqid_inuse);
	osp->os_final_close = 0;
	osp->os_force_close = 0;

	if (!reopen) {
		if (open_args->share_access & OPEN4_SHARE_ACCESS_READ)
			osp->os_share_acc_read++;
		if (open_args->share_access & OPEN4_SHARE_ACCESS_WRITE)
			osp->os_share_acc_write++;
		osp->os_share_deny_none++;
	}

	mutex_exit(&osp->os_sync_lock);
	if (created_osp)
		nfs4_inc_state_ref_count(mi);
	open_stream_rele(osp, rp);

	nfs4_end_open_seqid_sync(oop);

	/* accept delegation, if any */
	nfs4_delegation_accept(rp, claim, op_res, garp, cr);

	kmem_free(destcfp, destclen + 1);
	nfs4args_copen_free(open_args);

	if (claim == CLAIM_DELEGATE_CUR)
		nfs4_attr_cache(vp, garp, t, cr, TRUE, NULL);
	else
		PURGE_ATTRCACHE4(vp);

	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);

	ASSERT(nfs4_consistent_type(vp));

	return;

err_out:
	nfs4_end_open_seqid_sync(oop);
	kmem_free(destcfp, destclen + 1);
	nfs4args_copen_free(open_args);
	if (!ep->error)
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
}